├── model_data
    ├── new_class.txt
    ├── yolo_anchors.txt
    ├── readme.md
    ├── voc_classes.txt
    └── coco_classes.txt
├── img
    ├── 1.jpg
    ├── 1.png
    ├── 2.jpg
    ├── 2.png
    ├── 3.png
    ├── 4.jpg
    ├── 4.png
    ├── 5.jpg
    └── 6.jpg
├── scripts
    ├── 1_vitisAI_tf_printNode.sh
    ├── readme.md
    ├── 3_vitisAI_tf_compile.sh
    └── 2_vitisAI_tf_quantize.sh
├── compile_result
    └── readme.md
├── nets
    ├── __pycache__
    │   ├── ious.cpython-36.pyc
    │   ├── loss.cpython-36.pyc
    │   ├── yolo4_tiny.cpython-36.pyc
    │   └── CSPdarknet53_tiny.cpython-36.pyc
    ├── ious.py
    ├── CSPdarknet53_tiny.py
    ├── loss.py
    └── yolo4_tiny.py
├── quantize_result
    └── readme.md
├── utils
    ├── __pycache__
    │   └── utils.cpython-36.pyc
    └── utils.py
├── yolo_fastest_tensorflow2
    ├── weights
    │   └── readme.md
    ├── nets
    │   ├── yolo_fastest_backbone.py
    │   └── yolo_fastest.py
    └── train.py
├── edge
    └── readme.md
├── test.py
├── VOCdevkit
    ├── readme.md
    ├── voc_data_migrate.py
    ├── ImageSets_Convert.py
    └── DETRAC_xmlParser.py
├── frozon_result
    └── readme.md
├── predict.py
├── video.py
├── voc_annotation.py
├── get_gt_txt.py
├── kmeans_for_anchors.py
├── README.md
├── input_fn.py
├── get_dr_txt.py
├── core
    ├── tf_prediction.py
    ├── yolo3_predictor.py
    └── evaluation.py
├── yolo.py
├── keras_to_tensorflow.py
├── train.py
└── Model_pruning
    ├── compressor.py
    └── train_purn.py


/model_data/new_class.txt:
--------------------------------------------------------------------------------
1 | car


--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 13,18, 19,26, 28,40, 43,39, 53,63, 83,104


--------------------------------------------------------------------------------
/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/1.jpg


--------------------------------------------------------------------------------
/img/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/1.png


--------------------------------------------------------------------------------
/img/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/2.jpg


--------------------------------------------------------------------------------
/img/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/2.png


--------------------------------------------------------------------------------
/img/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/3.png


--------------------------------------------------------------------------------
/img/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/4.jpg


--------------------------------------------------------------------------------
/img/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/4.png


--------------------------------------------------------------------------------
/img/5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/5.jpg


--------------------------------------------------------------------------------
/img/6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/img/6.jpg


--------------------------------------------------------------------------------
/scripts/1_vitisAI_tf_printNode.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # main process
4 | vai_q_tensorflow inspect --input_frozen_graph=model_data/model.pb
5 | 


--------------------------------------------------------------------------------
/compile_result/readme.md:
--------------------------------------------------------------------------------
1 | The compiled files will be here.
2 | 
3 | My files:
4 | 
5 | 链接：https://pan.baidu.com/s/1ZaXH9lgg3r4U6YR3NDkf7w 
6 | 提取码：1byo 


--------------------------------------------------------------------------------
/nets/__pycache__/ious.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/ious.cpython-36.pyc


--------------------------------------------------------------------------------
/nets/__pycache__/loss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/loss.cpython-36.pyc


--------------------------------------------------------------------------------
/quantize_result/readme.md:
--------------------------------------------------------------------------------
1 | The quantize result is here.
2 | 
3 | My file:
4 | 
5 | 链接：https://pan.baidu.com/s/1CZXitu0Rh7HkTt6PyNLxLg 
6 | 提取码：hk81 
7 | 


--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/utils/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/nets/__pycache__/yolo4_tiny.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/yolo4_tiny.cpython-36.pyc


--------------------------------------------------------------------------------
/nets/__pycache__/CSPdarknet53_tiny.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yss9701/Ultra96-Yolov4-tiny-and-Yolo-Fastest/HEAD/nets/__pycache__/CSPdarknet53_tiny.cpython-36.pyc


--------------------------------------------------------------------------------
/scripts/readme.md:
--------------------------------------------------------------------------------
1 | The dpu.json and dpu.dcf file are here:
2 | 
3 | 链接：https://pan.baidu.com/s/1Nl7y9-WkWOp1vyd9SuwYQw 
4 | 提取码：01ra 
5 | 
6 | We can use 
7 | 
8 | dlet -f dpu.hwh to generate .dcf file.


--------------------------------------------------------------------------------
/yolo_fastest_tensorflow2/weights/readme.md:
--------------------------------------------------------------------------------
1 | My weights and model structure of Yolo-Fastest implemented with tensorflow2:
2 | 
3 | 链接：https://pan.baidu.com/s/1PRbR1SHPd6r5gFIa_gg_OQ 
4 | 提取码：dmvo 
5 | 
6 | 


--------------------------------------------------------------------------------
/model_data/readme.md:
--------------------------------------------------------------------------------
1 | The Yolov4-tiny-voc weight is here：
2 | 
3 | 链接：https://pan.baidu.com/s/1MAnXMgzkxK8zvTNOmqu12w 
4 | 提取码：2095 
5 | 
6 | My reference：
7 | 
8 | https://github.com/bubbliiiing/yolov4-tiny-tf2


--------------------------------------------------------------------------------
/edge/readme.md:
--------------------------------------------------------------------------------
1 | The .bit and .hwh files are here. We can rebuild the official project DPU-PYNQ(https://github.com/Xilinx/DPU-PYNQ/tree/master/boards)
2 | 
3 | My files:
4 | 
5 | 链接：https://pan.baidu.com/s/1D4TiPUSjwU2tWVFq2EHUcw 
6 | 提取码：lcic 


--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
 1 | aeroplane
 2 | bicycle
 3 | bird
 4 | boat
 5 | bottle
 6 | bus
 7 | car
 8 | cat
 9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | from nets.yolo4_tiny import yolo_body
 2 | from tensorflow.keras.layers import Input
 3 | 
 4 | # 输入的图像为
 5 | image_input = Input(shape=(416, 416, 3))
 6 | model = yolo_body(image_input,3,20)
 7 | model.summary()
 8 | 
 9 | for i,layer in enumerate(model.layers):
10 |     print(i,layer.name)


--------------------------------------------------------------------------------
/VOCdevkit/readme.md:
--------------------------------------------------------------------------------
1 | These files are used to convert the dataset format. We use VOC format.
2 | 
3 | We can refer to https://blog.csdn.net/weixin_38106878/article/details/88684280?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control


--------------------------------------------------------------------------------
/scripts/3_vitisAI_tf_compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Compile
 4 | 
 5 | vai_c_tensorflow --arch /workspace/ministNumber/dnndk/dnndk/dpu.json  -f quantize_results/deploy_model.pb --output_dir compile_result -n yolo_car
 6 | 
 7 | 
 8 | echo "#####################################"
 9 | echo "COMPILATION COMPLETED"
10 | echo "#####################################"
11 | 


--------------------------------------------------------------------------------
/frozon_result/readme.md:
--------------------------------------------------------------------------------
 1 | We can use keras_to_tensorflow.py to generate our .pb file. We will use this file to quantize the model. 
 2 | 
 3 | Requirement : Tensorflow 1.15.2
 4 | 
 5 | My pb file：
 6 | 
 7 | 链接：https://pan.baidu.com/s/1jrBh0l2umt_mENZf9RJA6w 
 8 | 提取码：sy0j 
 9 | 
10 | 
11 | 
12 | My weights and model structure：
13 | 
14 | 链接：https://pan.baidu.com/s/1RyHs3Fzf0V46y2h3YVZr1g 
15 | 提取码：86e4 


--------------------------------------------------------------------------------
/scripts/2_vitisAI_tf_quantize.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | # run quantization
 5 | 
 6 | vai_q_tensorflow quantize \
 7 |   --input_frozen_graph ./model_data/model.pb \
 8 |   --input_nodes input_1 \
 9 |   --input_shapes ?,320,320,3 \
10 |   --output_nodes conv2d_20/BiasAdd,conv2d_23/BiasAdd \
11 |   --method 1 \
12 |   --input_fn input_fn.calib_input \
13 |   --gpu 0 \
14 |   --calib_iter 100 \
15 | 
16 | echo "#####################################"
17 | echo "QUANTIZATION COMPLETED"
18 | echo "#####################################"
19 | 
20 | 


--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
 1 | from yolo import YOLO
 2 | from PIL import Image
 3 | import tensorflow as tf
 4 | 
 5 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
 6 | for gpu in gpus:
 7 |     tf.config.experimental.set_memory_growth(gpu, True)
 8 |     
 9 | yolo = YOLO()
10 | 
11 | while True:
12 |     img = input('Input image filename:')
13 |     try:
14 |         image = Image.open(img)
15 |     except:
16 |         print('Open Error! Try again!')
17 |         continue
18 |     else:
19 |         r_image = yolo.detect_image(image)
20 |         r_image.show()
21 | 


--------------------------------------------------------------------------------
/VOCdevkit/voc_data_migrate.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | import random
 4 | import shutil
 5 | 
 6 | #xml路径的地址
 7 | XmlPath=r'xml_test'
 8 | #原图片的地址
 9 | pictureBasePath=r"Insight-MVT_Annotation_Train"
10 | #保存图片的地址
11 | saveBasePath=r"picture_test"
12 | 
13 | total_xml = os.listdir(XmlPath)
14 | num=len(total_xml)
15 | list=range(num)
16 | if os.path.exists(saveBasePath)==False: #判断文件夹是否存在
17 |      os.makedirs(saveBasePath)
18 | 
19 | 
20 | for xml in total_xml:
21 |     xml_temp=xml.split("__")
22 |     folder=xml_temp[0]
23 |     filename=xml_temp[1].split(".")[0]+".jpg"
24 |     # print(folder)
25 |     # print(filename)
26 |     temp_pictureBasePath=os.path.join(pictureBasePath,folder)
27 |     filePath=os.path.join(temp_pictureBasePath,filename)
28 |     # print(filePath)
29 |     newfile=xml.split(".")[0]+".jpg"
30 |     newfile_path=os.path.join(saveBasePath,newfile)
31 |     print(newfile_path)
32 |     shutil.copyfile(filePath, newfile_path)
33 | print("xml file total number",num)
34 | 


--------------------------------------------------------------------------------
/model_data/coco_classes.txt:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 | 


--------------------------------------------------------------------------------
/VOCdevkit/ImageSets_Convert.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 |  
 5 | xmlfilepath=r'./VOC2020/Annotations'
 6 | saveBasePath=r"./"
 7 |  
 8 | trainval_percent=0.8
 9 | train_percent=0.85
10 | total_xml = os.listdir(xmlfilepath)
11 | num=len(total_xml)
12 | list=range(num)
13 | tv=int(num*trainval_percent)
14 | tr=int(tv*train_percent)
15 | trainval= random.sample(list,tv)
16 | train=random.sample(trainval,tr)
17 |  
18 | print("train and val size",tv)
19 | print("traub suze",tr)
20 | ftrainval = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/trainval.txt'), 'w')
21 | ftest = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/test.txt'), 'w')
22 | ftrain = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/train.txt'), 'w')
23 | fval = open(os.path.join(saveBasePath,'VOC2020/ImageSets/Main/val.txt'), 'w')
24 | # Start time
25 | start = time.time()
26 | for i  in list:
27 |     name=total_xml[i][:-4]+'\n'
28 |     if i in trainval:
29 |         ftrainval.write(name)
30 |         if i in train:
31 |             ftrain.write(name)
32 |         else:
33 |             fval.write(name)
34 |     else:
35 |         ftest.write(name)
36 | # End time
37 | end = time.time()
38 | seconds=end-start
39 | print( "Time taken : {0} seconds".format(seconds))
40 |  
41 | ftrainval.close()
42 | ftrain.close()
43 | fval.close()
44 | ftest .close()


--------------------------------------------------------------------------------
/video.py:
--------------------------------------------------------------------------------
 1 | #-------------------------------------#
 2 | #       调用摄像头检测
 3 | #-------------------------------------#
 4 | from yolo import YOLO
 5 | from PIL import Image
 6 | import numpy as np
 7 | import cv2
 8 | import time
 9 | 
10 | import tensorflow as tf
11 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
12 | for gpu in gpus:
13 |     tf.config.experimental.set_memory_growth(gpu, True)
14 | 
15 | yolo = YOLO()
16 | # 调用摄像头
17 | capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")
18 | 
19 | fps = 0.0
20 | t1 = time.time()
21 | while(True):
22 |     t1 = time.time()
23 |     # 读取某一帧
24 |     ref,frame=capture.read()
25 |     # 格式转变，BGRtoRGB
26 |     frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
27 |     # 转变成Image
28 |     frame = Image.fromarray(np.uint8(frame))
29 | 
30 |     # 进行检测
31 |     frame = np.array(yolo.detect_image(frame))
32 | 
33 |     # RGBtoBGR满足opencv显示格式
34 |     frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
35 |     
36 |     fps  = ( fps + (1./(time.time()-t1)) ) / 2
37 |     print("fps= %.2f"%(fps))
38 |     frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
39 |     
40 |     t1 = time.time()
41 |     cv2.imshow("video",frame)
42 |     c= cv2.waitKey(1) & 0xff 
43 |     if c==27:
44 |         capture.release()
45 |         break
46 | 
47 | yolo.close_session()    


--------------------------------------------------------------------------------
/voc_annotation.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | from os import getcwd
 3 | 
 4 | sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
 5 | 
 6 | classes = ["car"]
 7 | 
 8 | def convert_annotation(year, image_id, list_file):
 9 |     in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
10 |     tree=ET.parse(in_file)
11 |     root = tree.getroot()
12 | 
13 |     for obj in root.iter('object'):
14 |         difficult = 0 
15 |         if obj.find('difficult')!=None:
16 |             difficult = obj.find('difficult').text
17 |             
18 |         cls = obj.find('name').text
19 |         if cls not in classes or int(difficult)==1:
20 |             continue
21 |         cls_id = classes.index(cls)
22 |         xmlbox = obj.find('bndbox')
23 |         b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
24 |         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
25 | 
26 | wd = getcwd()
27 | 
28 | for year, image_set in sets:
29 |     image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
30 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
31 |     for image_id in image_ids:
32 |         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
33 |         convert_annotation(year, image_id, list_file)
34 |         list_file.write('\n')
35 |     list_file.close()
36 | 


--------------------------------------------------------------------------------
/get_gt_txt.py:
--------------------------------------------------------------------------------
 1 | #----------------------------------------------------#
 2 | #   获取测试集的ground-truth
 3 | #   具体视频教程可查看
 4 | #   https://www.bilibili.com/video/BV1zE411u7Vw
 5 | #----------------------------------------------------#
 6 | import sys
 7 | import os
 8 | import glob
 9 | import xml.etree.ElementTree as ET
10 | 
11 | image_ids = open('VOCdevkit/VOC2007/ImageSets/Main/test.txt').read().strip().split()
12 | 
13 | if not os.path.exists("./input"):
14 |     os.makedirs("./input")
15 | if not os.path.exists("./input/ground-truth"):
16 |     os.makedirs("./input/ground-truth")
17 | 
18 | for image_id in image_ids:
19 |     with open("./input/ground-truth/"+image_id+".txt", "w") as new_f:
20 |         root = ET.parse("VOCdevkit/VOC2007/Annotations/"+image_id+".xml").getroot()
21 |         for obj in root.findall('object'):
22 |             difficult_flag = False
23 |             if obj.find('difficult')!=None:
24 |                 difficult = obj.find('difficult').text
25 |                 if int(difficult)==1:
26 |                     difficult_flag = True
27 |             obj_name = obj.find('name').text
28 |             bndbox = obj.find('bndbox')
29 |             left = bndbox.find('xmin').text
30 |             top = bndbox.find('ymin').text
31 |             right = bndbox.find('xmax').text
32 |             bottom = bndbox.find('ymax').text
33 |             if difficult_flag:
34 |                 new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
35 |             else:
36 |                 new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
37 | 
38 | print("Conversion completed!")
39 | 


--------------------------------------------------------------------------------
/nets/ious.py:
--------------------------------------------------------------------------------
 1 | from tensorflow.keras import backend as K
 2 | import tensorflow as tf
 3 | import math
 4 | def box_ciou(b1, b2):
 5 |     """
 6 |     输入为：
 7 |     ----------
 8 |     b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
 9 |     b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
10 |     返回为：
11 |     -------
12 |     ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
13 |     """
14 |     # 求出预测框左上角右下角
15 |     b1_xy = b1[..., :2]
16 |     b1_wh = b1[..., 2:4]
17 |     b1_wh_half = b1_wh/2.
18 |     b1_mins = b1_xy - b1_wh_half
19 |     b1_maxes = b1_xy + b1_wh_half
20 |     # 求出真实框左上角右下角
21 |     b2_xy = b2[..., :2]
22 |     b2_wh = b2[..., 2:4]
23 |     b2_wh_half = b2_wh/2.
24 |     b2_mins = b2_xy - b2_wh_half
25 |     b2_maxes = b2_xy + b2_wh_half
26 | 
27 |     # 求真实框和预测框所有的iou
28 |     intersect_mins = K.maximum(b1_mins, b2_mins)
29 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
30 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
31 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
32 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
33 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
34 |     union_area = b1_area + b2_area - intersect_area
35 |     iou = intersect_area / K.maximum(union_area,K.epsilon())
36 | 
37 |     # 计算中心的差距
38 |     center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1)
39 |     # 找到包裹两个框的最小框的左上角和右下角
40 |     enclose_mins = K.minimum(b1_mins, b2_mins)
41 |     enclose_maxes = K.maximum(b1_maxes, b2_maxes)
42 |     enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
43 |     # 计算对角线距离
44 |     enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
45 |     ciou = iou - 1.0 * (center_distance) / K.maximum(enclose_diagonal ,K.epsilon())
46 |     
47 |     v = 4*K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1],K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi)
48 |     alpha = v /  K.maximum((1.0 - iou + v), K.epsilon())
49 |     ciou = ciou - alpha * v
50 | 
51 |     ciou = K.expand_dims(ciou, -1)
52 |     return ciou


--------------------------------------------------------------------------------
/kmeans_for_anchors.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import xml.etree.ElementTree as ET
  3 | import glob
  4 | import random
  5 | 
  6 | def cas_iou(box,cluster):
  7 |     x = np.minimum(cluster[:,0],box[0])
  8 |     y = np.minimum(cluster[:,1],box[1])
  9 | 
 10 |     intersection = x * y
 11 |     area1 = box[0] * box[1]
 12 | 
 13 |     area2 = cluster[:,0] * cluster[:,1]
 14 |     iou = intersection / (area1 + area2 -intersection)
 15 | 
 16 |     return iou
 17 | 
 18 | def avg_iou(box,cluster):
 19 |     return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])])
 20 | 
 21 | 
 22 | def kmeans(box,k):
 23 |     # 取出一共有多少框
 24 |     row = box.shape[0]
 25 |     
 26 |     # 每个框各个点的位置
 27 |     distance = np.empty((row,k))
 28 |     
 29 |     # 最后的聚类位置
 30 |     last_clu = np.zeros((row,))
 31 | 
 32 |     np.random.seed()
 33 | 
 34 |     # 随机选5个当聚类中心
 35 |     cluster = box[np.random.choice(row,k,replace = False)]
 36 |     # cluster = random.sample(row, k)
 37 |     while True:
 38 |         # 计算每一行距离五个点的iou情况。
 39 |         for i in range(row):
 40 |             distance[i] = 1 - cas_iou(box[i],cluster)
 41 |         
 42 |         # 取出最小点
 43 |         near = np.argmin(distance,axis=1)
 44 | 
 45 |         if (last_clu == near).all():
 46 |             break
 47 |         
 48 |         # 求每一个类的中位点
 49 |         for j in range(k):
 50 |             cluster[j] = np.median(
 51 |                 box[near == j],axis=0)
 52 | 
 53 |         last_clu = near
 54 | 
 55 |     return cluster
 56 | 
 57 | def load_data(path):
 58 |     data = []
 59 |     # 对于每一个xml都寻找box
 60 |     for xml_file in glob.glob('{}/*xml'.format(path)):
 61 |         tree = ET.parse(xml_file)
 62 |         height = int(tree.findtext('./size/height'))
 63 |         width = int(tree.findtext('./size/width'))
 64 |         # 对于每一个目标都获得它的宽高
 65 |         for obj in tree.iter('object'):
 66 |             xmin = int(float(obj.findtext('bndbox/xmin'))) / width
 67 |             ymin = int(float(obj.findtext('bndbox/ymin'))) / height
 68 |             xmax = int(float(obj.findtext('bndbox/xmax'))) / width
 69 |             ymax = int(float(obj.findtext('bndbox/ymax'))) / height
 70 | 
 71 |             xmin = np.float64(xmin)
 72 |             ymin = np.float64(ymin)
 73 |             xmax = np.float64(xmax)
 74 |             ymax = np.float64(ymax)
 75 |             # 得到宽高
 76 |             data.append([xmax-xmin,ymax-ymin])
 77 |     return np.array(data)
 78 | 
 79 | 
 80 | if __name__ == '__main__':
 81 |     # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml
 82 |     # 会生成yolo_anchors.txt
 83 |     SIZE = 416
 84 |     anchors_num = 6
 85 |     # 载入数据集，可以使用VOC的xml
 86 |     path = r'./VOCdevkit/VOC2007/Annotations'
 87 |     
 88 |     # 载入所有的xml
 89 |     # 存储格式为转化为比例后的width,height
 90 |     data = load_data(path)
 91 |     
 92 |     # 使用k聚类算法
 93 |     out = kmeans(data,anchors_num)
 94 |     out = out[np.argsort(out[:,0])]
 95 |     print('acc:{:.2f}%'.format(avg_iou(data,out) * 100))
 96 |     print(out*SIZE)
 97 |     data = out*SIZE
 98 |     f = open("yolo_anchors.txt", 'w')
 99 |     row = np.shape(data)[0]
100 |     for i in range(row):
101 |         if i == 0:
102 |             x_y = "%d,%d" % (data[i][0], data[i][1])
103 |         else:
104 |             x_y = ", %d,%d" % (data[i][0], data[i][1])
105 |         f.write(x_y)
106 |     f.close()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Ultra96 Yolov4-tiny and Yolo-Fastest
 2 | 1. We convert dataset to VOC format. I use UA-DETRAC dataset, and we can use ./VOCdevkit/ files to convert dataset.
 3 | 
 4 | 2. In the official yolov4-tiny, there is a slice operation to realize  the CSPnet, but the quantitative tools don't support the operation, so I use a 1*1 convolution to replace it.
 5 | 
 6 | 3. Then we can use train.py to train the model, and save the model structure and weights as model.json and model.h5. I use TensorFlow-gpu 2.2.0.
 7 | 
 8 | 4. Then we can generate pb file that is suitable for deployment tools. We can see ./frozon_result/readme.md for details.
 9 | 
10 | 5. Then we use Vitis-AI to quantify our model. We can use ./scripts/1_vitisAI_tf_printNode.sh to find the input and output, and use ./scripts/2_vitisAI_tf_quantize.sh to quantify our model.
11 | 
12 | 6. We can compile our model. We can use ./scripts/3_vitisAI_tf_compile.sh to compile our model.
13 | 
14 | 7. We should use vivado and Vitis to build the hardware platform. (./edge/readme.md)
15 | 
16 | 8. The last, we can run our model on Ultra96-v2 board. There is an example that using yolo model to detate vehicles (./edge/dpu_yolo_v4_tiny.ipynb). There are the results, the fps is 25 with 320*320 images.
17 | 
18 |    ![1](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/1.png)
19 | 
20 |    ![2](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/2.png)
21 | 
22 | 9. In order to achieve faster detection speed, I try to use Yolo-Fastest ([Yolo-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest)) and implement it with tensorflow, then deploy it to Ultra96-v2 board. There are the results, it can achieve 30fps+.
23 | 
24 |    ![3](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/3.png)
25 | 
26 |    ![4](https://github.com/yss9701/Ultra96-Yolov4-tiny/raw/main/img/4.png)
27 | 
28 | 10. Now we support model pruning. We use [keras-surgeon](https://github.com/BenWhetton/keras-surgeon) 0.2.0 and [nni](https://github.com/microsoft/nni) 1.5 to prune the model, you can see in ./Model_pruning. I modified the source code of nni (compressor.py) and fixed some bugs, then we can choose the layer that we want to prune, and I gave a demo that use FPGM to prune the model.
29 | 
30 |    
31 | 
32 |    
33 | 
34 |    
35 | 
36 |    References:
37 | 
38 |    [Yolov4-tiny-tf2](https://github.com/bubbliiiing/yolov4-tiny-tf2)
39 | 
40 |    [Yolo-v3-Xilinx](https://github.com/Xilinx/Vitis-AI-Tutorials/tree/ML-at-Edge-yolov3)
41 | 
42 |    [Yolo-v4-tutorial-Xilinx](https://github.com/Xilinx/Vitis-Tutorials/tree/33d6cf9686398ef1179778dc0da163291c68b465/Machine_Learning/Design_Tutorials/07-yolov4-tutorial)
43 | 
44 |    [Yolo-v3-dnndk](https://github.com/Xilinx/Vitis-AI/blob/v1.1/mpsoc/vitis_ai_dnndk_samples/tf_yolov3_voc_py/tf_yolov3_voc.py)
45 | 
46 |    [UA-DETRAC to VOC](https://blog.csdn.net/weixin_38106878/article/details/88684280?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-3.control)
47 | 
48 |    [Vitis-AI 1.1](https://www.xilinx.com/html_docs/vitis_ai/1_1/zkj1576857115470.html)
49 | 
50 |    [Yolo-Fastest](https://github.com/dog-qiuqiu/Yolo-Fastest)
51 | 
52 |    [keras-surgeon](https://github.com/BenWhetton/keras-surgeon)
53 | 
54 |    [nni](https://github.com/microsoft/nni)
55 | 
56 | 


--------------------------------------------------------------------------------
/nets/CSPdarknet53_tiny.py:
--------------------------------------------------------------------------------
 1 | from functools import wraps
 2 | from tensorflow.keras import backend as K
 3 | from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, Lambda, Layer, LeakyReLU, BatchNormalization
 4 | from tensorflow.keras.regularizers import l2
 5 | from utils.utils import compose
 6 | import tensorflow as tf
 7 | 
 8 | def route_group(input_layer, groups, group_id):
 9 |     # 对通道数进行均等分割，我们取第二部分
10 |     #convs = tf.split(input_layer, num_or_size_splits=groups, axis=-1)
11 |     #return convs[group_id]
12 |     in_channels = input_layer.get_shape().as_list()[3]
13 |     convs = input_layer[:, :, :, in_channels//2:]
14 |     return convs
15 | 
16 | #--------------------------------------------------#
17 | #   单次卷积
18 | #--------------------------------------------------#
19 | @wraps(Conv2D)
20 | def DarknetConv2D(*args, **kwargs):
21 |     # 多了一个正则化的项
22 |     # 正则化系数5e-4
23 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
24 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
25 |     darknet_conv_kwargs.update(kwargs)
26 |     return Conv2D(*args, **darknet_conv_kwargs)
27 | 
28 | #---------------------------------------------------#
29 | #   卷积块
30 | #   DarknetConv2D + BatchNormalization + LeakyReLU
31 | #---------------------------------------------------#
32 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
33 |     no_bias_kwargs = {'use_bias': False}
34 |     no_bias_kwargs.update(kwargs)
35 |     return compose( 
36 |         DarknetConv2D(*args, **no_bias_kwargs),
37 |         BatchNormalization(),
38 |         LeakyReLU(alpha=0.1))
39 | 
40 | #---------------------------------------------------#
41 | #   CSPdarknet的结构块
42 | #   存在一个大残差边
43 | #   这个大残差边绕过了很多的残差结构
44 | #---------------------------------------------------#
45 | def resblock_body(x, num_filters):
46 |     # 特征整合
47 |     x = DarknetConv2D_BN_Leaky(num_filters, (3,3))(x)
48 |     # 残差边route
49 |     route = x
50 |     # 通道分割
51 |     #x = Lambda(route_group,arguments={'groups':2, 'group_id':1})(x)
52 |     x = DarknetConv2D_BN_Leaky(int(num_filters/2), (1,1))(x)
53 |     x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x)
54 | 
55 |     # 小残差边route1
56 |     route_1 = x
57 |     x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x)
58 |     # 堆叠
59 |     x = Concatenate()([x, route_1])
60 | 
61 |     x = DarknetConv2D_BN_Leaky(num_filters, (1,1))(x)
62 |     # 第三个resblockbody会引出来一个有效特征层分支
63 |     feat = x
64 |     # 连接
65 |     x = Concatenate()([route, x])
66 |     x = MaxPooling2D(pool_size=[2,2],)(x)
67 | 
68 |     # 最后对通道数进行整合
69 |     return x, feat
70 | 
71 | #---------------------------------------------------#
72 | #   darknet53 的主体部分
73 | #---------------------------------------------------#
74 | def darknet_body(x):
75 |     # 进行长和宽的压缩
76 |     x = ZeroPadding2D(((1,0),(1,0)))(x)
77 |     # 416,416,3 -> 208,208,32
78 |     x = DarknetConv2D_BN_Leaky(32, (3,3), strides=(2,2))(x)
79 | 
80 |     # 进行长和宽的压缩
81 |     x = ZeroPadding2D(((1,0),(1,0)))(x)
82 |     # 208,208,32 -> 104,104,64
83 |     x = DarknetConv2D_BN_Leaky(64, (3,3), strides=(2,2))(x)
84 |     # 104,104,64 -> 52,52,128
85 |     x, _ = resblock_body(x,num_filters = 64)
86 |     # 52,52,128 -> 26,26,256
87 |     x, _ = resblock_body(x,num_filters = 128)
88 |     # 26,26,256 -> 13,13,512
89 |     # feat1的shape = 26,26,256
90 |     x, feat1 = resblock_body(x,num_filters = 256)
91 | 
92 |     x = DarknetConv2D_BN_Leaky(512, (3,3))(x)
93 | 
94 |     feat2 = x
95 |     return feat1, feat2
96 | 
97 | 


--------------------------------------------------------------------------------
/yolo_fastest_tensorflow2/nets/yolo_fastest_backbone.py:
--------------------------------------------------------------------------------
  1 | from functools import wraps
  2 | from tensorflow.keras import backend as K
  3 | from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, Lambda, Layer, LeakyReLU, BatchNormalization, SeparableConv2D
  4 | from tensorflow.keras.regularizers import l2
  5 | from utils.utils import compose
  6 | import tensorflow as tf
  7 | 
  8 | #--------------------------------------------------#
  9 | #   单次卷积
 10 | #--------------------------------------------------#
 11 | @wraps(Conv2D)
 12 | def DarknetConv2D(*args, **kwargs):
 13 |     # 多了一个正则化的项
 14 |     # 正则化系数5e-4
 15 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 16 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 17 |     darknet_conv_kwargs.update(kwargs)
 18 |     return Conv2D(*args, **darknet_conv_kwargs)
 19 | 
 20 | def DepthwiseConv2D(*args, **kwargs):
 21 |     # 多了一个正则化的项
 22 |     # 正则化系数5e-4
 23 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 24 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 25 |     darknet_conv_kwargs.update(kwargs)
 26 |     return SeparableConv2D(*args, **darknet_conv_kwargs)
 27 | 
 28 | #---------------------------------------------------#
 29 | #   卷积块
 30 | #   DarknetConv2D + BatchNormalization + LeakyReLU
 31 | #---------------------------------------------------#
 32 | 
 33 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
 34 |     no_bias_kwargs = {'use_bias': False}
 35 |     no_bias_kwargs.update(kwargs)
 36 |     return compose(
 37 |         DarknetConv2D(*args, **no_bias_kwargs),
 38 |         BatchNormalization(),
 39 |         LeakyReLU(alpha=0.1))
 40 | 
 41 | def DepthwiseConv2D_BN_Leaky(*args, **kwargs):
 42 |     no_bias_kwargs = {'use_bias': False}
 43 |     no_bias_kwargs.update(kwargs)
 44 |     return compose(
 45 |         DepthwiseConv2D(*args, **no_bias_kwargs),
 46 |         BatchNormalization(),
 47 |         LeakyReLU(alpha=0.1))
 48 | #---------------------------------------------------#
 49 | #   CSPdarknet的结构块
 50 | #   存在一个大残差边
 51 | #   这个大残差边绕过了很多的残差结构
 52 | #---------------------------------------------------#
 53 | def resblock_body(x, num_filters, num_filters_1):
 54 |     route = x
 55 |     x = DarknetConv2D_BN_Leaky(num_filters, (1, 1))(x)
 56 |     #x = ZeroPadding2D(((1, 0), (1, 0)))(x)
 57 |     x = DepthwiseConv2D_BN_Leaky(num_filters_1, (3, 3))(x)
 58 |     #x = DarknetConv2D_BN_Leaky(num_filters_1, (1, 1))(x)
 59 |     #x = route + x
 60 |     x = Concatenate()([route, x])
 61 |     return x
 62 | 
 63 | #---------------------------------------------------#
 64 | #   darknet53 的主体部分
 65 | #---------------------------------------------------#
 66 | def darknet_body(x):
 67 |     # 进行长和宽的压缩，下一步步长为2
 68 |     x = ZeroPadding2D(((1, 0),(1, 0)))(x)
 69 |     # 416,416,3 -> 208,208,32
 70 |     x = DepthwiseConv2D_BN_Leaky(8, (3, 3), strides=(2, 2))(x)
 71 |     #x = DarknetConv2D_BN_Leaky(8, (1, 1), strides=(1, 1))(x)
 72 |     #x = ZeroPadding2D(((1, 0), (1, 0)))(x)
 73 |     x = DepthwiseConv2D_BN_Leaky(4, (3, 3), strides=(1, 1))(x)
 74 |     #x = DarknetConv2D_BN_Leaky(4, (1, 1), strides=(1, 1))(x)
 75 |     x = resblock_body(x, 8, 4)
 76 |     x = DarknetConv2D_BN_Leaky(24, (1, 1), strides=(1, 1))(x)
 77 |     x = ZeroPadding2D(((1, 0), (1, 0)))(x)
 78 |     x = DepthwiseConv2D_BN_Leaky(8, (3, 3), strides=(2, 2))(x)
 79 |     #x = DarknetConv2D_BN_Leaky(8, (1, 1), strides=(1, 1))(x)
 80 |     x = resblock_body(x, 32, 8)
 81 |     x = resblock_body(x, 32, 8)
 82 |     x = DarknetConv2D_BN_Leaky(32, (1, 1), strides=(1, 1))(x)
 83 |     x = ZeroPadding2D(((1, 0), (1, 0)))(x)
 84 |     x = DepthwiseConv2D_BN_Leaky(8, (3, 3), strides=(2, 2))(x)
 85 |     #x = DarknetConv2D_BN_Leaky(8, (1, 1), strides=(1, 1))(x)
 86 |     x = resblock_body(x, 48, 8)
 87 |     x = resblock_body(x, 48, 8)
 88 |     x = DarknetConv2D_BN_Leaky(48, (1, 1), strides=(1, 1))(x)
 89 |     x = DepthwiseConv2D_BN_Leaky(16, (3, 3), strides=(1, 1))(x)
 90 |     #x = DarknetConv2D_BN_Leaky(16, (1, 1), strides=(1, 1))(x)
 91 |     x = resblock_body(x, 96, 16)
 92 |     x = resblock_body(x, 96, 16)
 93 |     x = resblock_body(x, 96, 16)
 94 |     x = resblock_body(x, 96, 16)
 95 |     x = DarknetConv2D_BN_Leaky(96, (1, 1), strides=(1, 1))(x)
 96 |     x = ZeroPadding2D(((1, 0), (1, 0)))(x)
 97 |     x = DepthwiseConv2D_BN_Leaky(24, (3, 3), strides=(2, 2))(x)
 98 |     #x = DarknetConv2D_BN_Leaky(24, (1, 1), strides=(1, 1))(x)
 99 |     x = resblock_body(x, 136, 24)
100 |     x = resblock_body(x, 136, 24)
101 |     x = resblock_body(x, 136, 24)
102 |     x = resblock_body(x, 136, 24)
103 |     x = DarknetConv2D_BN_Leaky(136, (1, 1), strides=(1, 1))(x)
104 |     feat1 = x
105 |     x = ZeroPadding2D(((1, 0), (1, 0)))(x)
106 |     x = DepthwiseConv2D_BN_Leaky(48, (3, 3), strides=(2, 2))(x)
107 |     #x = DarknetConv2D_BN_Leaky(48, (1, 1), strides=(1, 1))(x)
108 |     x = resblock_body(x, 224, 48)
109 |     x = resblock_body(x, 224, 48)
110 |     x = resblock_body(x, 224, 48)
111 |     x = resblock_body(x, 224, 48)
112 |     x = resblock_body(x, 224, 48)
113 |     x = DarknetConv2D_BN_Leaky(96, (1, 1), strides=(1, 1))(x)
114 |     feat2 = x
115 |     return feat1, feat2
116 | 


--------------------------------------------------------------------------------
/input_fn.py:
--------------------------------------------------------------------------------
  1 | #MIT License
  2 | 
  3 | #Copyright (c) 2018 qqwweee
  4 | 
  5 | #Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | #of this software and associated documentation files (the "Software"), to deal
  7 | #in the Software without restriction, including without limitation the rights
  8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | #copies of the Software, and to permit persons to whom the Software is
 10 | #furnished to do so, subject to the following conditions:
 11 | 
 12 | #The above copyright notice and this permission notice shall be included in all
 13 | #copies or substantial portions of the Software.
 14 | 
 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | #SOFTWARE.
 22 | 
 23 | # Modification made by Xilinx, Inc.
 24 | # Copyright (c) 2019, Xilinx, Inc.
 25 | # Licensed under the Apache License, Version 2.0 (the "License");
 26 | # you may not use this file except in compliance with the License.
 27 | # You may obtain a copy of the License at
 28 | #     http://www.apache.org/licenses/LICENSE-2.0
 29 | # Unless required by applicable law or agreed to in writing, software
 30 | # distributed under the License is distributed on an "AS IS" BASIS,
 31 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 32 | # See the License for the specific language governing permissions and
 33 | # limitations under the License.
 34 | 
 35 | # Origin code:https://github.com/lji72/inference/blob/master/others/cloud/single_stage_detector/tensorflow/train/eval_ssd_large.py 
 36 | 
 37 | # All rights reserved.
 38 | # 
 39 | # Redistribution and use in source and binary forms, with or without modification,
 40 | # are permitted provided that the following conditions are met:
 41 | # 
 42 | # 1. Redistributions of source code must retain the above copyright notice,
 43 | # this list of conditions and the following disclaimer.
 44 | # 
 45 | # 2. Redistributions in binary form must reproduce the above copyright notice,
 46 | # this list of conditions and the following disclaimer in the documentation
 47 | # and/or other materials provided with the distribution.
 48 | # 
 49 | # 3. Neither the name of the copyright holder nor the names of its contributors
 50 | # may be used to endorse or promote products derived from this software
 51 | # without specific prior written permission.
 52 | # 
 53 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 54 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 55 | # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 56 | # IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 57 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 58 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 59 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 60 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 61 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 62 | 
 63 | 
 64 | 
 65 | from PIL import Image
 66 | import numpy as np
 67 | 
 68 | 
 69 | def letterbox_image(image, size):
 70 |     '''resize image with unchanged aspect ratio using padding'''
 71 |     iw, ih = image.size
 72 |     w, h = size
 73 |     scale = min(w/iw, h/ih)
 74 |     nw = int(iw*scale)
 75 |     nh = int(ih*scale)
 76 | 
 77 |     image = image.resize((nw,nh), Image.BICUBIC)
 78 |     new_image = Image.new('RGB', size, (128,128,128))
 79 |     new_image.paste(image, ((w-nw)//2, (h-nh)//2))
 80 |     return new_image
 81 | 
 82 | #image = Image.open(img_path)
 83 | 
 84 | def preprocessing_fn(image, model_image_size=(416,416)):
 85 |     if model_image_size != (None, None):
 86 |         assert model_image_size[0]%32 == 0, 'Multiples of 32 required'
 87 |         assert model_image_size[1]%32 == 0, 'Multiples of 32 required'
 88 |         boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
 89 |     else:
 90 |         new_image_size = (image.width - (image.width % 32), image.height - (image.height % 32))
 91 |         boxed_image = letterbox_image(image, new_image_size)
 92 |     image_data = np.array(boxed_image, dtype='float32')
 93 |     image_data /= 255.
 94 |     return image_data
 95 | 
 96 | calib_image_dir = "./images/"
 97 | calib_image_list = "./list.txt"
 98 | calib_batch_size = 1
 99 | def calib_input(iter):
100 |   images = []
101 |   line = open(calib_image_list).readlines()
102 |   for index in range(0, calib_batch_size):
103 |     curline = line[iter * calib_batch_size + index]
104 |     image_name = curline.strip()
105 |     image = Image.open(calib_image_dir + image_name)
106 |     image = preprocessing_fn(image)
107 |     images.append(image)
108 |   return {"input_1": images}
109 | 


--------------------------------------------------------------------------------
/get_dr_txt.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------#
  2 | #   获取测试集的detection-result和images-optional
  3 | #   具体视频教程可查看
  4 | #   https://www.bilibili.com/video/BV1zE411u7Vw
  5 | #----------------------------------------------------#
  6 | from yolo import YOLO
  7 | import os
  8 | import numpy as np
  9 | import copy
 10 | import colorsys
 11 | import tensorflow as tf
 12 | from timeit import default_timer as timer
 13 | from tensorflow.keras import backend as K
 14 | from tensorflow.keras.models import load_model
 15 | from tensorflow.keras.layers import Input, Lambda
 16 | from tensorflow.keras.models import Model
 17 | from PIL import Image, ImageFont, ImageDraw
 18 | from nets.yolo4_tiny import yolo_body,yolo_eval
 19 | from utils.utils import letterbox_image
 20 | from tqdm import tqdm
 21 | 
 22 | import tensorflow as tf
 23 | 
 24 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
 25 | for gpu in gpus:
 26 |     tf.config.experimental.set_memory_growth(gpu, True)
 27 |     
 28 | class mAP_YOLO(YOLO):
 29 |     #---------------------------------------------------#
 30 |     #   获得所有的分类
 31 |     #---------------------------------------------------#
 32 |     def generate(self):
 33 |         self.score = 0.01
 34 |         self.iou = 0.5
 35 |         model_path = os.path.expanduser(self.model_path)
 36 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
 37 |         
 38 |         # 计算anchor数量
 39 |         num_anchors = len(self.anchors)
 40 |         num_classes = len(self.class_names)
 41 | 
 42 |         # 载入模型，如果原来的模型里已经包括了模型结构则直接载入。
 43 |         # 否则先构建模型再载入
 44 |         self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
 45 |         self.yolo_model.load_weights(self.model_path,by_name=True)
 46 |         print('{} model, anchors, and classes loaded.'.format(model_path))
 47 | 
 48 | 
 49 |         # 画框设置不同的颜色
 50 |         hsv_tuples = [(x / len(self.class_names), 1., 1.)
 51 |                       for x in range(len(self.class_names))]
 52 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
 53 |         self.colors = list(
 54 |             map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
 55 |                 self.colors))
 56 | 
 57 |         # 打乱颜色
 58 |         np.random.seed(10101)
 59 |         np.random.shuffle(self.colors)
 60 |         np.random.seed(None)
 61 | 
 62 |         if self.eager:
 63 |             self.input_image_shape = Input([2,],batch_size=1)
 64 |             inputs = [*self.yolo_model.output, self.input_image_shape]
 65 |             outputs = Lambda(yolo_eval, output_shape=(1,), name='yolo_eval',
 66 |                 arguments={'anchors': self.anchors, 'num_classes': len(self.class_names), 'image_shape': self.model_image_size, 
 67 |                 'score_threshold': self.score, 'eager': True, 'max_boxes': self.max_boxes})(inputs)
 68 |             self.yolo_model = Model([self.yolo_model.input, self.input_image_shape], outputs)
 69 |         else:
 70 |             self.input_image_shape = K.placeholder(shape=(2, ))
 71 |             
 72 |             self.boxes, self.scores, self.classes = yolo_eval(self.yolo_model.output, self.anchors,
 73 |                     num_classes, self.input_image_shape, max_boxes=self.max_boxes,
 74 |                     score_threshold=self.score, iou_threshold=self.iou)
 75 |  
 76 | 
 77 |     #---------------------------------------------------#
 78 |     #   检测图片
 79 |     #---------------------------------------------------#
 80 |     def detect_image(self, image_id, image):
 81 |         f = open("./input/detection-results/"+image_id+".txt","w") 
 82 | 
 83 |         # 调整图片使其符合输入要求
 84 |         new_image_size = (self.model_image_size[1],self.model_image_size[0])
 85 |         boxed_image = letterbox_image(image, new_image_size)
 86 |         image_data = np.array(boxed_image, dtype='float32')
 87 |         image_data /= 255.
 88 |         image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
 89 | 
 90 |         if self.eager:
 91 |             # 预测结果
 92 |             input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
 93 |             out_boxes, out_scores, out_classes = self.yolo_model.predict([image_data, input_image_shape]) 
 94 |         else:
 95 |             # 预测结果
 96 |             out_boxes, out_scores, out_classes = self.sess.run(
 97 |                 [self.boxes, self.scores, self.classes],
 98 |                 feed_dict={
 99 |                     self.yolo_model.input: image_data,
100 |                     self.input_image_shape: [image.size[1], image.size[0]],
101 |                     K.learning_phase(): 0
102 |                 })
103 | 
104 |         for i, c in enumerate(out_classes):
105 |             predicted_class = self.class_names[int(c)]
106 |             score = str(out_scores[i])
107 | 
108 |             top, left, bottom, right = out_boxes[i]
109 |             f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
110 | 
111 |         f.close()
112 |         return 
113 | 
114 | yolo = mAP_YOLO()
115 | 
116 | image_ids = open('VOCdevkit/VOC2007/ImageSets/Main/test.txt').read().strip().split()
117 | 
118 | if not os.path.exists("./input"):
119 |     os.makedirs("./input")
120 | if not os.path.exists("./input/detection-results"):
121 |     os.makedirs("./input/detection-results")
122 | if not os.path.exists("./input/images-optional"):
123 |     os.makedirs("./input/images-optional")
124 | 
125 | for image_id in tqdm(image_ids):
126 |     image_path = "./VOCdevkit/VOC2007/JPEGImages/"+image_id+".jpg"
127 |     image = Image.open(image_path)
128 |     # 开启后在之后计算mAP可以可视化
129 |     # image.save("./input/images-optional/"+image_id+".jpg")
130 |     yolo.detect_image(image_id,image)
131 |     
132 | print("Conversion completed!")
133 | 


--------------------------------------------------------------------------------
/core/tf_prediction.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Xilinx Inc.
  2 | # 
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | # 
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | # 
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import tensorflow as tf
 16 | import os
 17 | import cv2
 18 | from tensorflow.python.platform import gfile
 19 | import numpy as np
 20 | from PIL import Image
 21 | from yolo3_predictor import yolo_predictor
 22 | from tqdm import tqdm
 23 | import argparse
 24 | 
 25 | from tensorflow.contrib import decent_q
 26 | 
 27 | def letterbox_image(image, size):
 28 |     '''resize image with unchanged aspect ratio using padding'''
 29 |     ih, iw, _ = image.shape
 30 |     w, h = size
 31 |     scale = min(w/iw, h/ih)
 32 |     nw = int(iw*scale)
 33 |     nh = int(ih*scale)
 34 | 
 35 |     image = cv2.resize(image, (nw,nh), interpolation=cv2.INTER_LINEAR)
 36 |     new_image = np.ones((h,w,3), np.uint8) * 128
 37 |     h_start = (h-nh)//2
 38 |     w_start = (w-nw)//2
 39 |     new_image[h_start:h_start+nh, w_start:w_start+nw, :] = image
 40 |     return new_image
 41 | 
 42 | 
 43 | def get_class(classes_path):
 44 |     with open(classes_path) as f:
 45 |         class_names = f.readlines()
 46 |     class_names = [c.strip() for c in class_names]
 47 |     return class_names
 48 | 
 49 | 
 50 | def get_anchors(anchors_path):
 51 |     with open(anchors_path) as f:
 52 |         anchors = f.readline()
 53 |     anchors = [float(x) for x in anchors.split(',')]
 54 |     return np.array(anchors).reshape(-1, 2)
 55 | 
 56 | 
 57 | def write_items_to_file(image_id, items, fw):
 58 |     for item in items:
 59 |         fw.write(image_id + " " + " ".join([str(comp) for comp in item]) + "\n")
 60 | 
 61 | 
 62 | def pred_img(img_path, model_image_size):
 63 |     image = cv2.imread(img_path)
 64 |     image = image[...,::-1]
 65 |     image_h, image_w, _ = image.shape
 66 | 
 67 |     # image preprocessing
 68 |     if model_image_size != (None, None):
 69 |         assert model_image_size[0]%32 == 0, 'Multiples of 32 required'
 70 |         assert model_image_size[1]%32 == 0, 'Multiples of 32 required'
 71 |         boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
 72 |     else:
 73 |         new_image_size = (image_w - (image_w % 32), image_h - (image_h % 32))
 74 |         boxed_image = letterbox_image(image, new_image_size)
 75 |     image_data = np.array(boxed_image, dtype='float32')
 76 |     image_data /= 255.
 77 |     image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
 78 | 
 79 |     out_boxes, out_scores, out_classes, out_y = sess.run(
 80 |         [pred_boxes, pred_scores, pred_classes, output_y],
 81 |         feed_dict={input_x: image_data, input_image_shape: (image_h, image_w)})
 82 | 
 83 |     # convert the result to label format
 84 |     items = []
 85 |     for i, c in reversed(list(enumerate(out_classes))):
 86 |         predicted_class = class_names[c]
 87 |         box = out_boxes[i]
 88 |         score = out_scores[i]
 89 | 
 90 |         top, left, bottom, right = box
 91 |         top = max(0, np.floor(top + 0.5).astype('int32'))
 92 |         left = max(0, np.floor(left + 0.5).astype('int32'))
 93 |         bottom = min(image_h, np.floor(bottom + 0.5).astype('int32'))
 94 |         right = min(image_w, np.floor(right + 0.5).astype('int32'))
 95 |         item  = [predicted_class, score, left, top, right, bottom]
 96 |         items.append(item)
 97 | 
 98 |     return items
 99 | 
100 | 
101 | if __name__ == "__main__":
102 | 
103 |     parser = argparse.ArgumentParser()
104 |     parser.add_argument('--pb_file', type=str, default="./model_data/yolov3_voc.pb" , help='path of frozon pb file')
105 |     parser.add_argument('--test_list', type=str, help='path of voc test list')
106 |     parser.add_argument('--result_file', type=str, help='path of voc prediction result')
107 |     FLAGS = parser.parse_args()
108 | 
109 |     classes_path = "model_data/voc_classes.txt"
110 |     anchors_path = "model_data/yolo_anchors.txt"
111 |     pb_file_path = FLAGS.pb_file
112 |     score_thresh = 0.005
113 |     nms_thresh = 0.45
114 | 
115 |     class_names = get_class(classes_path)
116 |     predictor = yolo_predictor(score_thresh, nms_thresh, classes_path, anchors_path)
117 | 
118 |     sess = tf.Session()
119 |     with gfile.FastGFile(pb_file_path, 'rb') as f: # file I/O
120 |         graph_def = tf.GraphDef()
121 |         graph_def.ParseFromString(f.read()) # get graph_def from file
122 |         sess.graph.as_default()
123 |         tf.import_graph_def(graph_def, name='')  # import graph
124 |     sess.run(tf.global_variables_initializer())
125 | 
126 |     input_x = sess.graph.get_tensor_by_name('input_1:0')
127 |     output_y1 = sess.graph.get_tensor_by_name('conv2d_17/BiasAdd:0')
128 |     output_y2 = sess.graph.get_tensor_by_name('conv2d_20/BiasAdd:0')
129 |     #output_y3 = sess.graph.get_tensor_by_name('output:0')
130 |     output_y = [output_y1, output_y2]
131 |     input_image_shape = tf.placeholder(tf.int32, shape=(2))
132 |     pred_boxes, pred_scores, pred_classes = predictor.predict(output_y, input_image_shape)
133 | 
134 |     with open(FLAGS.test_list) as fr:
135 |             lines = fr.readlines()
136 |     fw = open(FLAGS.result_file, "w")
137 |     for line in tqdm(lines):
138 |         img_path = line.strip().split(" ")[0]
139 |         fname = os.path.split(img_path)[-1]
140 |         image_id = os.path.splitext(fname)[0]
141 | 
142 |         items = pred_img(img_path, (416, 416))
143 |         write_items_to_file(image_id, items, fw)
144 | 
145 |     fw.close()
146 | 


--------------------------------------------------------------------------------
/nets/loss.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | from tensorflow.keras import backend as K
  3 | from nets.ious import box_ciou
  4 | 
  5 | #---------------------------------------------------#
  6 | #   平滑标签
  7 | #---------------------------------------------------#
  8 | def _smooth_labels(y_true, label_smoothing):
  9 |     num_classes = tf.cast(K.shape(y_true)[-1], dtype=K.floatx())
 10 |     label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
 11 |     return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes
 12 | #---------------------------------------------------#
 13 | #   将预测值的每个特征层调成真实值
 14 | #---------------------------------------------------#
 15 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
 16 |     num_anchors = len(anchors)
 17 |     # [1, 1, 1, num_anchors, 2]
 18 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
 19 | 
 20 |     # 获得x，y的网格
 21 |     # (13, 13, 1, 2)
 22 |     grid_shape = K.shape(feats)[1:3] # height, width
 23 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
 24 |         [1, grid_shape[1], 1, 1])
 25 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
 26 |         [grid_shape[0], 1, 1, 1])
 27 |     grid = K.concatenate([grid_x, grid_y])
 28 |     grid = K.cast(grid, K.dtype(feats))
 29 | 
 30 |     # (batch_size,13,13,3,85)
 31 |     feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
 32 | 
 33 |     # 将预测值调成真实值
 34 |     # box_xy对应框的中心点
 35 |     # box_wh对应框的宽和高
 36 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[...,::-1], K.dtype(feats))
 37 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[...,::-1], K.dtype(feats))
 38 |     box_confidence = K.sigmoid(feats[..., 4:5])
 39 |     box_class_probs = K.sigmoid(feats[..., 5:])
 40 | 
 41 |     # 在计算loss的时候返回如下参数
 42 |     if calc_loss == True:
 43 |         return grid, feats, box_xy, box_wh
 44 |     return box_xy, box_wh, box_confidence, box_class_probs
 45 | 
 46 | #---------------------------------------------------#
 47 | #   用于计算每个预测框与真实框的iou
 48 | #---------------------------------------------------#
 49 | def box_iou(b1, b2):
 50 |     # 13,13,3,1,4
 51 |     # 计算左上角的坐标和右下角的坐标
 52 |     b1 = K.expand_dims(b1, -2)
 53 |     b1_xy = b1[..., :2]
 54 |     b1_wh = b1[..., 2:4]
 55 |     b1_wh_half = b1_wh/2.
 56 |     b1_mins = b1_xy - b1_wh_half
 57 |     b1_maxes = b1_xy + b1_wh_half
 58 | 
 59 |     # 1,n,4
 60 |     # 计算左上角和右下角的坐标
 61 |     b2 = K.expand_dims(b2, 0)
 62 |     b2_xy = b2[..., :2]
 63 |     b2_wh = b2[..., 2:4]
 64 |     b2_wh_half = b2_wh/2.
 65 |     b2_mins = b2_xy - b2_wh_half
 66 |     b2_maxes = b2_xy + b2_wh_half
 67 | 
 68 |     # 计算重合面积
 69 |     intersect_mins = K.maximum(b1_mins, b2_mins)
 70 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
 71 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
 72 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
 73 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
 74 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
 75 |     iou = intersect_area / (b1_area + b2_area - intersect_area)
 76 | 
 77 |     return iou
 78 | 
 79 | 
 80 | #---------------------------------------------------#
 81 | #   loss值计算
 82 | #---------------------------------------------------#
 83 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False):
 84 | 
 85 |     # 一共有2层
 86 |     num_layers = len(anchors)//3
 87 | 
 88 |     # 将预测结果和实际ground truth分开，args是[*model_body.output, *y_true]
 89 |     # y_true是一个列表，包含两个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85)。
 90 |     # yolo_outputs是一个列表，包含两个特征层，shape分别为(m,13,13,255),(m,26,26,255)。
 91 |     y_true = args[num_layers:]
 92 |     yolo_outputs = args[:num_layers]
 93 | 
 94 |     # 先验框
 95 |     anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
 96 | 
 97 |     # 得到input_shpae为608,608 
 98 |     input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
 99 | 
100 |     loss = 0
101 | 
102 |     # 取出每一张图片
103 |     # m的值就是batch_size
104 |     m = K.shape(yolo_outputs[0])[0]
105 |     mf = K.cast(m, K.dtype(yolo_outputs[0]))
106 | 
107 |     # y_true是一个列表，包含两个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85)。
108 |     # yolo_outputs是一个列表，包含两个特征层，shape分别为(m,13,13,255),(m,26,26,255)。
109 |     for l in range(num_layers):
110 |         # 以第一个特征层(m,13,13,3,85)为例子
111 |         # 取出该特征层中存在目标的点的位置。(m,13,13,3,1)
112 |         object_mask = y_true[l][..., 4:5]
113 |         # 取出其对应的种类(m,13,13,3,80)
114 |         true_class_probs = y_true[l][..., 5:]
115 |         if label_smoothing:
116 |             true_class_probs = _smooth_labels(true_class_probs, label_smoothing)
117 | 
118 |         # 将yolo_outputs的特征层输出进行处理
119 |         # grid为网格结构(13,13,1,2)，raw_pred为尚未处理的预测结果(m,13,13,3,85)
120 |         # 还有解码后的xy，wh，(m,13,13,3,2)
121 |         grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
122 |              anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
123 |         
124 |         # 这个是解码后的预测的box的位置
125 |         # (m,13,13,3,4)
126 |         pred_box = K.concatenate([pred_xy, pred_wh])
127 | 
128 |         # 找到负样本群组，第一步是创建一个数组，[]
129 |         ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
130 |         object_mask_bool = K.cast(object_mask, 'bool')
131 |         
132 |         # 对每一张图片计算ignore_mask
133 |         def loop_body(b, ignore_mask):
134 |             # 取出第b副图内，真实存在的所有的box的参数
135 |             # n,4
136 |             true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
137 |             # 计算预测结果与真实情况的iou
138 |             # pred_box为13,13,3,4
139 |             # 计算的结果是每个pred_box和其它所有真实框的iou
140 |             # 13,13,3,n
141 |             iou = box_iou(pred_box[b], true_box)
142 | 
143 |             # 13,13,3
144 |             best_iou = K.max(iou, axis=-1)
145 | 
146 |             # 如果某些预测框和真实框的重合程度大于0.5，则忽略。
147 |             ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
148 |             return b+1, ignore_mask
149 | 
150 |         # 遍历所有的图片
151 |         _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
152 | 
153 |         # 将每幅图的内容压缩，进行处理
154 |         ignore_mask = ignore_mask.stack()
155 |         #(m,13,13,3,1)
156 |         ignore_mask = K.expand_dims(ignore_mask, -1)
157 | 
158 |         box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
159 | 
160 |         # Calculate ciou loss as location loss
161 |         raw_true_box = y_true[l][...,0:4]
162 |         ciou = box_ciou(pred_box, raw_true_box)
163 |         ciou_loss = object_mask * box_loss_scale * (1 - ciou)
164 |         ciou_loss = K.sum(ciou_loss) / mf
165 |         location_loss = ciou_loss
166 |         
167 |         # 如果该位置本来有框，那么计算1与置信度的交叉熵
168 |         # 如果该位置本来没有框，而且满足best_iou<ignore_thresh，则被认定为负样本
169 |         # best_iou<ignore_thresh用于限制负样本数量
170 |         confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
171 |             (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
172 |         
173 |         class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)
174 | 
175 |         confidence_loss = K.sum(confidence_loss) / mf
176 |         class_loss = K.sum(class_loss) / mf
177 |         loss += location_loss + confidence_loss + class_loss
178 |         # if print_loss:
179 |         # loss = tf.Print(loss, [loss, confidence_loss, class_loss, location_loss], message='loss: ')
180 |     loss = K.expand_dims(loss, axis=-1)
181 |     return loss
182 | 


--------------------------------------------------------------------------------
/yolo.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import copy
  4 | import colorsys
  5 | import tensorflow as tf
  6 | from timeit import default_timer as timer
  7 | from tensorflow.compat.v1.keras import backend as K
  8 | from tensorflow.keras.models import load_model
  9 | from tensorflow.keras.layers import Input
 10 | from tensorflow.keras.layers import Lambda
 11 | from tensorflow.keras.models import Model
 12 | from PIL import Image, ImageFont, ImageDraw
 13 | from nets.yolo4_tiny import yolo_body,yolo_eval
 14 | from utils.utils import letterbox_image
 15 | 
 16 | class YOLO(object):
 17 |     _defaults = {
 18 |         "model_path"        : 'logs_2/last1.h5',
 19 |         "anchors_path"      : 'model_data/yolo_anchors.txt',
 20 |         "classes_path"      : 'model_data/new_class.txt',
 21 |         "score"             : 0.5,
 22 |         "iou"               : 0.3,
 23 |         "eager"             : False,
 24 |         "max_boxes"         : 100,
 25 |         # 显存比较小可以使用416x416
 26 |         # 显存比较大可以使用608x608
 27 |         "model_image_size"  : (320, 320)
 28 |     }
 29 | 
 30 |     @classmethod
 31 |     def get_defaults(cls, n):
 32 |         if n in cls._defaults:
 33 |             return cls._defaults[n]
 34 |         else:
 35 |             return "Unrecognized attribute name '" + n + "'"
 36 | 
 37 |     #---------------------------------------------------#
 38 |     #   初始化yolo
 39 |     #---------------------------------------------------#
 40 |     def __init__(self, **kwargs):
 41 |         self.__dict__.update(self._defaults)
 42 |         self.class_names = self._get_class()
 43 |         self.anchors = self._get_anchors()
 44 |         if not self.eager:
 45 |             tf.compat.v1.disable_eager_execution()
 46 |             self.sess = K.get_session()
 47 |         self.generate()
 48 | 
 49 |     #---------------------------------------------------#
 50 |     #   获得所有的分类
 51 |     #---------------------------------------------------#
 52 |     def _get_class(self):
 53 |         classes_path = os.path.expanduser(self.classes_path)
 54 |         with open(classes_path) as f:
 55 |             class_names = f.readlines()
 56 |         class_names = [c.strip() for c in class_names]
 57 |         return class_names
 58 | 
 59 |     #---------------------------------------------------#
 60 |     #   获得所有的先验框
 61 |     #---------------------------------------------------#
 62 |     def _get_anchors(self):
 63 |         anchors_path = os.path.expanduser(self.anchors_path)
 64 |         with open(anchors_path) as f:
 65 |             anchors = f.readline()
 66 |         anchors = [float(x) for x in anchors.split(',')]
 67 |         return np.array(anchors).reshape(-1, 2)
 68 | 
 69 |     #---------------------------------------------------#
 70 |     #   获得所有的分类
 71 |     #---------------------------------------------------#
 72 |     def generate(self):
 73 |         model_path = os.path.expanduser(self.model_path)
 74 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
 75 |         
 76 |         # 计算anchor数量
 77 |         num_anchors = len(self.anchors)
 78 |         num_classes = len(self.class_names)
 79 | 
 80 |         # 载入模型，如果原来的模型里已经包括了模型结构则直接载入。
 81 |         # 否则先构建模型再载入
 82 |         self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
 83 |         self.yolo_model.load_weights(self.model_path)
 84 |         print('{} model, anchors, and classes loaded.'.format(model_path))
 85 | 
 86 |         # 画框设置不同的颜色
 87 |         hsv_tuples = [(x / len(self.class_names), 1., 1.)
 88 |                       for x in range(len(self.class_names))]
 89 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
 90 |         self.colors = list(
 91 |             map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
 92 |                 self.colors))
 93 | 
 94 |         # 打乱颜色
 95 |         np.random.seed(10101)
 96 |         np.random.shuffle(self.colors)
 97 |         np.random.seed(None)
 98 | 
 99 |         if self.eager:
100 |             self.input_image_shape = Input([2,],batch_size=1)
101 |             inputs = [*self.yolo_model.output, self.input_image_shape]
102 |             outputs = Lambda(yolo_eval, output_shape=(1,), name='yolo_eval',
103 |                 arguments={'anchors': self.anchors, 'num_classes': len(self.class_names), 'image_shape': self.model_image_size, 
104 |                 'score_threshold': self.score, 'eager': True, 'max_boxes': self.max_boxes})(inputs)
105 |             self.yolo_model = Model([self.yolo_model.input, self.input_image_shape], outputs)
106 |         else:
107 |             self.input_image_shape = K.placeholder(shape=(2, ))
108 |             
109 |             self.boxes, self.scores, self.classes = yolo_eval(self.yolo_model.output, self.anchors,
110 |                     num_classes, self.input_image_shape, max_boxes=self.max_boxes,
111 |                     score_threshold=self.score, iou_threshold=self.iou)
112 |  
113 |     #---------------------------------------------------#
114 |     #   检测图片
115 |     #---------------------------------------------------#
116 |     def detect_image(self, image):
117 |         start = timer()
118 | 
119 |         # 调整图片使其符合输入要求
120 |         new_image_size = (self.model_image_size[1],self.model_image_size[0])
121 |         boxed_image = letterbox_image(image, new_image_size)
122 |         image_data = np.array(boxed_image, dtype='float32')
123 |         image_data /= 255.
124 |         image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
125 | 
126 |         if self.eager:
127 |             # 预测结果
128 |             input_image_shape = np.expand_dims(np.array([image.size[1], image.size[0]], dtype='float32'), 0)
129 |             out_boxes, out_scores, out_classes = self.yolo_model.predict([image_data, input_image_shape]) 
130 |         else:
131 |             # 预测结果
132 |             out_boxes, out_scores, out_classes = self.sess.run(
133 |                 [self.boxes, self.scores, self.classes],
134 |                 feed_dict={
135 |                     self.yolo_model.input: image_data,
136 |                     self.input_image_shape: [image.size[1], image.size[0]],
137 |                     K.learning_phase(): 0
138 |                 })
139 |         
140 |         print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
141 |         # 设置字体
142 |         font = ImageFont.truetype(font='font/simhei.ttf',
143 |                     size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
144 |         thickness = (image.size[0] + image.size[1]) // 300
145 | 
146 |         for i, c in list(enumerate(out_classes)):
147 |             predicted_class = self.class_names[c]
148 |             box = out_boxes[i]
149 |             score = out_scores[i]
150 | 
151 |             top, left, bottom, right = box
152 |             top = top - 5
153 |             left = left - 5
154 |             bottom = bottom + 5
155 |             right = right + 5
156 |             top = max(0, np.floor(top + 0.5).astype('int32'))
157 |             left = max(0, np.floor(left + 0.5).astype('int32'))
158 |             bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
159 |             right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
160 | 
161 | 
162 |             # 画框框
163 |             label = '{} {:.2f}'.format(predicted_class, score)
164 |             draw = ImageDraw.Draw(image)
165 |             label_size = draw.textsize(label, font)
166 |             label = label.encode('utf-8')
167 |             print(label)
168 |             
169 |             if top - label_size[1] >= 0:
170 |                 text_origin = np.array([left, top - label_size[1]])
171 |             else:
172 |                 text_origin = np.array([left, top + 1])
173 | 
174 |             for i in range(thickness):
175 |                 draw.rectangle(
176 |                     [left + i, top + i, right - i, bottom - i],
177 |                     outline=self.colors[c])
178 |             draw.rectangle(
179 |                 [tuple(text_origin), tuple(text_origin + label_size)],
180 |                 fill=self.colors[c])
181 |             draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
182 |             del draw
183 | 
184 |         end = timer()
185 |         print(end - start)
186 |         return image
187 | 


--------------------------------------------------------------------------------
/nets/yolo4_tiny.py:
--------------------------------------------------------------------------------
  1 | from functools import wraps
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from tensorflow.keras import backend as K
  6 | from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, LeakyReLU, \
  7 |     BatchNormalization
  8 | from tensorflow.keras.models import Model
  9 | from tensorflow.keras.regularizers import l2
 10 | from nets.CSPdarknet53_tiny import darknet_body
 11 | from utils.utils import compose
 12 | 
 13 | 
 14 | 
 15 | #--------------------------------------------------#
 16 | #   单次卷积
 17 | #--------------------------------------------------#
 18 | @wraps(Conv2D)
 19 | def DarknetConv2D(*args, **kwargs):
 20 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 21 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 22 |     darknet_conv_kwargs.update(kwargs)
 23 |     return Conv2D(*args, **darknet_conv_kwargs)
 24 | 
 25 | #---------------------------------------------------#
 26 | #   卷积块
 27 | #   DarknetConv2D + BatchNormalization + LeakyReLU
 28 | #---------------------------------------------------#
 29 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
 30 |     no_bias_kwargs = {'use_bias': False}
 31 |     no_bias_kwargs.update(kwargs)
 32 |     return compose( 
 33 |         DarknetConv2D(*args, **no_bias_kwargs),
 34 |         BatchNormalization(),
 35 |         LeakyReLU(alpha=0.1))
 36 | 
 37 | #---------------------------------------------------#
 38 | #   特征层->最后的输出
 39 | #---------------------------------------------------#
 40 | def yolo_body(inputs, num_anchors, num_classes):
 41 |     # 生成darknet53的主干模型
 42 |     # 首先我们会获取到两个有效特征层
 43 |     # feat1 26x26x256
 44 |     # feat2 13x13x512
 45 |     feat1,feat2 = darknet_body(inputs)
 46 | 
 47 |     # 13x13x512 -> 13x13x256
 48 |     P5 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2)
 49 | 
 50 |     P5_output = DarknetConv2D_BN_Leaky(512, (3,3))(P5)
 51 |     P5_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P5_output)
 52 |     
 53 |     # Conv+UpSampling2D 13x13x256 -> 26x26x128
 54 |     P5_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(P5)
 55 |     
 56 |     # 26x26x(128+256) 26x26x384
 57 |     P4 = Concatenate()([feat1, P5_upsample])
 58 |     
 59 |     P4_output = DarknetConv2D_BN_Leaky(256, (3,3))(P4)
 60 |     P4_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P4_output)
 61 |     
 62 |     return Model(inputs, [P5_output, P4_output])
 63 | 
 64 | #---------------------------------------------------#
 65 | #   将预测值的每个特征层调成真实值
 66 | #---------------------------------------------------#
 67 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
 68 |     num_anchors = len(anchors)
 69 |     # [1, 1, 1, num_anchors, 2]
 70 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
 71 | 
 72 |     # 获得x，y的网格
 73 |     # (13,13, 1, 2)
 74 |     grid_shape = K.shape(feats)[1:3] # height, width
 75 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
 76 |         [1, grid_shape[1], 1, 1])
 77 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
 78 |         [grid_shape[0], 1, 1, 1])
 79 |     grid = K.concatenate([grid_x, grid_y])
 80 |     grid = K.cast(grid, K.dtype(feats))
 81 | 
 82 |     # (batch_size,13,13,3,85)
 83 |     feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
 84 | 
 85 |     # 将预测值调成真实值
 86 |     # box_xy对应框的中心点
 87 |     # box_wh对应框的宽和高
 88 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[...,::-1], K.dtype(feats))
 89 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[...,::-1], K.dtype(feats))
 90 |     box_confidence = K.sigmoid(feats[..., 4:5])
 91 |     box_class_probs = K.sigmoid(feats[..., 5:])
 92 | 
 93 |     # 在计算loss的时候返回如下参数
 94 |     if calc_loss == True:
 95 |         return grid, feats, box_xy, box_wh
 96 |     return box_xy, box_wh, box_confidence, box_class_probs
 97 | 
 98 | #---------------------------------------------------#
 99 | #   对box进行调整，使其符合真实图片的样子
100 | #---------------------------------------------------#
101 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
102 |     box_yx = box_xy[..., ::-1]
103 |     box_hw = box_wh[..., ::-1]
104 |     
105 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
106 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
107 | 
108 |     new_shape = K.round(image_shape * K.min(input_shape/image_shape))
109 |     offset = (input_shape-new_shape)/2./input_shape
110 |     scale = input_shape/new_shape
111 | 
112 |     box_yx = (box_yx - offset) * scale
113 |     box_hw *= scale
114 | 
115 |     box_mins = box_yx - (box_hw / 2.)
116 |     box_maxes = box_yx + (box_hw / 2.)
117 |     boxes =  K.concatenate([
118 |         box_mins[..., 0:1],  # y_min
119 |         box_mins[..., 1:2],  # x_min
120 |         box_maxes[..., 0:1],  # y_max
121 |         box_maxes[..., 1:2]  # x_max
122 |     ])
123 | 
124 |     boxes *= K.concatenate([image_shape, image_shape])
125 |     return boxes
126 | 
127 | #---------------------------------------------------#
128 | #   获取每个box和它的得分
129 | #---------------------------------------------------#
130 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
131 |     # 将预测值调成真实值
132 |     # box_xy对应框的中心点
133 |     # box_wh对应框的宽和高
134 |     # -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80
135 |     box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape)
136 |     # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
137 |     boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
138 |     # 获得得分和box
139 |     boxes = K.reshape(boxes, [-1, 4])
140 |     box_scores = box_confidence * box_class_probs
141 |     box_scores = K.reshape(box_scores, [-1, num_classes])
142 |     return boxes, box_scores
143 | 
144 | # ---------------------------------------------------#
145 | #   图片预测
146 | # ---------------------------------------------------#
147 | def yolo_eval(yolo_outputs,
148 |               anchors,
149 |               num_classes,
150 |               image_shape,
151 |               max_boxes=20,
152 |               score_threshold=.6,
153 |               iou_threshold=.5,
154 |               eager = False):
155 |     if eager:
156 |         image_shape = K.reshape(yolo_outputs[-1],[-1])
157 |         num_layers = len(yolo_outputs)-1
158 |     else:
159 |         # 获得特征层的数量
160 |         num_layers = len(yolo_outputs)
161 |     # 特征层1对应的anchor是678
162 |     # 特征层2对应的anchor是345
163 |     # 特征层3对应的anchor是012
164 |     anchor_mask = [[3, 4, 5], [0, 1, 2]]
165 | 
166 |     input_shape = K.shape(yolo_outputs[0])[1:3] * 32
167 |     boxes = []
168 |     box_scores = []
169 |     # 对每个特征层进行处理
170 |     for l in range(num_layers):
171 |         _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
172 |                                                     image_shape)
173 |         boxes.append(_boxes)
174 |         box_scores.append(_box_scores)
175 |     # 将每个特征层的结果进行堆叠
176 |     boxes = K.concatenate(boxes, axis=0)
177 |     box_scores = K.concatenate(box_scores, axis=0)
178 | 
179 |     mask = box_scores >= score_threshold
180 |     max_boxes_tensor = K.constant(max_boxes, dtype='int32')
181 |     boxes_ = []
182 |     scores_ = []
183 |     classes_ = []
184 |     for c in range(num_classes):
185 |         # 取出所有box_scores >= score_threshold的框，和成绩
186 |         class_boxes = tf.boolean_mask(boxes, mask[:, c])
187 |         class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
188 | 
189 |         # 非极大抑制，去掉box重合程度高的那一些
190 |         nms_index = tf.image.non_max_suppression(
191 |             class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
192 | 
193 |         # 获取非极大抑制后的结果
194 |         # 下列三个分别是
195 |         # 框的位置，得分与种类
196 |         class_boxes = K.gather(class_boxes, nms_index)
197 |         class_box_scores = K.gather(class_box_scores, nms_index)
198 |         classes = K.ones_like(class_box_scores, 'int32') * c
199 |         boxes_.append(class_boxes)
200 |         scores_.append(class_box_scores)
201 |         classes_.append(classes)
202 |     boxes_ = K.concatenate(boxes_, axis=0)
203 |     scores_ = K.concatenate(scores_, axis=0)
204 |     classes_ = K.concatenate(classes_, axis=0)
205 | 
206 |     return boxes_, scores_, classes_
207 | 


--------------------------------------------------------------------------------
/VOCdevkit/DETRAC_xmlParser.py:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | import xml.etree.ElementTree as ET
  4 | from xml.dom.minidom import Document
  5 | import os
  6 | import cv2
  7 | import time
  8 | 
  9 | def ConvertVOCXml(file_path="",file_name=""):
 10 |    tree = ET.parse(file_name)
 11 |    root = tree.getroot()
 12 |    # print(root.tag)
 13 | 
 14 |    num=0 #计数
 15 |    #读xml操作
 16 | 
 17 |    frame_lists=[]
 18 |    output_file_name=""
 19 |    for child in root:
 20 | 
 21 |       if(child.tag=="frame"):
 22 |           # 创建dom文档
 23 |          doc = Document()
 24 |          # 创建根节点
 25 |          annotation = doc.createElement('annotation')
 26 |          # 根节点插入dom树
 27 |          doc.appendChild(annotation)
 28 | 
 29 |          #print(child.tag, child.attrib["num"])
 30 |          pic_id= child.attrib["num"].zfill(5)
 31 |          #print(pic_id)
 32 |          output_file_name=root.attrib["name"]+"__img"+pic_id+".xml"
 33 |         #  print(output_file_name)
 34 | 
 35 |          folder = doc.createElement("folder")
 36 |          folder.appendChild(doc.createTextNode("VOC2007"))
 37 |          annotation.appendChild(folder)
 38 | 
 39 |          filename = doc.createElement("filename")
 40 |          pic_name=root.attrib["name"]+"__img"+pic_id+".jpg"
 41 |          filename.appendChild(doc.createTextNode(pic_name))
 42 |          annotation.appendChild(filename)
 43 | 
 44 |          sizeimage = doc.createElement("size")
 45 |          imagewidth = doc.createElement("width")
 46 |          imageheight = doc.createElement("height")
 47 |          imagedepth = doc.createElement("depth")
 48 | 
 49 |          imagewidth.appendChild(doc.createTextNode("960"))
 50 |          imageheight.appendChild(doc.createTextNode("540"))
 51 |          imagedepth.appendChild(doc.createTextNode("3"))
 52 | 
 53 |          sizeimage.appendChild(imagedepth)
 54 |          sizeimage.appendChild(imagewidth)
 55 |          sizeimage.appendChild(imageheight)
 56 |          annotation.appendChild(sizeimage)
 57 | 
 58 |          target_list=child.getchildren()[0]  #获取target_list
 59 |          #print(target_list.tag)
 60 |          object=None
 61 |          for target in target_list:
 62 |              if(target.tag=="target"):
 63 |                  #print(target.tag)
 64 |                  object = doc.createElement('object')
 65 |                  bndbox = doc.createElement("bndbox")
 66 | 
 67 |                  for target_child in target:
 68 |                      if(target_child.tag=="box"):
 69 |                          xmin = doc.createElement("xmin")
 70 |                          ymin = doc.createElement("ymin")
 71 |                          xmax = doc.createElement("xmax")
 72 |                          ymax = doc.createElement("ymax")
 73 |                          xmin_value=int(float(target_child.attrib["left"]))
 74 |                          ymin_value=int(float(target_child.attrib["top"]))
 75 |                          box_width_value=int(float(target_child.attrib["width"]))
 76 |                          box_height_value=int(float(target_child.attrib["height"]))
 77 |                          xmin.appendChild(doc.createTextNode(str(xmin_value)))
 78 |                          ymin.appendChild(doc.createTextNode(str(ymin_value)))
 79 |                          if(xmin_value+box_width_value>960):
 80 |                             xmax.appendChild(doc.createTextNode(str(960)))
 81 |                          else:
 82 |                             xmax.appendChild(doc.createTextNode(str(xmin_value+box_width_value)))
 83 |                          if(ymin_value+box_height_value>540):
 84 |                             ymax.appendChild(doc.createTextNode(str(540)))
 85 |                          else:
 86 |                             ymax.appendChild(doc.createTextNode(str(ymin_value+box_height_value)))
 87 | 
 88 |                      if(target_child.tag=="attribute"):
 89 |                          name = doc.createElement('name')
 90 |                          pose=doc.createElement('pose')
 91 |                          truncated=doc.createElement('truncated')
 92 |                          difficult=doc.createElement('difficult')
 93 | 
 94 |                          name.appendChild(doc.createTextNode("car"))
 95 |                          pose.appendChild(doc.createTextNode("Left"))  #随意指定
 96 |                          truncated.appendChild(doc.createTextNode("0"))  #随意指定
 97 |                          difficult.appendChild(doc.createTextNode("0"))  #随意指定
 98 | 
 99 |                          
100 |                          object.appendChild(name)
101 |                          object.appendChild(pose)
102 |                          object.appendChild(truncated)
103 |                          object.appendChild(difficult)
104 |                          
105 |                  bndbox.appendChild(xmin)
106 |                  bndbox.appendChild(ymin)
107 |                  bndbox.appendChild(xmax)
108 |                  bndbox.appendChild(ymax)
109 |                  object.appendChild(bndbox)
110 |                  annotation.appendChild(object)
111 | 
112 | 
113 |          file_path_out=os.path.join(file_path,output_file_name)
114 |          f = open(file_path_out, 'w')
115 |          f.write(doc.toprettyxml(indent=' ' * 4))
116 |          f.close()
117 |          num=num+1
118 |    return num
119 | 
120 | 
121 | 
122 | 
123 | '''
124 | 画方框
125 | '''
126 | def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2):
127 | 
128 |     # Draw bounding box...
129 |     print(bbox)
130 |     p1 = (int(float(bbox["xmin"])), int(float(bbox["ymin"])))
131 |     p2 = (int(float(bbox["xmax"])), int(float(bbox["ymax"])))
132 |     cv2.rectangle(img, p1, p2, color, thickness)
133 | 
134 | 
135 | def visualization_image(image_name,xml_file_name):
136 |     tree = ET.parse(xml_file_name)
137 |     root = tree.getroot()
138 | 
139 |     object_lists=[]
140 |     for child in root:
141 |        if(child.tag=="folder"):
142 |           print(child.tag, child.text)
143 |        elif (child.tag == "filename"):
144 |           print(child.tag, child.text)
145 |        elif (child.tag == "size"):  #解析size
146 |           for size_child in child:
147 |              if(size_child.tag=="width"):
148 |                 print(size_child.tag,size_child.text)
149 |              elif (size_child.tag == "height"):
150 |                 print(size_child.tag, size_child.text)
151 |              elif (size_child.tag == "depth"):
152 |                 print(size_child.tag, size_child.text)
153 |        elif (child.tag == "object"):  #解析object
154 |           singleObject={}
155 |           for object_child in child:
156 |              if (object_child.tag == "name"):
157 |                 # print(object_child.tag,object_child.text)
158 |                 singleObject["name"] = object_child.text
159 |              elif (object_child.tag == "bndbox"):
160 |                 for bndbox_child in object_child:
161 |                    if (bndbox_child.tag == "xmin"):
162 |                       singleObject["xmin"] = bndbox_child.text
163 |                       # print(bndbox_child.tag, bndbox_child.text)
164 |                    elif (bndbox_child.tag == "ymin"):
165 |                       # print(bndbox_child.tag, bndbox_child.text)
166 |                       singleObject["ymin"] = bndbox_child.text
167 |                    elif (bndbox_child.tag == "xmax"):
168 |                       singleObject["xmax"] = bndbox_child.text
169 |                    elif (bndbox_child.tag == "ymax"):
170 |                       singleObject["ymax"] = bndbox_child.text
171 |           object_length=len(singleObject)
172 |           if(object_length>0):
173 |           	object_lists.append(singleObject)
174 |     img = cv2.imread(image_name)
175 |     for object_coordinate in object_lists:
176 |         bboxes_draw_on_img(img,object_coordinate)
177 |     cv2.imshow("capture", img)
178 |     cv2.waitKey (0)
179 |     cv2.destroyAllWindows()
180 | 
181 | 
182 | if ( __name__ == "__main__"):
183 |    #print("main")
184 |    basePath="DETRAC-Train-Annotations-XML"
185 |    totalxml=os.listdir(basePath)
186 |    total_num=0
187 |    flag=False
188 |    print("正在转换")
189 |    saveBasePath="xml_test"
190 |    if os.path.exists(saveBasePath)==False: #判断文件夹是否存在
191 |         os.makedirs(saveBasePath)
192 | 
193 |    #ConvertVOCXml(file_path="samplexml",file_name="000009.xml")
194 |    # Start time
195 |    start = time.time()
196 |    log=open("xml_statistical.txt","w") #分析日志，进行排错
197 |    for xml in totalxml:
198 |      file_name=os.path.join(basePath,xml)
199 |      print(file_name)
200 |      num=ConvertVOCXml(file_path=saveBasePath,file_name=file_name)
201 |      print(num)
202 |      total_num=total_num+num
203 |      log.write(file_name+" "+str(num)+"\n")
204 |    # End time
205 |    end = time.time()
206 |    seconds=end-start
207 |    print( "Time taken : {0} seconds".format(seconds))
208 |    print(total_num)
209 |    log.write(str(total_num)+"\n")
210 |    visualization_image("Insight-MVT_Annotation_Train/MVI_40212/img00396.jpg","xml_test/MVI_40212__img00396.xml")
211 | 


--------------------------------------------------------------------------------
/keras_to_tensorflow.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Copyright (c) 2019, by the Authors: Amir H. Abdi
  4 | This script is freely available under the MIT Public License.
  5 | Please see the License file in the root for details.
  6 | The following code snippet will convert the keras model files
  7 | to the freezed .pb tensorflow weight file. The resultant TensorFlow model
  8 | holds both the model architecture and its associated weights.
  9 | """
 10 | 
 11 | import tensorflow as tf
 12 | from tensorflow.python.framework import graph_util
 13 | from tensorflow.python.framework import graph_io
 14 | from pathlib import Path
 15 | from absl import app
 16 | from absl import flags
 17 | from absl import logging
 18 | import tensorflow.keras as keras
 19 | from tensorflow.keras import backend as K
 20 | from tensorflow.keras.models import model_from_json, model_from_yaml
 21 | 
 22 | K.set_learning_phase(0)
 23 | FLAGS = flags.FLAGS
 24 | 
 25 | flags.DEFINE_string('input_model', None, 'Path to the input model.')
 26 | flags.DEFINE_string('input_model_json', None, 'Path to the input model '
 27 |                                               'architecture in json format.')
 28 | flags.DEFINE_string('input_model_yaml', None, 'Path to the input model '
 29 |                                               'architecture in yaml format.')
 30 | flags.DEFINE_string('output_model', None, 'Path where the converted model will '
 31 |                                           'be stored.')
 32 | flags.DEFINE_boolean('save_graph_def', False,
 33 |                      'Whether to save the graphdef.pbtxt file which contains '
 34 |                      'the graph definition in ASCII format.')
 35 | flags.DEFINE_string('output_nodes_prefix', None,
 36 |                     'If set, the output nodes will be renamed to '
 37 |                     '`output_nodes_prefix`+i, where `i` will numerate the '
 38 |                     'number of of output nodes of the network.')
 39 | flags.DEFINE_boolean('quantize', False,
 40 |                      'If set, the resultant TensorFlow graph weights will be '
 41 |                      'converted from float into eight-bit equivalents. See '
 42 |                      'documentation here: '
 43 |                      'https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms')
 44 | flags.DEFINE_boolean('channels_first', False,
 45 |                      'Whether channels are the first dimension of a tensor. '
 46 |                      'The default is TensorFlow behaviour where channels are '
 47 |                      'the last dimension.')
 48 | flags.DEFINE_boolean('output_meta_ckpt', False,
 49 |                      'If set to True, exports the model as .meta, .index, and '
 50 |                      '.data files, with a checkpoint file. These can be later '
 51 |                      'loaded in TensorFlow to continue training.')
 52 | 
 53 | flags.mark_flag_as_required('input_model')
 54 | flags.mark_flag_as_required('output_model')
 55 | 
 56 | 
 57 | def load_model(input_model_path, input_json_path=None, input_yaml_path=None):
 58 |     if not Path(input_model_path).exists():
 59 |         raise FileNotFoundError('Model file `{}` does not exist.'.format(input_model_path))
 60 |     try:
 61 |         # model = keras.models.load_model(input_model_path, compile=False)
 62 |         with open(input_json_path) as json_file:
 63 |             json_config = json_file.read()
 64 |             model = tf.keras.models.model_from_json(json_config, custom_objects={'tf': tf})
 65 | 
 66 |             # Load weights
 67 |             model.load_weights(input_model_path)
 68 |             return model
 69 |     except FileNotFoundError as err:
 70 |         logging.error('Input mode file (%s) does not exist.', FLAGS.input_model)
 71 |         raise err
 72 |     except ValueError as wrong_file_err:
 73 |         if input_json_path:
 74 |             if not Path(input_json_path).exists():
 75 |                 raise FileNotFoundError(
 76 |                     'Model description json file `{}` does not exist.'.format(
 77 |                         input_json_path))
 78 |             try:
 79 |                 model = model_from_json(open(str(input_json_path)).read())
 80 |                 model.load_weights(input_model_path)
 81 |                 return model
 82 |             except Exception as err:
 83 |                 logging.error("Couldn't load model from json.")
 84 |                 raise err
 85 |         elif input_yaml_path:
 86 |             if not Path(input_yaml_path).exists():
 87 |                 raise FileNotFoundError(
 88 |                     'Model description yaml file `{}` does not exist.'.format(
 89 |                         input_yaml_path))
 90 |             try:
 91 |                 model = model_from_yaml(open(str(input_yaml_path)).read())
 92 |                 model.load_weights(input_model_path)
 93 |                 return model
 94 |             except Exception as err:
 95 |                 logging.error("Couldn't load model from yaml.")
 96 |                 raise err
 97 |         else:
 98 |             logging.error(
 99 |                 'Input file specified only holds the weights, and not '
100 |                 'the model definition. Save the model using '
101 |                 'model.save(filename.h5) which will contain the network '
102 |                 'architecture as well as its weights. '
103 |                 'If the model is saved using the '
104 |                 'model.save_weights(filename) function, either '
105 |                 'input_model_json or input_model_yaml flags should be set to '
106 |                 'to import the network architecture prior to loading the '
107 |                 'weights. \n'
108 |                 'Check the keras documentation for more details '
109 |                 '(https://keras.io/getting-started/faq/)')
110 |             raise wrong_file_err
111 | 
112 | 
113 | def main(args):
114 |     # If output_model path is relative and in cwd, make it absolute from root
115 |     output_model = FLAGS.output_model
116 |     if str(Path(output_model).parent) == '.':
117 |         output_model = str((Path.cwd() / output_model))
118 | 
119 |     output_fld = Path(output_model).parent
120 |     output_model_name = Path(output_model).name
121 |     output_model_stem = Path(output_model).stem
122 |     output_model_pbtxt_name = output_model_stem + '.pbtxt'
123 | 
124 |     # Create output directory if it does not exist
125 |     #Path(output_model).parent.mkdir(parents=True, exist_ok=True)
126 |     #Path(output_model).parent.mkdir(parents=True)
127 | 
128 |     if FLAGS.channels_first:
129 |         K.set_image_data_format('channels_first')
130 |     else:
131 |         K.set_image_data_format('channels_last')
132 | 
133 |     model = load_model(FLAGS.input_model, FLAGS.input_model_json, FLAGS.input_model_yaml)
134 | 
135 |     # TODO(amirabdi): Support networks with multiple inputs
136 |     orig_output_node_names = [node.op.name for node in model.outputs]
137 |     if FLAGS.output_nodes_prefix:
138 |         num_output = len(orig_output_node_names)
139 |         pred = [None] * num_output
140 |         converted_output_node_names = [None] * num_output
141 | 
142 |         # Create dummy tf nodes to rename output
143 |         for i in range(num_output):
144 |             converted_output_node_names[i] = '{}{}'.format(
145 |                 FLAGS.output_nodes_prefix, i)
146 |             pred[i] = tf.identity(model.outputs[i],
147 |                                   name=converted_output_node_names[i])
148 |     else:
149 |         converted_output_node_names = orig_output_node_names
150 |     logging.info('Converted output node names are: %s',
151 |                  str(converted_output_node_names))
152 | 
153 |     sess = K.get_session()
154 |     if FLAGS.output_meta_ckpt:
155 |         saver = tf.train.Saver()
156 |         saver.save(sess, str(output_fld / output_model_stem))
157 | 
158 |     if FLAGS.save_graph_def:
159 |         tf.train.write_graph(sess.graph.as_graph_def(), str(output_fld),
160 |                              output_model_pbtxt_name, as_text=True)
161 |         logging.info('Saved the graph definition in ascii format at %s',
162 |                      str(Path(output_fld) / output_model_pbtxt_name))
163 | 
164 |     if FLAGS.quantize:
165 |         from tensorflow.tools.graph_transforms import TransformGraph
166 |         transforms = ["quantize_weights", "quantize_nodes"]
167 |         transformed_graph_def = TransformGraph(sess.graph.as_graph_def(), [],
168 |                                                converted_output_node_names,
169 |                                                transforms)
170 |         constant_graph = graph_util.convert_variables_to_constants(
171 |             sess,
172 |             transformed_graph_def,
173 |             converted_output_node_names)
174 |     else:
175 |         constant_graph = graph_util.convert_variables_to_constants(
176 |             sess,
177 |             sess.graph.as_graph_def(),
178 |             converted_output_node_names)
179 | 
180 |     graph_io.write_graph(constant_graph, str(output_fld), output_model_name,
181 |                          as_text=False)
182 |     logging.info('Saved the freezed graph at %s',
183 |                  str(Path(output_fld) / output_model_name))
184 | 
185 | 
186 | if __name__ == "__main__":
187 |     app.run(main)
188 | 


--------------------------------------------------------------------------------
/yolo_fastest_tensorflow2/nets/yolo_fastest.py:
--------------------------------------------------------------------------------
  1 | from functools import wraps
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | from tensorflow.keras import backend as K
  6 | from tensorflow.keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, LeakyReLU, \
  7 |     BatchNormalization, SeparableConv2D
  8 | from tensorflow.keras.models import Model
  9 | from tensorflow.keras.regularizers import l2
 10 | from nets.yolo_fastest_backbone import darknet_body
 11 | from utils.utils import compose
 12 | 
 13 | 
 14 | # --------------------------------------------------#
 15 | #   单次卷积
 16 | # --------------------------------------------------#
 17 | @wraps(Conv2D)
 18 | def DarknetConv2D(*args, **kwargs):
 19 |     # 多了一个正则化的项
 20 |     # 正则化系数5e-4
 21 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 22 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 23 |     darknet_conv_kwargs.update(kwargs)
 24 |     return Conv2D(*args, **darknet_conv_kwargs)
 25 | 
 26 | def DepthwiseConv2D(*args, **kwargs):
 27 |     # 多了一个正则化的项
 28 |     # 正则化系数5e-4
 29 |     darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
 30 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
 31 |     darknet_conv_kwargs.update(kwargs)
 32 |     return SeparableConv2D(*args, **darknet_conv_kwargs)
 33 | 
 34 | #---------------------------------------------------#
 35 | #   卷积块
 36 | #   DarknetConv2D + BatchNormalization + LeakyReLU
 37 | #---------------------------------------------------#
 38 | 
 39 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
 40 |     no_bias_kwargs = {'use_bias': False}
 41 |     no_bias_kwargs.update(kwargs)
 42 |     return compose(
 43 |         DarknetConv2D(*args, **no_bias_kwargs),
 44 |         BatchNormalization(),
 45 |         LeakyReLU(alpha=0.1))
 46 | 
 47 | def DarknetConv2D_Bias(*args, **kwargs):
 48 |     no_bias_kwargs = {'use_bias': True}
 49 |     no_bias_kwargs.update(kwargs)
 50 |     return DarknetConv2D(*args, **no_bias_kwargs)
 51 | 
 52 | def DepthwiseConv2D_BN_Leaky(*args, **kwargs):
 53 |     no_bias_kwargs = {'use_bias': False}
 54 |     no_bias_kwargs.update(kwargs)
 55 |     return compose(
 56 |         DepthwiseConv2D(*args, **no_bias_kwargs),
 57 |         BatchNormalization(),
 58 |         LeakyReLU(alpha=0.1))
 59 | 
 60 | 
 61 | # ---------------------------------------------------#
 62 | #   特征层->最后的输出
 63 | # ---------------------------------------------------#
 64 | def yolo_body(inputs, num_anchors, num_classes):
 65 |     # 生成darknet53的主干模型
 66 |     # 首先我们会获取到两个有效特征层
 67 |     # feat1 26x26x256
 68 |     # feat2 13x13x512
 69 |     feat1, feat2 = darknet_body(inputs)
 70 | 
 71 |     P5 = UpSampling2D(2)(feat2)
 72 |     P5 = Concatenate()([P5, feat1])
 73 |     P5_output = DarknetConv2D_BN_Leaky(96, (1, 1))(P5)
 74 |     P5_output = DepthwiseConv2D_BN_Leaky(96, (5, 5))(P5_output)
 75 |     #P5_output = DarknetConv2D_BN_Leaky(96, (1, 1))(P5_output)
 76 |     P5_output = DepthwiseConv2D_BN_Leaky(96, (5, 5))(P5_output)
 77 |     #P5_output = DarknetConv2D_BN_Leaky(96, (1, 1))(P5_output)
 78 |     P5_output = DarknetConv2D_Bias(num_anchors * (num_classes + 5), (1, 1))(P5_output)
 79 | 
 80 |     P4 = DepthwiseConv2D_BN_Leaky(128, (5, 5))(feat2)
 81 |     #P4 = DarknetConv2D_BN_Leaky(128, (1, 1))(P4)
 82 |     P4 = DepthwiseConv2D_BN_Leaky(128, (5, 5))(P4)
 83 |     #P4 = DarknetConv2D_BN_Leaky(128, (1, 1))(P4)
 84 |     P4_output = DarknetConv2D_Bias(num_anchors * (num_classes + 5), (1, 1))(P4)
 85 | 
 86 |     return Model(inputs, [P4_output, P5_output])
 87 | 
 88 | 
 89 | # ---------------------------------------------------#
 90 | #   将预测值的每个特征层调成真实值
 91 | # ---------------------------------------------------#
 92 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
 93 |     num_anchors = len(anchors)
 94 |     # [1, 1, 1, num_anchors, 2]
 95 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
 96 | 
 97 |     # 获得x，y的网格
 98 |     # (13,13, 1, 2)
 99 |     grid_shape = K.shape(feats)[1:3]  # height, width
100 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
101 |                     [1, grid_shape[1], 1, 1])
102 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
103 |                     [grid_shape[0], 1, 1, 1])
104 |     grid = K.concatenate([grid_x, grid_y])
105 |     grid = K.cast(grid, K.dtype(feats))
106 | 
107 |     # (batch_size,13,13,3,85)
108 |     feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
109 | 
110 |     # 将预测值调成真实值
111 |     # box_xy对应框的中心点
112 |     # box_wh对应框的宽和高
113 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[..., ::-1], K.dtype(feats))
114 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[..., ::-1], K.dtype(feats))
115 |     box_confidence = K.sigmoid(feats[..., 4:5])
116 |     box_class_probs = K.sigmoid(feats[..., 5:])
117 | 
118 |     # 在计算loss的时候返回如下参数
119 |     if calc_loss == True:
120 |         return grid, feats, box_xy, box_wh
121 |     return box_xy, box_wh, box_confidence, box_class_probs
122 | 
123 | 
124 | # ---------------------------------------------------#
125 | #   对box进行调整，使其符合真实图片的样子
126 | # ---------------------------------------------------#
127 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
128 |     box_yx = box_xy[..., ::-1]
129 |     box_hw = box_wh[..., ::-1]
130 | 
131 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
132 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
133 | 
134 |     new_shape = K.round(image_shape * K.min(input_shape / image_shape))
135 |     offset = (input_shape - new_shape) / 2. / input_shape
136 |     scale = input_shape / new_shape
137 | 
138 |     box_yx = (box_yx - offset) * scale
139 |     box_hw *= scale
140 | 
141 |     box_mins = box_yx - (box_hw / 2.)
142 |     box_maxes = box_yx + (box_hw / 2.)
143 |     boxes = K.concatenate([
144 |         box_mins[..., 0:1],  # y_min
145 |         box_mins[..., 1:2],  # x_min
146 |         box_maxes[..., 0:1],  # y_max
147 |         box_maxes[..., 1:2]  # x_max
148 |     ])
149 | 
150 |     boxes *= K.concatenate([image_shape, image_shape])
151 |     return boxes
152 | 
153 | 
154 | # ---------------------------------------------------#
155 | #   获取每个box和它的得分
156 | # ---------------------------------------------------#
157 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
158 |     # 将预测值调成真实值
159 |     # box_xy对应框的中心点
160 |     # box_wh对应框的宽和高
161 |     # -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80
162 |     box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape)
163 |     # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
164 |     boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
165 |     # 获得得分和box
166 |     boxes = K.reshape(boxes, [-1, 4])
167 |     box_scores = box_confidence * box_class_probs
168 |     box_scores = K.reshape(box_scores, [-1, num_classes])
169 |     return boxes, box_scores
170 | 
171 | 
172 | # ---------------------------------------------------#
173 | #   图片预测
174 | # ---------------------------------------------------#
175 | def yolo_eval(yolo_outputs,
176 |               anchors,
177 |               num_classes,
178 |               image_shape,
179 |               max_boxes=20,
180 |               score_threshold=.6,
181 |               iou_threshold=.5,
182 |               eager=False):
183 |     if eager:
184 |         image_shape = K.reshape(yolo_outputs[-1], [-1])
185 |         num_layers = len(yolo_outputs) - 1
186 |     else:
187 |         # 获得特征层的数量
188 |         num_layers = len(yolo_outputs)
189 |     # 特征层1对应的anchor是678
190 |     # 特征层2对应的anchor是345
191 |     # 特征层3对应的anchor是012
192 |     anchor_mask = [[3, 4, 5], [0, 1, 2]]
193 | 
194 |     input_shape = K.shape(yolo_outputs[0])[1:3] * 32
195 |     boxes = []
196 |     box_scores = []
197 |     # 对每个特征层进行处理
198 |     for l in range(num_layers):
199 |         _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
200 |                                                     image_shape)
201 |         boxes.append(_boxes)
202 |         box_scores.append(_box_scores)
203 |     # 将每个特征层的结果进行堆叠
204 |     boxes = K.concatenate(boxes, axis=0)
205 |     box_scores = K.concatenate(box_scores, axis=0)
206 | 
207 |     mask = box_scores >= score_threshold
208 |     max_boxes_tensor = K.constant(max_boxes, dtype='int32')
209 |     boxes_ = []
210 |     scores_ = []
211 |     classes_ = []
212 |     for c in range(num_classes):
213 |         # 取出所有box_scores >= score_threshold的框，和成绩
214 |         class_boxes = tf.boolean_mask(boxes, mask[:, c])
215 |         class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
216 | 
217 |         # 非极大抑制，去掉box重合程度高的那一些
218 |         nms_index = tf.image.non_max_suppression(
219 |             class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
220 | 
221 |         # 获取非极大抑制后的结果
222 |         # 下列三个分别是
223 |         # 框的位置，得分与种类
224 |         class_boxes = K.gather(class_boxes, nms_index)
225 |         class_box_scores = K.gather(class_box_scores, nms_index)
226 |         classes = K.ones_like(class_box_scores, 'int32') * c
227 |         boxes_.append(class_boxes)
228 |         scores_.append(class_box_scores)
229 |         classes_.append(classes)
230 |     boxes_ = K.concatenate(boxes_, axis=0)
231 |     scores_ = K.concatenate(scores_, axis=0)
232 |     classes_ = K.concatenate(classes_, axis=0)
233 | 
234 |     return boxes_, scores_, classes_


--------------------------------------------------------------------------------
/core/yolo3_predictor.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright 2020 Xilinx Inc.
  3 | # 
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | # 
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | # 
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # This file is modified from other's code from github.
 17 | # For more details, please refer to https://github.com/aloyschen/tensorflow-yolo3.git
 18 | 
 19 | 
 20 | import os
 21 | import random
 22 | import colorsys
 23 | import numpy as np
 24 | import tensorflow as tf
 25 | 
 26 | 
 27 | class yolo_predictor:
 28 |     def __init__(self, obj_threshold, nms_threshold, classes_file, anchors_file):
 29 |         """
 30 |         Introduction
 31 |         ------------
 32 |             初始化函数
 33 |         Parameters
 34 |         ----------
 35 |             obj_threshold: 目标检测为物体的阈值
 36 |             nms_threshold: nms阈值
 37 |         """
 38 |         self.obj_threshold = obj_threshold
 39 |         self.nms_threshold = nms_threshold
 40 |         self.classes_path = classes_file
 41 |         self.anchors_path = anchors_file
 42 |         self.class_names = self._get_class()
 43 |         self.anchors = self._get_anchors()
 44 |         hsv_tuples = [(x / len(self.class_names), 1., 1.)for x in range(len(self.class_names))]
 45 |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
 46 |         self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
 47 |         random.seed(10101)
 48 |         random.shuffle(self.colors)
 49 |         random.seed(None)
 50 | 
 51 | 
 52 |     def _get_class(self):
 53 |         """
 54 |         Introduction
 55 |         ------------
 56 |             读取类别名称
 57 |         """
 58 |         classes_path = os.path.expanduser(self.classes_path)
 59 |         with open(classes_path) as f:
 60 |             class_names = f.readlines()
 61 |         class_names = [c.strip() for c in class_names]
 62 |         return class_names
 63 | 
 64 |     def _get_anchors(self):
 65 |         """
 66 |         Introduction
 67 |         ------------
 68 |             读取anchors数据
 69 |         """
 70 |         anchors_path = os.path.expanduser(self.anchors_path)
 71 |         with open(anchors_path) as f:
 72 |             anchors = f.readline()
 73 |             anchors = [float(x) for x in anchors.split(',')]
 74 |             anchors = np.array(anchors).reshape(-1, 2)
 75 |         return anchors
 76 | 
 77 | 
 78 | 
 79 |     def eval(self, yolo_outputs, image_shape, max_boxes = 20):
 80 |         """
 81 |         Introduction
 82 |         ------------
 83 |             根据Yolo模型的输出进行非极大值抑制，获取最后的物体检测框和物体检测类别
 84 |         Parameters
 85 |         ----------
 86 |             yolo_outputs: yolo模型输出
 87 |             image_shape: 图片的大小
 88 |             max_boxes:  最大box数量
 89 |         Returns
 90 |         -------
 91 |             boxes_: 物体框的位置
 92 |             scores_: 物体类别的概率
 93 |             classes_: 物体类别
 94 |         """
 95 |         anchor_mask = [[3, 4, 5], [0, 1, 2]]
 96 |         boxes = []
 97 |         box_scores = []
 98 |         input_shape = tf.shape(yolo_outputs[0])[1 : 3] * 32
 99 |         # 对三个尺度的输出获取每个预测box坐标和box的分数，score计算为置信度x类别概率
100 |         for i in range(len(yolo_outputs)):
101 |             _boxes, _box_scores = self.boxes_and_scores(yolo_outputs[i], self.anchors[anchor_mask[i]], len(self.class_names), input_shape, image_shape)
102 |             boxes.append(_boxes)
103 |             box_scores.append(_box_scores)
104 |         boxes = tf.concat(boxes, axis = 0)
105 |         box_scores = tf.concat(box_scores, axis = 0)
106 | 
107 |         mask = box_scores >= self.obj_threshold
108 |         max_boxes_tensor = tf.constant(max_boxes, dtype = tf.int32)
109 |         boxes_ = []
110 |         scores_ = []
111 |         classes_ = []
112 |         for c in range(len(self.class_names)):
113 |             class_boxes = tf.boolean_mask(boxes, mask[:, c])
114 |             class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
115 |             nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor, iou_threshold = self.nms_threshold)
116 |             class_boxes = tf.gather(class_boxes, nms_index)
117 |             class_box_scores = tf.gather(class_box_scores, nms_index)
118 |             classes = tf.ones_like(class_box_scores, 'int32') * c
119 |             boxes_.append(class_boxes)
120 |             scores_.append(class_box_scores)
121 |             classes_.append(classes)
122 |         boxes_ = tf.concat(boxes_, axis = 0)
123 |         scores_ = tf.concat(scores_, axis = 0)
124 |         classes_ = tf.concat(classes_, axis = 0)
125 |         return boxes_, scores_, classes_
126 | 
127 | 
128 |     def boxes_and_scores(self, feats, anchors, classes_num, input_shape, image_shape):
129 |         """
130 |         Introduction
131 |         ------------
132 |             将预测出的box坐标转换为对应原图的坐标，然后计算每个box的分数
133 |         Parameters
134 |         ----------
135 |             feats: yolo输出的feature map
136 |             anchors: anchor的位置
137 |             class_num: 类别数目
138 |             input_shape: 输入大小
139 |             image_shape: 图片大小
140 |         Returns
141 |         -------
142 |             boxes: 物体框的位置
143 |             boxes_scores: 物体框的分数，为置信度和类别概率的乘积
144 |         """
145 |         box_xy, box_wh, box_confidence, box_class_probs = self._get_feats(feats, anchors, classes_num, input_shape)
146 |         boxes = self.correct_boxes(box_xy, box_wh, input_shape, image_shape)
147 |         boxes = tf.reshape(boxes, [-1, 4])
148 |         box_scores = box_confidence * box_class_probs
149 |         box_scores = tf.reshape(box_scores, [-1, classes_num])
150 |         return boxes, box_scores
151 | 
152 | 
153 |     def correct_boxes(self, box_xy, box_wh, input_shape, image_shape):
154 |         """
155 |         Introduction
156 |         ------------
157 |             计算物体框预测坐标在原图中的位置坐标
158 |         Parameters
159 |         ----------
160 |             box_xy: 物体框左上角坐标
161 |             box_wh: 物体框的宽高
162 |             input_shape: 输入的大小
163 |             image_shape: 图片的大小
164 |         Returns
165 |         -------
166 |             boxes: 物体框的位置
167 |         """
168 |         box_yx = box_xy[..., ::-1]
169 |         box_hw = box_wh[..., ::-1]
170 |         input_shape = tf.cast(input_shape, dtype = tf.float32)
171 |         image_shape = tf.cast(image_shape, dtype = tf.float32)
172 |         new_shape = tf.round(image_shape * tf.reduce_min(input_shape / image_shape))
173 |         offset = (input_shape - new_shape) / 2. / input_shape
174 |         scale = input_shape / new_shape
175 |         box_yx = (box_yx - offset) * scale
176 |         box_hw *= scale
177 | 
178 |         box_mins = box_yx - (box_hw / 2.)
179 |         box_maxes = box_yx + (box_hw / 2.)
180 |         boxes = tf.concat([
181 |             box_mins[..., 0:1],
182 |             box_mins[..., 1:2],
183 |             box_maxes[..., 0:1],
184 |             box_maxes[..., 1:2]
185 |         ], axis = -1)
186 |         boxes *= tf.concat([image_shape, image_shape], axis = -1)
187 |         return boxes
188 | 
189 | 
190 | 
191 |     def _get_feats(self, feats, anchors, num_classes, input_shape):
192 |         """
193 |         Introduction
194 |         ------------
195 |             根据yolo最后一层的输出确定bounding box
196 |         Parameters
197 |         ----------
198 |             feats: yolo模型最后一层输出
199 |             anchors: anchors的位置
200 |             num_classes: 类别数量
201 |             input_shape: 输入大小
202 |         Returns
203 |         -------
204 |             box_xy, box_wh, box_confidence, box_class_probs
205 |         """
206 |         num_anchors = len(anchors)
207 |         anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2])
208 |         grid_size = tf.shape(feats)[1:3]
209 |         predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5])
210 |         # 这里构建13*13*1*2的矩阵，对应每个格子加上对应的坐标
211 |         grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1])
212 |         grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1])
213 |         grid = tf.concat([grid_x, grid_y], axis = -1)
214 |         grid = tf.cast(grid, tf.float32)
215 |         # 将x,y坐标归一化为占416的比例
216 |         box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32)
217 |         # 将w,h也归一化为占416的比例
218 |         box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / tf.cast(input_shape[::-1], tf.float32)
219 |         box_confidence = tf.sigmoid(predictions[..., 4:5])
220 |         box_class_probs = tf.sigmoid(predictions[..., 5:])
221 |         return box_xy, box_wh, box_confidence, box_class_probs
222 | 
223 | 
224 |     def predict(self, output, image_shape):
225 |         """
226 |         Introduction
227 |         ------------
228 |             构建预测模型
229 |         Parameters
230 |         ----------
231 |             inputs: 处理之后的输入图片
232 |             image_shape: 图像原始大小
233 |         Returns
234 |         -------
235 |             boxes: 物体框坐标
236 |             scores: 物体概率值
237 |             classes: 物体类别
238 |         """
239 |         boxes, scores, classes = self.eval(output, image_shape, max_boxes = 20)
240 |         return boxes, scores, classes
241 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | import tensorflow.keras.backend as K
  6 | from tensorflow.keras.layers import Input, Lambda
  7 | from tensorflow.keras.models import Model
  8 | from tensorflow.keras.optimizers import Adam
  9 | from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping
 10 | from nets.yolo4_tiny import yolo_body
 11 | from nets.loss import yolo_loss
 12 | from utils.utils import get_random_data, get_random_data_with_Mosaic, rand, WarmUpCosineDecayScheduler, ModelCheckpoint
 13 | import os
 14 | 
 15 | 
 16 | #---------------------------------------------------#
 17 | #   获得类和先验框
 18 | #---------------------------------------------------#
 19 | def get_classes(classes_path):
 20 |     '''loads the classes'''
 21 |     with open(classes_path) as f:
 22 |         class_names = f.readlines()
 23 |     class_names = [c.strip() for c in class_names]
 24 |     return class_names
 25 | 
 26 | def get_anchors(anchors_path):
 27 |     '''loads the anchors from a file'''
 28 |     with open(anchors_path) as f:
 29 |         anchors = f.readline()
 30 |     anchors = [float(x) for x in anchors.split(',')]
 31 |     return np.array(anchors).reshape(-1, 2)
 32 | 
 33 | #---------------------------------------------------#
 34 | #   训练数据生成器
 35 | #---------------------------------------------------#
 36 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
 37 |     '''data generator for fit_generator'''
 38 |     n = len(annotation_lines)
 39 |     i = 0
 40 |     flag = True
 41 |     while True:
 42 |         image_data = []
 43 |         box_data = []
 44 |         for b in range(batch_size):
 45 |             if i==0:
 46 |                 np.random.shuffle(annotation_lines)
 47 |             if mosaic:
 48 |                 if flag and (i+4) < n:
 49 |                     image, box = get_random_data_with_Mosaic(annotation_lines[i:i+4], input_shape)
 50 |                     i = (i+4) % n
 51 |                 else:
 52 |                     image, box = get_random_data(annotation_lines[i], input_shape)
 53 |                     i = (i+1) % n
 54 |                 flag = bool(1-flag)
 55 |             else:
 56 |                 image, box = get_random_data(annotation_lines[i], input_shape)
 57 |                 i = (i+1) % n
 58 |             image_data.append(image)
 59 |             box_data.append(box)
 60 |         image_data = np.array(image_data)
 61 |         box_data = np.array(box_data)
 62 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
 63 |         yield [image_data, *y_true], np.zeros(batch_size)
 64 | 
 65 | 
 66 | #---------------------------------------------------#
 67 | #   读入xml文件，并输出y_true
 68 | #---------------------------------------------------#
 69 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
 70 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
 71 |     # 一共有三个特征层数
 72 |     num_layers = len(anchors)//3
 73 |     # 先验框
 74 |     anchor_mask: List[List[int]] = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
 75 | 
 76 |     true_boxes = np.array(true_boxes, dtype='float32')
 77 |     input_shape = np.array(input_shape, dtype='int32') # 416,416
 78 |     # 读出xy轴，读出长宽
 79 |     # 中心点(m,n,2)
 80 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
 81 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
 82 |     # 计算比例
 83 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
 84 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
 85 | 
 86 |     # m张图
 87 |     m = true_boxes.shape[0]
 88 |     # 得到网格的shape为13,13;26,26;
 89 |     grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
 90 |     # y_true的格式为(m,13,13,3,85)(m,26,26,3,85)
 91 |     y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
 92 |         dtype='float32') for l in range(num_layers)]
 93 |     # [1,9,2]
 94 |     anchors = np.expand_dims(anchors, 0)
 95 |     anchor_maxes = anchors / 2.
 96 |     anchor_mins = -anchor_maxes
 97 |     # 长宽要大于0才有效
 98 |     valid_mask = boxes_wh[..., 0]>0
 99 | 
100 |     for b in range(m):
101 |         # 对每一张图进行处理
102 |         wh = boxes_wh[b, valid_mask[b]]
103 |         if len(wh)==0: continue
104 |         # [n,1,2]
105 |         wh = np.expand_dims(wh, -2)
106 |         box_maxes = wh / 2.
107 |         box_mins = -box_maxes
108 | 
109 |         # 计算真实框和哪个先验框最契合
110 |         intersect_mins = np.maximum(box_mins, anchor_mins)
111 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
112 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
113 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
114 |         box_area = wh[..., 0] * wh[..., 1]
115 |         anchor_area = anchors[..., 0] * anchors[..., 1]
116 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
117 |         # 维度是(n) 感谢 消尽不死鸟 的提醒
118 |         best_anchor = np.argmax(iou, axis=-1)
119 | 
120 |         for t, n in enumerate(best_anchor):
121 |             for l in range(num_layers):
122 |                 if n in anchor_mask[l]:
123 |                     # floor用于向下取整
124 |                     i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
125 |                     j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
126 |                     # 找到真实框在特征层l中第b副图像对应的位置
127 |                     k = anchor_mask[l].index(n)
128 |                     c = true_boxes[b,t, 4].astype('int32')
129 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
130 |                     y_true[l][b, j, i, k, 4] = 1
131 |                     y_true[l][b, j, i, k, 5+c] = 1
132 | 
133 |     return y_true
134 | 
135 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
136 | for gpu in gpus:
137 |     tf.config.experimental.set_memory_growth(gpu, True)
138 | 
139 | 
140 | #----------------------------------------------------#
141 | #   检测精度mAP和pr曲线计算参考视频
142 | #   https://www.bilibili.com/video/BV1zE411u7Vw
143 | #----------------------------------------------------#
144 | if __name__ == "__main__":
145 |     # 标签的位置
146 |     annotation_path = '2007_train.txt'
147 |     # 获取classes和anchor的位置
148 |     classes_path = 'model_data/new_class.txt'
149 |     anchors_path = 'model_data/yolo_anchors.txt'
150 |     # 预训练模型的位置
151 |     weights_path = 'logs_1/last1.h5'
152 |     # 获得classes和anchor
153 |     class_names = get_classes(classes_path)
154 |     anchors = get_anchors(anchors_path)
155 |     # 一共有多少类
156 |     num_classes = len(class_names)
157 |     num_anchors = len(anchors)
158 |     # 训练后的模型保存的位置
159 |     log_dir = 'logs_2/'
160 |     #----------------------------------------------#
161 |     #   输入的shape大小
162 |     #   显存比较小可以使用416x416
163 |     #   现存比较大可以使用608x608
164 |     #----------------------------------------------#
165 |     input_shape = (320,320)
166 |     mosaic = False
167 |     Cosine_scheduler = False
168 |     label_smoothing = 0
169 | 
170 |     # 清除session
171 |     K.clear_session()
172 | 
173 |     # 输入的图像为
174 |     image_input = Input(shape=(None, None, 3))
175 |     h, w = input_shape
176 | 
177 |     # 创建yolo模型
178 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
179 |     model_body = yolo_body(image_input, num_anchors//2, num_classes)
180 |     
181 |     model_body.summary()
182 | 
183 |     if not os.path.exists(log_dir):
184 |         os.makedirs(log_dir)
185 |     json_config = model_body.to_json()
186 |     with open(log_dir + 'model_config.json', 'w') as json_file:
187 |         json_file.write(json_config)
188 |     #-------------------------------------------#
189 |     #   权值文件的下载请看README
190 |     #-------------------------------------------#
191 |     print('Load weights {}.'.format(weights_path))
192 |     model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
193 |     
194 |     # y_true为13,13,3,85
195 |     # 26,26,3,85
196 |     y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)]
197 | 
198 |     # 输入为*model_body.input, *y_true
199 |     # 输出为model_loss
200 |     loss_input = [*model_body.output, *y_true]
201 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
202 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing})(loss_input)
203 | 
204 |     model = Model([model_body.input, *y_true], model_loss)
205 | 
206 |     # 训练参数设置
207 |     logging = TensorBoard(log_dir=log_dir)
208 |     checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
209 |         monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
210 |     early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
211 | 
212 |     # 0.1用于验证，0.9用于训练
213 |     val_split = 0.1
214 |     with open(annotation_path) as f:
215 |         lines = f.readlines()
216 |     np.random.seed(10101)
217 |     np.random.shuffle(lines)
218 |     np.random.seed(None)
219 |     num_val = int(len(lines)*val_split)
220 |     num_train = len(lines) - num_val
221 |     
222 |     freeze_layers = 60
223 |     for i in range(freeze_layers): model_body.layers[i].trainable = False
224 |     print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
225 | 
226 |     #------------------------------------------------------#
227 |     #   主干特征提取网络特征通用，冻结训练可以加快训练速度
228 |     #   也可以在训练初期防止权值被破坏。
229 |     #   Init_Epoch为起始世代
230 |     #   Freeze_Epoch为冻结训练的世代
231 |     #   Epoch总训练世代
232 |     #   提示OOM或者显存不足请调小Batch_size
233 |     #------------------------------------------------------#
234 |     if True:
235 |         Init_epoch = 0
236 |         Freeze_epoch = 1
237 |         # batch_size大小，每次喂入多少数据
238 |         batch_size = 16
239 |         # 最大学习率
240 |         learning_rate_base = 1e-3
241 |         if Cosine_scheduler:
242 |             # 预热期
243 |             warmup_epoch = int((Freeze_epoch-Init_epoch)*0.2)
244 |             # 总共的步长
245 |             total_steps = int((Freeze_epoch-Init_epoch) * num_train / batch_size)
246 |             # 预热步长
247 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
248 |             # 学习率
249 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
250 |                                                         total_steps=total_steps,
251 |                                                         warmup_learning_rate=1e-4,
252 |                                                          warmup_steps=warmup_steps,
253 |                                                         hold_base_rate_steps=num_train,
254 |                                                         min_learn_rate=1e-6
255 |                                                         )
256 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
257 |         else:
258 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
259 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
260 | 
261 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
262 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
263 |                 steps_per_epoch=max(1, num_train//batch_size),
264 |                 validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
265 |                 validation_steps=max(1, num_val//batch_size),
266 |                 epochs=Freeze_epoch,
267 |                 initial_epoch=Init_epoch,
268 |                 callbacks=[logging, checkpoint, reduce_lr, early_stopping])
269 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
270 | 
271 |     for i in range(freeze_layers): model_body.layers[i].trainable = True
272 | 
273 |     # 解冻后训练
274 |     if True:
275 |         Freeze_epoch = 1
276 |         Epoch = 11
277 |         # batch_size大小，每次喂入多少数据
278 |         batch_size = 16
279 | 
280 |         # 最大学习率
281 |         learning_rate_base = 1e-4
282 |         if Cosine_scheduler:
283 |             # 预热期
284 |             warmup_epoch = int((Epoch-Freeze_epoch)*0.2)
285 |             # 总共的步长
286 |             total_steps = int((Epoch-Freeze_epoch) * num_train / batch_size)
287 |             # 预热步长
288 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
289 |             # 学习率
290 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
291 |                                                         total_steps=total_steps,
292 |                                                         warmup_learning_rate=1e-5,
293 |                                                         warmup_steps=warmup_steps,
294 |                                                         hold_base_rate_steps=num_train//2,
295 |                                                         min_learn_rate=1e-6
296 |                                                         )
297 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
298 |         else:
299 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
300 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
301 | 
302 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
303 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
304 |                 steps_per_epoch=max(1, num_train//batch_size),
305 |                 validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
306 |                 validation_steps=max(1, num_val//batch_size),
307 |                 epochs=Epoch,
308 |                 initial_epoch=Freeze_epoch,
309 |                 callbacks=[logging, checkpoint, reduce_lr, early_stopping])
310 |         model.save_weights(log_dir + 'last1.h5')
311 |         #json_config = model.to_json()
312 |         #with open(log_dir + 'model_config.json', 'w') as json_file:
313 |         #    json_file.write(json_config)
314 | 
315 | 


--------------------------------------------------------------------------------
/yolo_fastest_tensorflow2/train.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | import tensorflow.keras.backend as K
  6 | from tensorflow.keras.layers import Input, Lambda
  7 | from tensorflow.keras.models import Model
  8 | from tensorflow.keras.optimizers import Adam
  9 | from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping
 10 | from nets.yolo_fastest import yolo_body
 11 | from nets.loss import yolo_loss
 12 | from utils.utils import get_random_data, get_random_data_with_Mosaic, rand, WarmUpCosineDecayScheduler, ModelCheckpoint
 13 | import os
 14 | 
 15 | 
 16 | #---------------------------------------------------#
 17 | #   获得类和先验框
 18 | #---------------------------------------------------#
 19 | def get_classes(classes_path):
 20 |     '''loads the classes'''
 21 |     with open(classes_path) as f:
 22 |         class_names = f.readlines()
 23 |     class_names = [c.strip() for c in class_names]
 24 |     return class_names
 25 | 
 26 | def get_anchors(anchors_path):
 27 |     '''loads the anchors from a file'''
 28 |     with open(anchors_path) as f:
 29 |         anchors = f.readline()
 30 |     anchors = [float(x) for x in anchors.split(',')]
 31 |     return np.array(anchors).reshape(-1, 2)
 32 | 
 33 | #---------------------------------------------------#
 34 | #   训练数据生成器
 35 | #---------------------------------------------------#
 36 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
 37 |     '''data generator for fit_generator'''
 38 |     n = len(annotation_lines)
 39 |     i = 0
 40 |     flag = True
 41 |     while True:
 42 |         image_data = []
 43 |         box_data = []
 44 |         for b in range(batch_size):
 45 |             if i==0:
 46 |                 np.random.shuffle(annotation_lines)
 47 |             if mosaic:
 48 |                 if flag and (i+4) < n:
 49 |                     image, box = get_random_data_with_Mosaic(annotation_lines[i:i+4], input_shape)
 50 |                     i = (i+4) % n
 51 |                 else:
 52 |                     image, box = get_random_data(annotation_lines[i], input_shape)
 53 |                     i = (i+1) % n
 54 |                 flag = bool(1-flag)
 55 |             else:
 56 |                 image, box = get_random_data(annotation_lines[i], input_shape)
 57 |                 i = (i+1) % n
 58 |             image_data.append(image)
 59 |             box_data.append(box)
 60 |         image_data = np.array(image_data)
 61 |         box_data = np.array(box_data)
 62 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
 63 |         yield [image_data, *y_true], np.zeros(batch_size)
 64 | 
 65 | 
 66 | #---------------------------------------------------#
 67 | #   读入xml文件，并输出y_true
 68 | #---------------------------------------------------#
 69 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
 70 |     assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
 71 |     # 一共有三个特征层数
 72 |     num_layers = len(anchors)//3
 73 |     # 先验框
 74 |     anchor_mask: List[List[int]] = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
 75 | 
 76 |     true_boxes = np.array(true_boxes, dtype='float32')
 77 |     input_shape = np.array(input_shape, dtype='int32') # 416,416
 78 |     # 读出xy轴，读出长宽
 79 |     # 中心点(m,n,2)
 80 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
 81 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
 82 |     # 计算比例
 83 |     true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
 84 |     true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
 85 | 
 86 |     # m张图
 87 |     m = true_boxes.shape[0]
 88 |     # 得到网格的shape为13,13;26,26;
 89 |     grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
 90 |     # y_true的格式为(m,13,13,3,85)(m,26,26,3,85)
 91 |     y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
 92 |         dtype='float32') for l in range(num_layers)]
 93 |     # [1,9,2]
 94 |     anchors = np.expand_dims(anchors, 0)
 95 |     anchor_maxes = anchors / 2.
 96 |     anchor_mins = -anchor_maxes
 97 |     # 长宽要大于0才有效
 98 |     valid_mask = boxes_wh[..., 0]>0
 99 | 
100 |     for b in range(m):
101 |         # 对每一张图进行处理
102 |         wh = boxes_wh[b, valid_mask[b]]
103 |         if len(wh)==0: continue
104 |         # [n,1,2]
105 |         wh = np.expand_dims(wh, -2)
106 |         box_maxes = wh / 2.
107 |         box_mins = -box_maxes
108 | 
109 |         # 计算真实框和哪个先验框最契合
110 |         intersect_mins = np.maximum(box_mins, anchor_mins)
111 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
112 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
113 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
114 |         box_area = wh[..., 0] * wh[..., 1]
115 |         anchor_area = anchors[..., 0] * anchors[..., 1]
116 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
117 |         # 维度是(n) 感谢 消尽不死鸟 的提醒
118 |         best_anchor = np.argmax(iou, axis=-1)
119 | 
120 |         for t, n in enumerate(best_anchor):
121 |             for l in range(num_layers):
122 |                 if n in anchor_mask[l]:
123 |                     # floor用于向下取整
124 |                     i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
125 |                     j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
126 |                     # 找到真实框在特征层l中第b副图像对应的位置
127 |                     k = anchor_mask[l].index(n)
128 |                     c = true_boxes[b,t, 4].astype('int32')
129 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
130 |                     y_true[l][b, j, i, k, 4] = 1
131 |                     y_true[l][b, j, i, k, 5+c] = 1
132 | 
133 |     return y_true
134 | 
135 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
136 | for gpu in gpus:
137 |     tf.config.experimental.set_memory_growth(gpu, True)
138 | 
139 | 
140 | #----------------------------------------------------#
141 | #   检测精度mAP和pr曲线计算参考视频
142 | #   https://www.bilibili.com/video/BV1zE411u7Vw
143 | #----------------------------------------------------#
144 | if __name__ == "__main__":
145 |     # 标签的位置
146 |     annotation_path = '2007_train.txt'
147 |     # 获取classes和anchor的位置
148 |     classes_path = 'model_data/new_class.txt'
149 |     anchors_path = 'model_data/yolo_anchors.txt'
150 |     # 预训练模型的位置
151 |     weights_path = 'logs_6/ep003-loss14.097-val_loss13.660.h5'
152 |     # 获得classes和anchor
153 |     class_names = get_classes(classes_path)
154 |     anchors = get_anchors(anchors_path)
155 |     # 一共有多少类
156 |     num_classes = len(class_names)
157 |     num_anchors = len(anchors)
158 |     # 训练后的模型保存的位置
159 |     log_dir = 'logs_7/'
160 |     #----------------------------------------------#
161 |     #   输入的shape大小
162 |     #   显存比较小可以使用416x416
163 |     #   现存比较大可以使用608x608
164 |     #----------------------------------------------#
165 |     input_shape = (320,320)
166 |     mosaic = False
167 |     Cosine_scheduler = False
168 |     label_smoothing = 0
169 | 
170 |     # 清除session
171 |     K.clear_session()
172 | 
173 |     # 输入的图像为
174 |     image_input = Input(shape=(None, None, 3))
175 |     h, w = input_shape
176 | 
177 |     # 创建yolo模型
178 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
179 |     model_body = yolo_body(image_input, num_anchors//2, num_classes)
180 |     
181 |     model_body.summary()
182 | 
183 |     if not os.path.exists(log_dir):
184 |         os.makedirs(log_dir)
185 |     json_config = model_body.to_json()
186 |     with open(log_dir + 'model_config.json', 'w') as json_file:
187 |         json_file.write(json_config)
188 |     #-------------------------------------------#
189 |     #   权值文件的下载请看README
190 |     #-------------------------------------------#
191 |     print('Load weights {}.'.format(weights_path))
192 |     model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
193 |     
194 |     # y_true为13,13,3,85
195 |     # 26,26,3,85
196 |     y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)]
197 | 
198 |     # 输入为*model_body.input, *y_true
199 |     # 输出为model_loss
200 |     loss_input = [*model_body.output, *y_true]
201 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
202 |         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing})(loss_input)
203 | 
204 |     model = Model([model_body.input, *y_true], model_loss)
205 | 
206 |     # 训练参数设置
207 |     logging = TensorBoard(log_dir=log_dir)
208 |     checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
209 |         monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
210 |     early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
211 | 
212 |     # 0.1用于验证，0.9用于训练
213 |     val_split = 0.1
214 |     with open(annotation_path) as f:
215 |         lines = f.readlines()
216 |     np.random.seed(10101)
217 |     np.random.shuffle(lines)
218 |     np.random.seed(None)
219 |     num_val = int(len(lines)*val_split)
220 |     num_train = len(lines) - num_val
221 |     
222 |     freeze_layers = 60
223 |     for i in range(freeze_layers): model_body.layers[i].trainable = False
224 |     print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
225 | 
226 |     #------------------------------------------------------#
227 |     #   主干特征提取网络特征通用，冻结训练可以加快训练速度
228 |     #   也可以在训练初期防止权值被破坏。
229 |     #   Init_Epoch为起始世代
230 |     #   Freeze_Epoch为冻结训练的世代
231 |     #   Epoch总训练世代
232 |     #   提示OOM或者显存不足请调小Batch_size
233 |     #------------------------------------------------------#
234 |     if True:
235 |         Init_epoch = 0
236 |         Freeze_epoch = 0
237 |         # batch_size大小，每次喂入多少数据
238 |         batch_size = 32
239 |         # 最大学习率
240 |         learning_rate_base = 1e-3
241 |         if Cosine_scheduler:
242 |             # 预热期
243 |             warmup_epoch = int((Freeze_epoch-Init_epoch)*0.2)
244 |             # 总共的步长
245 |             total_steps = int((Freeze_epoch-Init_epoch) * num_train / batch_size)
246 |             # 预热步长
247 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
248 |             # 学习率
249 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
250 |                                                         total_steps=total_steps,
251 |                                                         warmup_learning_rate=1e-4,
252 |                                                          warmup_steps=warmup_steps,
253 |                                                         hold_base_rate_steps=num_train,
254 |                                                         min_learn_rate=1e-6
255 |                                                         )
256 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
257 |         else:
258 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
259 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
260 | 
261 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
262 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
263 |                 steps_per_epoch=max(1, num_train//batch_size),
264 |                 validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
265 |                 validation_steps=max(1, num_val//batch_size),
266 |                 epochs=Freeze_epoch,
267 |                 initial_epoch=Init_epoch,
268 |                 callbacks=[logging, checkpoint, reduce_lr, early_stopping])
269 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
270 | 
271 |     for i in range(freeze_layers): model_body.layers[i].trainable = True
272 | 
273 |     # 解冻后训练
274 |     if True:
275 |         Freeze_epoch = 0
276 |         Epoch = 100
277 |         # batch_size大小，每次喂入多少数据
278 |         batch_size = 16
279 | 
280 |         # 最大学习率
281 |         learning_rate_base = 1e-4
282 |         if Cosine_scheduler:
283 |             # 预热期
284 |             warmup_epoch = int((Epoch-Freeze_epoch)*0.2)
285 |             # 总共的步长
286 |             total_steps = int((Epoch-Freeze_epoch) * num_train / batch_size)
287 |             # 预热步长
288 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
289 |             # 学习率
290 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
291 |                                                         total_steps=total_steps,
292 |                                                         warmup_learning_rate=1e-5,
293 |                                                         warmup_steps=warmup_steps,
294 |                                                         hold_base_rate_steps=num_train//2,
295 |                                                         min_learn_rate=1e-6
296 |                                                         )
297 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
298 |         else:
299 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
300 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
301 | 
302 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
303 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
304 |                 steps_per_epoch=max(1, num_train//batch_size),
305 |                 validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
306 |                 validation_steps=max(1, num_val//batch_size),
307 |                 epochs=Epoch,
308 |                 initial_epoch=Freeze_epoch,
309 |                 callbacks=[logging, checkpoint, reduce_lr, early_stopping])
310 |         model.save_weights(log_dir + 'last1.h5')
311 |         #json_config = model.to_json()
312 |         #with open(log_dir + 'model_config.json', 'w') as json_file:
313 |         #    json_file.write(json_config)
314 | 
315 | 


--------------------------------------------------------------------------------
/Model_pruning/compressor.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT license.
  3 | 
  4 | import logging
  5 | import tensorflow as tf
  6 | #tf.compat.v1.disable_eager_execution()
  7 | 
  8 | import numpy as np
  9 | from . import default_layers
 10 | from kerassurgeon import Surgeon
 11 | tf.config.experimental_run_functions_eagerly(True)
 12 | 
 13 | _logger = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | class LayerInfo:
 17 |     def __init__(self, keras_layer):
 18 |         self.keras_layer = keras_layer
 19 |         self.name = keras_layer.name
 20 |         self.type = default_layers.get_op_type(type(keras_layer))
 21 |         self.weight_index = default_layers.get_weight_index(self.type)
 22 |         if self.weight_index is not None:
 23 |             self.weight = keras_layer.weights[self.weight_index]
 24 |         self._call = None
 25 | 
 26 | class Compressor:
 27 |     """
 28 |     Abstract base TensorFlow compressor
 29 |     """
 30 | 
 31 |     def __init__(self, model, config_list):
 32 |         """
 33 |         Record necessary info in class members
 34 | 
 35 |         Parameters
 36 |         ----------
 37 |         model : keras model
 38 |             the model user wants to compress
 39 |         config_list : list
 40 |             the configurations that users specify for compression
 41 |         """
 42 |         self.bound_model = model
 43 |         self.config_list = config_list
 44 |         self.modules_to_compress = []
 45 |         self.modules_to_delete = []
 46 | 
 47 |     def detect_modules_to_compress(self):
 48 |         """
 49 |         detect all modules should be compressed, and save the result in `self.modules_to_compress`.
 50 | 
 51 |         The model will be instrumented and user should never edit it after calling this method.
 52 |         """
 53 |         if self.modules_to_compress is not None:
 54 |             self.modules_to_compress = []
 55 |             for keras_layer in self.bound_model.layers:
 56 |                 layer = LayerInfo(keras_layer)
 57 |                 config = self.select_config(layer)
 58 |                 if (config is not None) and (layer.name == 'conv2d_50'):    #Choose which layer to prune
 59 |                     self.modules_to_compress.append((layer, config))
 60 |                     self.modules_to_delete.append((keras_layer, config))
 61 |         return self.modules_to_compress, self.modules_to_delete
 62 | 
 63 |     def compress(self):
 64 |         """
 65 |         Compress the model with algorithm implemented by subclass.
 66 | 
 67 |         The model will be instrumented and user should never edit it after calling this method.
 68 |         `self.modules_to_compress` records all the to-be-compressed layers
 69 |         """
 70 |         modules_to_compress, ignore_ = self.detect_modules_to_compress()
 71 |         for layer, config in modules_to_compress:
 72 |             self._instrument_layer(layer, config)
 73 |         return self.bound_model
 74 | 
 75 |     def compress_model(self):
 76 |         """
 77 |         Compress the model with algorithm implemented by subclass.
 78 | 
 79 |         The model will be instrumented and user should never edit it after calling this method.
 80 |         `self.modules_to_compress` records all the to-be-compressed layers
 81 |         """
 82 |         ignore_, modules_to_compress = self.detect_modules_to_compress()
 83 |         for layer,config in modules_to_compress:
 84 |             layer_1 = LayerInfo(layer)
 85 |             self.bound_model = self.Prun_channel(layer_1, layer, config)
 86 |             #a_list = self.Prun_channel(layer_1, layer, config)
 87 |         return self.bound_model
 88 | 
 89 |     def compress_model_1(self, channels_p):
 90 |         """
 91 |         Compress the model with algorithm implemented by subclass.
 92 | 
 93 |         The model will be instrumented and user should never edit it after calling this method.
 94 |         `self.modules_to_compress` records all the to-be-compressed layers
 95 |         """
 96 |         ignore_, modules_to_compress = self.detect_modules_to_compress()
 97 |         for layer,config in modules_to_compress:
 98 |             layer_1 = LayerInfo(layer)
 99 |             #self.bound_model = self.Prun_channel(layer_1, layer, config)
100 |             self.bound_model = self.Prun_channel_1(layer_1, layer, config, channels_p)
101 |         return self.bound_model
102 | 
103 |     def get_modules_to_compress(self):
104 |         """
105 |         To obtain all the to-be-compressed layers.
106 | 
107 |         Returns
108 |         -------
109 |         self.modules_to_compress : list
110 |             a list of the layers, each of which is a tuple (`layer`, `config`),
111 |             `layer` is `LayerInfo`, `config` is a `dict`
112 |         """
113 |         return self.modules_to_compress
114 | 
115 |     def select_config(self, layer):
116 |         """
117 |         Find the configuration for `layer` by parsing `self.config_list`
118 | 
119 |         Parameters
120 |         ----------
121 |         layer: LayerInfo
122 |             one layer
123 | 
124 |         Returns
125 |         -------
126 |         ret : config or None
127 |             the retrieved configuration for this layer, if None, this layer should
128 |             not be compressed
129 |         """
130 |         ret = None
131 |         if layer.type is None:
132 |             return None
133 |         for config in self.config_list:
134 |             config = config.copy()
135 |             config['op_types'] = self._expand_config_op_types(config)
136 |             if layer.type not in config['op_types']:
137 |                 continue
138 |             if config.get('op_names') and layer.name not in config['op_names']:
139 |                 continue
140 |             ret = config
141 |         if ret is None or ret.get('exclude'):
142 |             return None
143 |         return ret
144 | 
145 |     def update_epoch(self, epoch):
146 |         """
147 |         If user want to update model every epoch, user can override this method.
148 |         This method should be called at the beginning of each epoch
149 | 
150 |         Parameters
151 |         ----------
152 |         epoch : num
153 |             the current epoch number
154 |         """
155 | 
156 |     def step(self):
157 |         """
158 |         If user want to update mask every step, user can override this method
159 |         """
160 | 
161 | 
162 |     def _instrument_layer(self, layer, config):
163 |         """
164 |         This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
165 | 
166 |         Parameters
167 |         ----------
168 |         layer : LayerInfo
169 |             the layer to instrument the compression operation
170 |         config : dict
171 |             the configuration for compressing this layer
172 |         """
173 |         raise NotImplementedError()
174 | 
175 |     def Prun_channel(self, layer, config):
176 |         """
177 |         This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
178 | 
179 |         Parameters
180 |         ----------
181 |         layer : LayerInfo
182 |             the layer to instrument the compression operation
183 |         config : dict
184 |             the configuration for compressing this layer
185 |         """
186 |         raise NotImplementedError()
187 | 
188 |     def Prun_channel_1(self, layer, config, channels_p):
189 |         """
190 |         This method is implemented in the subclasses, i.e., `Pruner` and `Quantizer`
191 | 
192 |         Parameters
193 |         ----------
194 |         layer : LayerInfo
195 |             the layer to instrument the compression operation
196 |         config : dict
197 |             the configuration for compressing this layer
198 |         """
199 |         raise NotImplementedError()
200 | 
201 |     def _expand_config_op_types(self, config):
202 |         if config is None:
203 |             return []
204 |         op_types = []
205 | 
206 |         for op_type in config.get('op_types', []):
207 |             if op_type == 'default':
208 |                 op_types.extend(default_layers.default_layers)
209 |             else:
210 |                 op_types.append(op_type)
211 |         return op_types
212 | 
213 | 
214 | class Pruner(Compressor):
215 |     """
216 |     Abstract base TensorFlow pruner
217 |     """
218 | 
219 |     def calc_mask(self, layer, config):
220 |         """
221 |         Pruners should overload this method to provide mask for weight tensors.
222 |         The mask must have the same shape and type comparing to the weight.
223 |         It will be applied with `mul()` operation on the weight.
224 |         This method is effectively hooked to `forward()` method of the model.
225 | 
226 |         Parameters
227 |         ----------
228 |         layer : LayerInfo
229 |             calculate mask for `layer`'s weight
230 |         config : dict
231 |             the configuration for generating the mask
232 |         """
233 |         raise NotImplementedError("Pruners must overload calc_mask()")
234 | 
235 |     def _instrument_layer(self, layer, config):
236 |         """
237 |         Create a wrapper forward function to replace the original one.
238 | 
239 |         Parameters
240 |         ----------
241 |         layer : LayerInfo
242 |             the layer to instrument the mask
243 |         config : dict
244 |             the configuration for generating the mask
245 |         """
246 |         layer._call = layer.keras_layer.call
247 | 
248 |         def new_call(*inputs):
249 |             weights = [x.numpy() for x in layer.keras_layer.weights]
250 |             mask = self.calc_mask(layer, config)
251 |             weights[layer.weight_index] = weights[layer.weight_index] * mask
252 |             layer.keras_layer.set_weights(weights)
253 |             ret = layer._call(*inputs)
254 |             return ret
255 | 
256 |         layer.keras_layer.call = new_call
257 | 
258 |     def Prun_channel(self, layer, layer_1, config):
259 |         weight = layer.weight
260 |         op_type = layer.type
261 |         op_name = layer.name
262 |         assert 0 <= config.get('sparsity') < 1
263 |         assert op_type in ['Conv1D', 'Conv2D']
264 |         assert op_type in config['op_types']
265 | 
266 |         # op_name = layer.name
267 |         # assert 0 <= config.get('sparsity') < 1
268 |         # assert op_type in ['Conv1D', 'Conv2D']
269 |         # assert op_type in config['op_types']
270 | 
271 |         if layer.name in self.epoch_pruned_layers:
272 |             assert layer.name in self.mask_dict
273 |             return self.mask_dict.get(layer.name)
274 | 
275 |         try:
276 |             w = tf.stop_gradient(tf.transpose(tf.reshape(weight, (-1, weight.shape[-1])), [1, 0]))
277 |             masks = np.ones(w.shape)
278 |             num_filters = w.shape[0]
279 |             num_prune = int(num_filters * config.get('sparsity'))
280 |             if num_filters < 2 or num_prune < 1:
281 |                 return masks
282 |             min_gm_idx = self._get_min_gm_kernel_idx_m(w, num_prune)
283 | 
284 |             surgeon = Surgeon(self.bound_model, copy=False)
285 |             channels = min_gm_idx
286 |             surgeon.add_job('delete_channels', layer_1, channels=channels)
287 | 
288 |             #for idx in min_gm_idx:
289 |             #    masks[idx] = 0.
290 |         finally:
291 |             masks = tf.reshape(tf.transpose(masks, [1, 0]), weight.shape)
292 |             masks = tf.Variable(masks)
293 |             self.mask_dict.update({op_name: masks})
294 |             self.epoch_pruned_layers.add(layer.name)
295 | 
296 |         return surgeon.operate()
297 |         #return min_gm_idx
298 | 
299 | 
300 |     def Prun_channel_1(self, layer, layer_1, config, channels_p):
301 |         weight = layer.weight
302 |         op_type = layer.type
303 |         op_name = layer.name
304 |         assert 0 <= config.get('sparsity') < 1
305 |         assert op_type in ['Conv1D', 'Conv2D']
306 |         assert op_type in config['op_types']
307 | 
308 |         # op_name = layer.name
309 |         # assert 0 <= config.get('sparsity') < 1
310 |         # assert op_type in ['Conv1D', 'Conv2D']
311 |         # assert op_type in config['op_types']
312 | 
313 |         #if layer.name in self.epoch_pruned_layers:
314 |         #    assert layer.name in self.mask_dict
315 |         #    return self.mask_dict.get(layer.name)
316 | 
317 |         try:
318 |             w = tf.stop_gradient(tf.transpose(tf.reshape(weight, (-1, weight.shape[-1])), [1, 0]))
319 |             masks = np.ones(w.shape)
320 |             num_filters = w.shape[0]
321 |             num_prune = int(num_filters * config.get('sparsity'))
322 |             if num_filters < 2 or num_prune < 1:
323 |                 return masks
324 |             #min_gm_idx = self._get_min_gm_kernel_idx_m(w, num_prune)
325 | 
326 |             surgeon = Surgeon(self.bound_model, copy=False)
327 |             channels = channels_p
328 |             surgeon.add_job('delete_channels', layer_1, channels=channels)
329 | 
330 |             #for idx in min_gm_idx:
331 |             #    masks[idx] = 0.
332 |         finally:
333 |             masks = tf.reshape(tf.transpose(masks, [1, 0]), weight.shape)
334 |             masks = tf.Variable(masks)
335 |             self.mask_dict.update({op_name: masks})
336 |             self.epoch_pruned_layers.add(layer.name)
337 | 
338 |         return surgeon.operate()
339 |         #return min_gm_idx
340 | 
341 | 
342 |     def _get_min_gm_kernel_idx_m(self, weight, n):
343 |         
344 |         dist_list = []
345 |         sum_max = 0;
346 |         for out_i in range(weight.shape[0]):
347 |             dist_sum = self._get_distance_sum_m(weight, out_i)
348 |             dist_list.append((dist_sum, out_i))
349 |             #dist_list.append(dist_sum)
350 | 
351 |         a=0
352 |         min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n]
353 | 
354 |         #min_gm_kernels = dist_list[:n]
355 |         #size_a = tf.size(dist_list)
356 |         #min_gm_kernels = tf.sort(dist_list)
357 |         return [x[1] for x in min_gm_kernels]
358 |         #return dist_list
359 | 
360 |     def _get_distance_sum_m(self, weight, out_idx):
361 |         anchor_w = tf.tile(tf.expand_dims(weight[out_idx], 0), [weight.shape[0], 1])
362 |         x = weight - anchor_w
363 |         x = tf.math.reduce_sum((x*x), -1)
364 |         x = tf.math.sqrt(x)
365 |         return tf.math.reduce_sum(x)
366 | 
367 | 
368 | class Quantizer(Compressor):
369 |     """
370 |     Abstract base TensorFlow quantizer
371 |     """
372 | 
373 |     def quantize_weight(self, weight, config, op, op_type, op_name):
374 |         raise NotImplementedError("Quantizer must overload quantize_weight()")
375 | 


--------------------------------------------------------------------------------
/core/evaluation.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Xilinx Inc.
  2 | # 
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | # 
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | # 
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # --------------------------------------------------------
 16 | # Code of "Evaluate classification or detection performance"
 17 | # python version
 18 | # Written by Lu Tian
 19 | # --------------------------------------------------------
 20 | 
 21 | import argparse
 22 | import numpy as np
 23 | import os
 24 | 
 25 | 
 26 | def compute_classification_accuracy(results, gts):
 27 |     """
 28 |     Evaluate classification results
 29 |     :param results: predicted results
 30 |     :param gts: ground truth
 31 |     :return: accuracy
 32 |     """
 33 |     num_label = len(gts[0].split(' ')) - 1
 34 |     image_label_gt = {}
 35 |     for gt in gts:
 36 |         gt_info = gt.split(' ')
 37 |         if len(gt_info) is not (num_label + 1):
 38 |             print ('label number does not match: ' + gt_info[0])
 39 |             return 0
 40 |         image_label_gt[gt_info[0]] = np.array(gt_info[1:])
 41 | 
 42 |     accuracy = np.zeros(num_label)
 43 |     count = 0
 44 |     image_names = set()
 45 |     for result in results:
 46 |         result_info = result.split(' ')
 47 |         if result_info[0] not in image_label_gt.keys():
 48 |             print ('could not find ground truth of image: ' + result_info[0])
 49 |             return 0
 50 |         if result_info[0] in image_names:
 51 |             print ('duplicate results of image: ' + result_info[0])
 52 |             return 0
 53 |         if len(result_info) is not (num_label + 1):
 54 |             print ('wrong predict label number of image: ' + result_info[0])
 55 |             return 0
 56 |         prediction = np.array(result_info[1:])
 57 |         accuracy += prediction == image_label_gt[result_info[0]]
 58 |         count += 1
 59 |         image_names.add(result_info[0])
 60 |     accuracy /= max(1, count)
 61 |     print ('evaluate ' + str(count) + ' images')
 62 |     return accuracy
 63 | 
 64 | 
 65 | def voc_ap(rec, prec, use_07_metric=False):
 66 |     """
 67 |     Compute VOC AP given precision and recall.
 68 |     :param rec: recall
 69 |     :param prec: precision
 70 |     :param use_07_metric: uses the VOC 07 11 point method to compute VOC AP given precision and recall
 71 |     :return: ap
 72 |     """
 73 |     if use_07_metric:
 74 |         # 11 point metric
 75 |         ap = 0.
 76 |         for t in np.arange(0., 1.1, 0.1):
 77 |             if np.sum(rec >= t) == 0:
 78 |                 p = 0
 79 |             else:
 80 |                 p = np.max(prec[rec >= t])
 81 |             ap = ap + p / 11.
 82 |     else:
 83 |         # correct AP calculation
 84 |         # first append sentinel values at the end
 85 |         mrec = np.concatenate(([0.], rec, [1.]))
 86 |         mpre = np.concatenate(([0.], prec, [0.]))
 87 | 
 88 |         # compute the precision envelope
 89 |         for i in range(mpre.size - 1, 0, -1):
 90 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 91 | 
 92 |         # to calculate area under PR curve, look for points
 93 |         # where X axis (recall) changes value
 94 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 95 | 
 96 |         # and sum (\Delta recall) * prec
 97 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 98 |     return ap
 99 | 
100 | 
101 | def compute_detection_ap(results, gts, thresh, overlap_thresh, use_07_metric=False):
102 |     """
103 |     Evaluate detection results
104 |     :param results: image_name class_label score xmin ymin xmax ymax
105 |     :param gts: image_name class_label xmin ymin xmax ymax
106 |     :param thresh: only bboxes whose confidence score under thresh are used
107 |     :param overlap_thresh: threshold of IOU ratio to determine a match bbox
108 |     :param use_07_metric: uses the VOC 07 11 point method to compute VOC AP given precision and recall
109 |     :return: recall, precision, ap
110 |     """
111 |     # load gt
112 |     class_gts = {}
113 |     class_num_positive = {}
114 |     image_names = set()
115 |     for gt in gts:
116 |         gt_info = gt.split(' ')
117 |         if len(gt_info) != 6 and len(gt_info) != 7:
118 |             print('wrong ground truth info: ' + gt_info[0])
119 |             return 0
120 |         image_name = gt_info[0]
121 |         class_name = gt_info[1]
122 |         bbox = [float(item) for item in gt_info[2:6]]
123 |         if len(gt_info) == 6:
124 |             difficult = False
125 |         else:
126 |             difficult = bool(int(gt_info[-1]))
127 | 
128 |         if class_name not in class_gts.keys():
129 |             class_gts[class_name] = {}
130 |             class_num_positive[class_name] = 0
131 |         if image_name not in class_gts[class_name].keys():
132 |             class_gts[class_name][image_name] = {'bbox': np.array([bbox]),
133 |                                                  'hit': [False],
134 |                                                  'difficult': [difficult]}
135 |         else:
136 |             class_gts[class_name][image_name]['bbox'] = np.vstack((class_gts[class_name][image_name]['bbox'],
137 |                                                                    np.array(bbox)))
138 |             class_gts[class_name][image_name]['hit'].append(False)
139 |             class_gts[class_name][image_name]['difficult'].append(difficult)
140 |         class_num_positive[class_name] += int(True ^ difficult)
141 |         image_names.add(image_name)
142 |     class_names = class_gts.keys()
143 | 
144 |     # read dets
145 |     class_dets = {}
146 |     for result in results:
147 |         result_info = result.split(' ')
148 |         if len(result_info) != 7:
149 |             print ('wrong detections info: ' + result_info[0])
150 |             return 0
151 |         image_name = result_info[0]
152 |         class_name = result_info[1]
153 |         bbox = [float(item) for item in result_info[2:]]
154 |         if bbox[0] <= thresh:
155 |             continue
156 |         if class_name not in class_names:
157 |             continue
158 |         if class_name not in class_dets.keys():
159 |             class_dets[class_name] = {'images': [],
160 |                                       'scores': [],
161 |                                       'bboxes': []}
162 |         class_dets[class_name]['images'].append(image_name)
163 |         class_dets[class_name]['scores'].append(bbox[0])
164 |         class_dets[class_name]['bboxes'].append(bbox[1:])
165 | 
166 |     ap = {}
167 |     precision = {}
168 |     recall = {}
169 |     for class_name in class_names:
170 |         if class_name not in class_dets.keys():
171 |             ap[class_name] = 0
172 |             recall[class_name] = 0
173 |             precision[class_name] = 0
174 |             continue
175 | 
176 |         gt_images = class_gts[class_name]
177 |         num_positive = class_num_positive[class_name]
178 | 
179 |         det_images = class_dets[class_name]['images']
180 |         det_scores = np.array(class_dets[class_name]['scores'])
181 |         det_bboxes = np.array(class_dets[class_name]['bboxes'])
182 | 
183 |         # sort by confidence
184 |         sorted_index = np.argsort(-det_scores)
185 |         det_bboxes = det_bboxes[sorted_index, :]
186 |         det_images = [det_images[x] for x in sorted_index]
187 | 
188 |         # go down dets and mark TPs and FPs
189 |         num_dets = len(det_images)
190 |         true_positive = np.zeros(num_dets)
191 |         false_positive = np.zeros(num_dets)
192 |         for idx in range(num_dets):
193 |             if det_images[idx] not in gt_images.keys():
194 |                 false_positive[idx] = 1
195 |                 continue
196 | 
197 |             gt_bboxes = gt_images[det_images[idx]]['bbox'].astype(float)
198 |             gt_hit = gt_images[det_images[idx]]['hit']
199 |             git_difficult = gt_images[det_images[idx]]['difficult']
200 |             det_bbox = det_bboxes[idx, :].astype(float)
201 |             overlaps_max = -np.inf
202 | 
203 |             if gt_bboxes.size > 0:
204 |                 # compute overlaps
205 |                 # intersection
206 |                 inter_xmin = np.maximum(gt_bboxes[:, 0], det_bbox[0])
207 |                 inter_ymin = np.maximum(gt_bboxes[:, 1], det_bbox[1])
208 |                 inter_xmax = np.minimum(gt_bboxes[:, 2], det_bbox[2])
209 |                 inter_ymax = np.minimum(gt_bboxes[:, 3], det_bbox[3])
210 |                 inter_width = np.maximum(inter_xmax - inter_xmin + 1., 0.)
211 |                 inter_height = np.maximum(inter_ymax - inter_ymin + 1., 0.)
212 |                 inters = inter_width * inter_height
213 | 
214 |                 # union
215 |                 unions = ((det_bbox[2] - det_bbox[0] + 1.) * (det_bbox[3] - det_bbox[1] + 1.) +
216 |                           (gt_bboxes[:, 2] - gt_bboxes[:, 0] + 1.) * (gt_bboxes[:, 3] - gt_bboxes[:, 1] + 1.) - inters)
217 | 
218 |                 overlaps = inters / unions
219 |                 overlaps_max = np.max(overlaps)
220 |                 jmax = np.argmax(overlaps)
221 | 
222 |             if overlaps_max > overlap_thresh:
223 |                 if not git_difficult[jmax]:
224 |                     if not gt_hit[jmax]:
225 |                         true_positive[idx] = 1.
226 |                         gt_hit[jmax] = 1
227 |                     else:
228 |                         false_positive[idx] = 1.
229 |             else:
230 |                 false_positive[idx] = 1.
231 | 
232 |         # compute precision recall
233 |         false_positive = np.cumsum(false_positive)
234 |         true_positive = np.cumsum(true_positive)
235 |         recall[class_name] = true_positive / float(num_positive)
236 |         precision[class_name] = true_positive / np.maximum(true_positive + false_positive, np.finfo(np.float64).eps)
237 |         ap[class_name] = voc_ap(recall[class_name], precision[class_name], use_07_metric)
238 |     print ('evaluate ' + str(len(image_names)) + ' images')
239 |     return recall, precision, ap
240 | 
241 | 
242 | if __name__ == '__main__':
243 |     parser = argparse.ArgumentParser(description='evaluate classification or detection performance')
244 |     parser.add_argument('-mode', default='detection',
245 |                         help='mode, detection or classification, default detection')
246 |     parser.add_argument('-result_file', default='',
247 |                         help="""Result file in space-separated text format.
248 |                         For classification, each row is: image_id label [label ...].
249 |                         For detection, each row is: image_id label score xmin ymin xmax ymax [difficult_bool].""")
250 |     parser.add_argument('-gt_file', default='',
251 |                         help="""Ground truth file in space-separated text format. 
252 |                         For classification, each row is: image_id label [label ...].
253 |                         For detection, each row is: image_id label xmin ymin xmax ymax.""")
254 |     parser.add_argument('-detection_metric', default='map',
255 |                         help="""Evaluation metric for detection, default map.
256 |                         Options are map (mean average precision), precision (given recall), recall (given precision), 
257 |                         pr (precision and recall given threshold of confidence score).""")
258 |     parser.add_argument('-detection_iou', default='0.5',
259 |                         help="""Threshold of IOU ratio to
260 |                          determine a match bbox.""")
261 |     parser.add_argument('-detection_thresh', default='0.005',
262 |                         help="""Threshold of confidence score for calculating evaluation metric, default 0.05.
263 |                         For metric = pr, detection_thresh should be the confidence score to determine a positive bbox.
264 |                         For other detection metrics, detection_thresh should be a very small value.""")
265 |     parser.add_argument('-detection_fix_recall', default='0.8',
266 |                         help="""Used when detection_metric is precision, default 0.8.""")
267 |     parser.add_argument('-detection_fix_precision', default='0.8',
268 |                         help="""Used when detection_metric is recall, default 0.8.""")
269 |     parser.add_argument('-detection_use_07_metric', default='False',
270 |                         help="""Uses the VOC 07 11 point method to compute VOC AP given precision and recall.""")
271 | 
272 |     args = parser.parse_args()
273 | 
274 |     results_file = open(args.result_file, 'r')
275 |     #fix for pytyhon3 JimH
276 |     #results_lines = filter(None, [item.strip() for item in results_file.readlines()])
277 |     results_lines = list(filter(None, [item.strip() for item in results_file.readlines()]))
278 |     gts_file = open(args.gt_file, 'r')
279 |     #fix for python3 Jimh
280 |     #gts_lines = filter(None, [item.strip() for item in gts_file.readlines()])
281 |     gts_lines = list(filter(None, [item.strip() for item in gts_file.readlines()]))
282 |     if len(gts_lines) < 1:
283 |         print ('ground truth file is empty!')
284 |     if len(results_lines) < 1:
285 |         print ('result file is empty!')
286 | 
287 |     if args.mode == 'classification':
288 |         accuracy = compute_classification_accuracy(results_lines, gts_lines)
289 |         print ('classification accuracy of each class: ' + str(accuracy))
290 |         print ('mean classification accuracy: ' + str(np.mean(accuracy)))
291 |     elif args.mode == 'detection':
292 |         detection_thresh = float(args.detection_thresh)
293 |         detection_iou = float(args.detection_iou)
294 |         use_07_metric = False
295 |         if args.detection_use_07_metric == 'True':
296 |             use_07_metric = True
297 |         recall, precision, ap = compute_detection_ap(results_lines, gts_lines, detection_thresh, detection_iou,
298 |                                                      use_07_metric)
299 |         if args.detection_metric == 'map':
300 |             for class_name in ap.keys():
301 |                 print (class_name + ' AP: ' + str(ap[class_name]))
302 |             print ('mAP: ' + str((float(sum(ap.values()))) / max(1, len(ap))))
303 |         elif args.detection_metric == 'precision':
304 |             fix_recall = float(args.detection_fix_recall)
305 |             for class_name in ap.keys():
306 |                 if np.sum(recall[class_name] >= fix_recall) == 0:
307 |                     output_precision = 0
308 |                 else:
309 |                     output_precision = np.max(precision[class_name][recall[class_name] >= fix_recall])
310 |                 print (class_name + ', set recall is ' + str(fix_recall) + ', precision: ' + str(output_precision))
311 |         elif args.detection_metric == 'recall':
312 |             fix_precision = float(args.detection_fix_precision)
313 |             for class_name in ap.keys():
314 |                 if np.sum(precision[class_name] >= fix_precision) == 0:
315 |                     output_recall = 0
316 |                 else:
317 |                     output_recall = np.max(recall[class_name][precision[class_name] >= fix_precision])
318 |                 print (class_name + ', set precision is ' + str(fix_precision) + ', recall: ' + str(output_recall))
319 |         elif args.detection_metric == 'pr':
320 |             for class_name in ap.keys():
321 |                 if len(recall[class_name]) > 0:
322 |                     output_recall = recall[class_name][-1]
323 |                 else:
324 |                     output_recall = 0
325 |                 if np.sum(recall[class_name] >= output_recall) == 0:
326 |                     output_precision = 0
327 |                 else:
328 |                     output_precision = np.max(precision[class_name][recall[class_name] >= output_recall])
329 |                 print (class_name + ', set confidence score is ' + str(detection_thresh) + \
330 |                       ', precision: ' + str(output_precision) + ', recall: ' + str(output_recall))
331 |         else:
332 |             print ('wrong evaluation metric!')
333 |     results_file.close()
334 |     gts_file.close()
335 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | import numpy as np
  3 | from PIL import Image
  4 | from functools import reduce
  5 | from tensorflow import keras
  6 | from tensorflow.keras import backend as K
  7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
  8 | import cv2
  9 | def compose(*funcs):
 10 |     if funcs:
 11 |         return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
 12 |     else:
 13 |         raise ValueError('Composition of empty sequence not supported.')
 14 | 
 15 | def letterbox_image(image, size):
 16 |     iw, ih = image.size
 17 |     w, h = size
 18 |     scale = min(w/iw, h/ih)
 19 |     nw = int(iw*scale)
 20 |     nh = int(ih*scale)
 21 | 
 22 |     image = image.resize((nw,nh), Image.BICUBIC)
 23 |     new_image = Image.new('RGB', size, (128,128,128))
 24 |     new_image.paste(image, ((w-nw)//2, (h-nh)//2))
 25 |     return new_image
 26 | 
 27 | def rand(a=0, b=1):
 28 |     return np.random.rand()*(b-a) + a
 29 | 
 30 | def merge_bboxes(bboxes, cutx, cuty):
 31 |     merge_bbox = []
 32 |     for i in range(len(bboxes)):
 33 |         for box in bboxes[i]:
 34 |             tmp_box = []
 35 |             x1,y1,x2,y2 = box[0], box[1], box[2], box[3]
 36 | 
 37 |             if i == 0:
 38 |                 if y1 > cuty or x1 > cutx:
 39 |                     continue
 40 |                 if y2 >= cuty and y1 <= cuty:
 41 |                     y2 = cuty
 42 |                     if y2-y1 < 5:
 43 |                         continue
 44 |                 if x2 >= cutx and x1 <= cutx:
 45 |                     x2 = cutx
 46 |                     if x2-x1 < 5:
 47 |                         continue
 48 |                 
 49 |             if i == 1:
 50 |                 if y2 < cuty or x1 > cutx:
 51 |                     continue
 52 | 
 53 |                 if y2 >= cuty and y1 <= cuty:
 54 |                     y1 = cuty
 55 |                     if y2-y1 < 5:
 56 |                         continue
 57 |                 
 58 |                 if x2 >= cutx and x1 <= cutx:
 59 |                     x2 = cutx
 60 |                     if x2-x1 < 5:
 61 |                         continue
 62 | 
 63 |             if i == 2:
 64 |                 if y2 < cuty or x2 < cutx:
 65 |                     continue
 66 | 
 67 |                 if y2 >= cuty and y1 <= cuty:
 68 |                     y1 = cuty
 69 |                     if y2-y1 < 5:
 70 |                         continue
 71 | 
 72 |                 if x2 >= cutx and x1 <= cutx:
 73 |                     x1 = cutx
 74 |                     if x2-x1 < 5:
 75 |                         continue
 76 | 
 77 |             if i == 3:
 78 |                 if y1 > cuty or x2 < cutx:
 79 |                     continue
 80 | 
 81 |                 if y2 >= cuty and y1 <= cuty:
 82 |                     y2 = cuty
 83 |                     if y2-y1 < 5:
 84 |                         continue
 85 | 
 86 |                 if x2 >= cutx and x1 <= cutx:
 87 |                     x1 = cutx
 88 |                     if x2-x1 < 5:
 89 |                         continue
 90 | 
 91 |             tmp_box.append(x1)
 92 |             tmp_box.append(y1)
 93 |             tmp_box.append(x2)
 94 |             tmp_box.append(y2)
 95 |             tmp_box.append(box[-1])
 96 |             merge_bbox.append(tmp_box)
 97 |     return merge_bbox
 98 | 
 99 | def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5):
100 |     '''random preprocessing for real-time data augmentation'''
101 |     h, w = input_shape
102 |     min_offset_x = 0.4
103 |     min_offset_y = 0.4
104 |     scale_low = 1-min(min_offset_x,min_offset_y)
105 |     scale_high = scale_low+0.2
106 | 
107 |     image_datas = [] 
108 |     box_datas = []
109 |     index = 0
110 | 
111 |     place_x = [0,0,int(w*min_offset_x),int(w*min_offset_x)]
112 |     place_y = [0,int(h*min_offset_y),int(h*min_offset_y),0]
113 |     for line in annotation_line:
114 |         # 每一行进行分割
115 |         line_content = line.split()
116 |         # 打开图片
117 |         image = Image.open(line_content[0])
118 |         image = image.convert("RGB") 
119 |         # 图片的大小
120 |         iw, ih = image.size
121 |         # 保存框的位置
122 |         box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
123 |         
124 |         # 是否翻转图片
125 |         flip = rand()<.5
126 |         if flip and len(box)>0:
127 |             image = image.transpose(Image.FLIP_LEFT_RIGHT)
128 |             box[:, [0,2]] = iw - box[:, [2,0]]
129 | 
130 |         # 对输入进来的图片进行缩放
131 |         new_ar = w/h
132 |         scale = rand(scale_low, scale_high)
133 |         if new_ar < 1:
134 |             nh = int(scale*h)
135 |             nw = int(nh*new_ar)
136 |         else:
137 |             nw = int(scale*w)
138 |             nh = int(nw/new_ar)
139 |         image = image.resize((nw,nh), Image.BICUBIC)
140 | 
141 |         # 进行色域变换
142 |         hue = rand(-hue, hue)
143 |         sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
144 |         val = rand(1, val) if rand()<.5 else 1/rand(1, val)
145 |         x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
146 |         x[..., 0] += hue*360
147 |         x[..., 0][x[..., 0]>1] -= 1
148 |         x[..., 0][x[..., 0]<0] += 1
149 |         x[..., 1] *= sat
150 |         x[..., 2] *= val
151 |         x[x[:,:, 0]>360, 0] = 360
152 |         x[:, :, 1:][x[:, :, 1:]>1] = 1
153 |         x[x<0] = 0
154 |         image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) # numpy array, 0 to 1
155 | 
156 |         image = Image.fromarray((image*255).astype(np.uint8))
157 |         # 将图片进行放置，分别对应四张分割图片的位置
158 |         dx = place_x[index]
159 |         dy = place_y[index]
160 |         new_image = Image.new('RGB', (w,h), (128,128,128))
161 |         new_image.paste(image, (dx, dy))
162 |         image_data = np.array(new_image)/255
163 | 
164 |         
165 |         index = index + 1
166 |         box_data = []
167 |         # 对box进行重新处理
168 |         if len(box)>0:
169 |             np.random.shuffle(box)
170 |             box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
171 |             box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
172 |             box[:, 0:2][box[:, 0:2]<0] = 0
173 |             box[:, 2][box[:, 2]>w] = w
174 |             box[:, 3][box[:, 3]>h] = h
175 |             box_w = box[:, 2] - box[:, 0]
176 |             box_h = box[:, 3] - box[:, 1]
177 |             box = box[np.logical_and(box_w>1, box_h>1)]
178 |             box_data = np.zeros((len(box),5))
179 |             box_data[:len(box)] = box
180 |         
181 |         image_datas.append(image_data)
182 |         box_datas.append(box_data)
183 | 
184 |     # 将图片分割，放在一起
185 |     cutx = np.random.randint(int(w*min_offset_x), int(w*(1 - min_offset_x)))
186 |     cuty = np.random.randint(int(h*min_offset_y), int(h*(1 - min_offset_y)))
187 | 
188 |     new_image = np.zeros([h,w,3])
189 |     new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
190 |     new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
191 |     new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
192 |     new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
193 | 
194 |     # 对框进行进一步的处理
195 |     new_boxes = merge_bboxes(box_datas, cutx, cuty)
196 | 
197 |     # 将box进行调整
198 |     box_data = np.zeros((max_boxes,5))
199 |     if len(new_boxes)>0:
200 |         if len(new_boxes)>max_boxes: new_boxes = new_boxes[:max_boxes]
201 |         box_data[:len(new_boxes)] = new_boxes
202 |     return new_image, box_data
203 | 
204 | 
205 | def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=1.5, val=1.5):
206 |     '''random preprocessing for real-time data augmentation'''
207 |     line = annotation_line.split()
208 |     image = Image.open(line[0])
209 |     iw, ih = image.size
210 |     h, w = input_shape
211 |     box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
212 | 
213 |     # 对图像进行缩放并且进行长和宽的扭曲
214 |     new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
215 |     scale = rand(.25,2)
216 |     if new_ar < 1:
217 |         nh = int(scale*h)
218 |         nw = int(nh*new_ar)
219 |     else:
220 |         nw = int(scale*w)
221 |         nh = int(nw/new_ar)
222 |     image = image.resize((nw,nh), Image.BICUBIC)
223 | 
224 |     # 将图像多余的部分加上灰条
225 |     dx = int(rand(0, w-nw))
226 |     dy = int(rand(0, h-nh))
227 |     new_image = Image.new('RGB', (w,h), (128,128,128))
228 |     new_image.paste(image, (dx, dy))
229 |     image = new_image
230 | 
231 |     # 翻转图像
232 |     flip = rand()<.5
233 |     if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
234 | 
235 |     # 色域扭曲
236 |     hue = rand(-hue, hue)
237 |     sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
238 |     val = rand(1, val) if rand()<.5 else 1/rand(1, val)
239 |     x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
240 |     x[..., 0] += hue*360
241 |     x[..., 0][x[..., 0]>1] -= 1
242 |     x[..., 0][x[..., 0]<0] += 1
243 |     x[..., 1] *= sat
244 |     x[..., 2] *= val
245 |     x[x[:,:, 0]>360, 0] = 360
246 |     x[:, :, 1:][x[:, :, 1:]>1] = 1
247 |     x[x<0] = 0
248 |     image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) # numpy array, 0 to 1
249 | 
250 |     # 将box进行调整
251 |     box_data = np.zeros((max_boxes,5))
252 |     if len(box)>0:
253 |         np.random.shuffle(box)
254 |         box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
255 |         box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
256 |         if flip: box[:, [0,2]] = w - box[:, [2,0]]
257 |         box[:, 0:2][box[:, 0:2]<0] = 0
258 |         box[:, 2][box[:, 2]>w] = w
259 |         box[:, 3][box[:, 3]>h] = h
260 |         box_w = box[:, 2] - box[:, 0]
261 |         box_h = box[:, 3] - box[:, 1]
262 |         box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
263 |         if len(box)>max_boxes: box = box[:max_boxes]
264 |         box_data[:len(box)] = box
265 |     
266 |     return image_data, box_data
267 | 
268 | 
269 | def cosine_decay_with_warmup(global_step,
270 |                              learning_rate_base,
271 |                              total_steps,
272 |                              warmup_learning_rate=0.0,
273 |                              warmup_steps=0,
274 |                              hold_base_rate_steps=0,
275 |                              min_learn_rate=0,
276 |                              ):
277 |     """
278 |     参数：
279 |         global_step: 上面定义的Tcur，记录当前执行的步数。
280 |         learning_rate_base：预先设置的学习率，当warm_up阶段学习率增加到learning_rate_base，就开始学习率下降。
281 |         total_steps: 是总的训练的步数，等于epoch*sample_count/batch_size,(sample_count是样本总数，epoch是总的循环次数)
282 |         warmup_learning_rate: 这是warm up阶段线性增长的初始值
283 |         warmup_steps: warm_up总的需要持续的步数
284 |         hold_base_rate_steps: 这是可选的参数，即当warm up阶段结束后保持学习率不变，知道hold_base_rate_steps结束后才开始学习率下降
285 |     """
286 |     if total_steps < warmup_steps:
287 |         raise ValueError('total_steps must be larger or equal to '
288 |                             'warmup_steps.')
289 |     #这里实现了余弦退火的原理，设置学习率的最小值为0，所以简化了表达式
290 |     learning_rate = 0.5 * learning_rate_base * (1 + np.cos(np.pi *
291 |         (global_step - warmup_steps - hold_base_rate_steps) / float(total_steps - warmup_steps - hold_base_rate_steps)))
292 |     #如果hold_base_rate_steps大于0，表明在warm up结束后学习率在一定步数内保持不变
293 |     if hold_base_rate_steps > 0:
294 |         learning_rate = np.where(global_step > warmup_steps + hold_base_rate_steps,
295 |                                     learning_rate, learning_rate_base)
296 |     if warmup_steps > 0:
297 |         if learning_rate_base < warmup_learning_rate:
298 |             raise ValueError('learning_rate_base must be larger or equal to '
299 |                                 'warmup_learning_rate.')
300 |         #线性增长的实现
301 |         slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
302 |         warmup_rate = slope * global_step + warmup_learning_rate
303 |         #只有当global_step 仍然处于warm up阶段才会使用线性增长的学习率warmup_rate，否则使用余弦退火的学习率learning_rate
304 |         learning_rate = np.where(global_step < warmup_steps, warmup_rate,
305 |                                     learning_rate)
306 | 
307 |     learning_rate = max(learning_rate,min_learn_rate)
308 |     return learning_rate
309 | 
310 | 
311 | class WarmUpCosineDecayScheduler(keras.callbacks.Callback):
312 |     """
313 |     继承Callback，实现对学习率的调度
314 |     """
315 |     def __init__(self,
316 |                  learning_rate_base,
317 |                  total_steps,
318 |                  global_step_init=0,
319 |                  warmup_learning_rate=0.0,
320 |                  warmup_steps=0,
321 |                  hold_base_rate_steps=0,
322 |                  min_learn_rate=0,
323 |                  # interval_epoch代表余弦退火之间的最低点
324 |                  interval_epoch=[0.05, 0.15, 0.30, 0.50],
325 |                  verbose=0):
326 |         super(WarmUpCosineDecayScheduler, self).__init__()
327 |         # 基础的学习率
328 |         self.learning_rate_base = learning_rate_base
329 |         # 热调整参数
330 |         self.warmup_learning_rate = warmup_learning_rate
331 |         # 参数显示  
332 |         self.verbose = verbose
333 |         # learning_rates用于记录每次更新后的学习率，方便图形化观察
334 |         self.min_learn_rate = min_learn_rate
335 |         self.learning_rates = []
336 | 
337 |         self.interval_epoch = interval_epoch
338 |         # 贯穿全局的步长
339 |         self.global_step_for_interval = global_step_init
340 |         # 用于上升的总步长
341 |         self.warmup_steps_for_interval = warmup_steps
342 |         # 保持最高峰的总步长
343 |         self.hold_steps_for_interval = hold_base_rate_steps
344 |         # 整个训练的总步长
345 |         self.total_steps_for_interval = total_steps
346 | 
347 |         self.interval_index = 0
348 |         # 计算出来两个最低点的间隔
349 |         self.interval_reset = [self.interval_epoch[0]]
350 |         for i in range(len(self.interval_epoch)-1):
351 |             self.interval_reset.append(self.interval_epoch[i+1]-self.interval_epoch[i])
352 |         self.interval_reset.append(1-self.interval_epoch[-1])
353 | 
354 | 	#更新global_step，并记录当前学习率
355 |     def on_batch_end(self, batch, logs=None):
356 |         self.global_step = self.global_step + 1
357 |         self.global_step_for_interval = self.global_step_for_interval + 1
358 |         lr = K.get_value(self.model.optimizer.lr)
359 |         self.learning_rates.append(lr)
360 | 
361 | 	#更新学习率
362 |     def on_batch_begin(self, batch, logs=None):
363 |         # 每到一次最低点就重新更新参数
364 |         if self.global_step_for_interval in [0]+[int(i*self.total_steps_for_interval) for i in self.interval_epoch]:
365 |             self.total_steps = self.total_steps_for_interval * self.interval_reset[self.interval_index]
366 |             self.warmup_steps = self.warmup_steps_for_interval * self.interval_reset[self.interval_index]
367 |             self.hold_base_rate_steps = self.hold_steps_for_interval * self.interval_reset[self.interval_index]
368 |             self.global_step = 0
369 |             self.interval_index += 1
370 | 
371 |         lr = cosine_decay_with_warmup(global_step=self.global_step,
372 |                                       learning_rate_base=self.learning_rate_base,
373 |                                       total_steps=self.total_steps,
374 |                                       warmup_learning_rate=self.warmup_learning_rate,
375 |                                       warmup_steps=self.warmup_steps,
376 |                                       hold_base_rate_steps=self.hold_base_rate_steps,
377 |                                       min_learn_rate = self.min_learn_rate)
378 |         K.set_value(self.model.optimizer.lr, lr)
379 |         if self.verbose > 0:
380 |             print('\nBatch %05d: setting learning '
381 |                   'rate to %s.' % (self.global_step + 1, lr))
382 | 
383 | 
384 | class ModelCheckpoint(keras.callbacks.Callback):
385 |     def __init__(self, filepath, monitor='val_loss', verbose=0,
386 |                  save_best_only=False, save_weights_only=False,
387 |                  mode='auto', period=1):
388 |         super(ModelCheckpoint, self).__init__()
389 |         self.monitor = monitor
390 |         self.verbose = verbose
391 |         self.filepath = filepath
392 |         self.save_best_only = save_best_only
393 |         self.save_weights_only = save_weights_only
394 |         self.period = period
395 |         self.epochs_since_last_save = 0
396 | 
397 |         if mode not in ['auto', 'min', 'max']:
398 |             warnings.warn('ModelCheckpoint mode %s is unknown, '
399 |                           'fallback to auto mode.' % (mode),
400 |                           RuntimeWarning)
401 |             mode = 'auto'
402 | 
403 |         if mode == 'min':
404 |             self.monitor_op = np.less
405 |             self.best = np.Inf
406 |         elif mode == 'max':
407 |             self.monitor_op = np.greater
408 |             self.best = -np.Inf
409 |         else:
410 |             if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
411 |                 self.monitor_op = np.greater
412 |                 self.best = -np.Inf
413 |             else:
414 |                 self.monitor_op = np.less
415 |                 self.best = np.Inf
416 | 
417 |     def on_epoch_end(self, epoch, logs=None):
418 |         logs = logs or {}
419 |         self.epochs_since_last_save += 1
420 |         if self.epochs_since_last_save >= self.period:
421 |             self.epochs_since_last_save = 0
422 |             filepath = self.filepath.format(epoch=epoch + 1, **logs)
423 |             if self.save_best_only:
424 |                 current = logs.get(self.monitor)
425 |                 if current is None:
426 |                     warnings.warn('Can save best model only with %s available, '
427 |                                   'skipping.' % (self.monitor), RuntimeWarning)
428 |                 else:
429 |                     if self.monitor_op(current, self.best):
430 |                         if self.verbose > 0:
431 |                             print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
432 |                                   ' saving model to %s'
433 |                                   % (epoch + 1, self.monitor, self.best,
434 |                                      current, filepath))
435 |                         self.best = current
436 |                         if self.save_weights_only:
437 |                             self.model.save_weights(filepath, overwrite=True)
438 |                         else:
439 |                             self.model.save(filepath, overwrite=True)
440 |                     else:
441 |                         if self.verbose > 0:
442 |                             print('\nEpoch %05d: %s did not improve' %
443 |                                   (epoch + 1, self.monitor))
444 |             else:
445 |                 if self.verbose > 0:
446 |                     print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
447 |                 if self.save_weights_only:
448 |                     self.model.save_weights(filepath, overwrite=True)
449 |                 else:
450 |                     self.model.save(filepath, overwrite=True)
451 | 
452 | 


--------------------------------------------------------------------------------
/Model_pruning/train_purn.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | import tensorflow.keras.backend as K
  6 | from tensorflow.keras.layers import Input, Lambda
  7 | from tensorflow.keras.models import Model
  8 | from tensorflow.keras.optimizers import Adam
  9 | from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping
 10 | from nets.yolo_fastest import yolo_body
 11 | from nets.loss import yolo_loss
 12 | from utils.utils import get_random_data, get_random_data_with_Mosaic, rand, WarmUpCosineDecayScheduler, ModelCheckpoint
 13 | import os
 14 | 
 15 | from kerassurgeon.identify import get_apoz
 16 | from kerassurgeon import Surgeon
 17 | import math
 18 | import pandas as pd
 19 | from nni.compression.tensorflow import FPGMPruner
 20 | tf.compat.v1.enable_eager_execution()
 21 | 
 22 | from tensorflow.python.framework import ops
 23 | from tensorflow.keras.models import load_model, model_from_json
 24 | 
 25 | 
 26 | 
 27 | 
 28 | # ---------------------------------------------------#
 29 | #   获得类和先验框
 30 | # ---------------------------------------------------#
 31 | def get_classes(classes_path):
 32 |     '''loads the classes'''
 33 |     with open(classes_path) as f:
 34 |         class_names = f.readlines()
 35 |     class_names = [c.strip() for c in class_names]
 36 |     return class_names
 37 | 
 38 | 
 39 | def get_anchors(anchors_path):
 40 |     '''loads the anchors from a file'''
 41 |     with open(anchors_path) as f:
 42 |         anchors = f.readline()
 43 |     anchors = [float(x) for x in anchors.split(',')]
 44 |     return np.array(anchors).reshape(-1, 2)
 45 | 
 46 | 
 47 | # ---------------------------------------------------#
 48 | #   训练数据生成器
 49 | # ---------------------------------------------------#
 50 | def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
 51 |     '''data generator for fit_generator'''
 52 |     n = len(annotation_lines)
 53 |     i = 0
 54 |     flag = True
 55 |     while True:
 56 |         image_data = []
 57 |         box_data = []
 58 |         for b in range(batch_size):
 59 |             if i == 0:
 60 |                 np.random.shuffle(annotation_lines)
 61 |             if mosaic:
 62 |                 if flag and (i + 4) < n:
 63 |                     image, box = get_random_data_with_Mosaic(annotation_lines[i:i + 4], input_shape)
 64 |                     i = (i + 4) % n
 65 |                 else:
 66 |                     image, box = get_random_data(annotation_lines[i], input_shape)
 67 |                     i = (i + 1) % n
 68 |                 flag = bool(1 - flag)
 69 |             else:
 70 |                 image, box = get_random_data(annotation_lines[i], input_shape)
 71 |                 i = (i + 1) % n
 72 |             image_data.append(image)
 73 |             box_data.append(box)
 74 |         image_data = np.array(image_data)
 75 |         box_data = np.array(box_data)
 76 |         y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
 77 |         yield [image_data, *y_true], np.zeros(batch_size)
 78 | 
 79 | 
 80 | # ---------------------------------------------------#
 81 | #   读入xml文件，并输出y_true
 82 | # ---------------------------------------------------#
 83 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
 84 |     assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
 85 |     # 一共有三个特征层数
 86 |     num_layers = len(anchors) // 3
 87 |     # 先验框
 88 |     anchor_mask: List[List[int]] = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]
 89 | 
 90 |     true_boxes = np.array(true_boxes, dtype='float32')
 91 |     input_shape = np.array(input_shape, dtype='int32')  # 416,416
 92 |     # 读出xy轴，读出长宽
 93 |     # 中心点(m,n,2)
 94 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
 95 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
 96 |     # 计算比例
 97 |     true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
 98 |     true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
 99 | 
100 |     # m张图
101 |     m = true_boxes.shape[0]
102 |     # 得到网格的shape为13,13;26,26;
103 |     grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
104 |     # y_true的格式为(m,13,13,3,85)(m,26,26,3,85)
105 |     y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
106 |                        dtype='float32') for l in range(num_layers)]
107 |     # [1,9,2]
108 |     anchors = np.expand_dims(anchors, 0)
109 |     anchor_maxes = anchors / 2.
110 |     anchor_mins = -anchor_maxes
111 |     # 长宽要大于0才有效
112 |     valid_mask = boxes_wh[..., 0] > 0
113 | 
114 |     for b in range(m):
115 |         # 对每一张图进行处理
116 |         wh = boxes_wh[b, valid_mask[b]]
117 |         if len(wh) == 0: continue
118 |         # [n,1,2]
119 |         wh = np.expand_dims(wh, -2)
120 |         box_maxes = wh / 2.
121 |         box_mins = -box_maxes
122 | 
123 |         # 计算真实框和哪个先验框最契合
124 |         intersect_mins = np.maximum(box_mins, anchor_mins)
125 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
126 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
127 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
128 |         box_area = wh[..., 0] * wh[..., 1]
129 |         anchor_area = anchors[..., 0] * anchors[..., 1]
130 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
131 |         # 维度是(n) 感谢 消尽不死鸟 的提醒
132 |         best_anchor = np.argmax(iou, axis=-1)
133 | 
134 |         for t, n in enumerate(best_anchor):
135 |             for l in range(num_layers):
136 |                 if n in anchor_mask[l]:
137 |                     # floor用于向下取整
138 |                     i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
139 |                     j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
140 |                     # 找到真实框在特征层l中第b副图像对应的位置
141 |                     k = anchor_mask[l].index(n)
142 |                     c = true_boxes[b, t, 4].astype('int32')
143 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
144 |                     y_true[l][b, j, i, k, 4] = 1
145 |                     y_true[l][b, j, i, k, 5 + c] = 1
146 | 
147 |     return y_true
148 | 
149 | 
150 | #-----------------------------------------------------#
151 | # purn
152 | #-----------------------------------------------------#
153 | 
154 | def prune_model(model, apoz_df, n_channels_delete):
155 |     # Identify 5% of channels with the highest APoZ in model
156 |     sorted_apoz_df = apoz_df.sort_values('apoz', ascending=False)
157 |     high_apoz_index = sorted_apoz_df.iloc[0:n_channels_delete, :]
158 | 
159 |     # Create the Surgeon and add a 'delete_channels' job for each layer
160 |     # whose channels are to be deleted.
161 |     surgeon = Surgeon(model, copy=True)
162 |     for name in high_apoz_index.index.unique().values:
163 |         channels = list(pd.Series(high_apoz_index.loc[name, 'index'],
164 |                                   dtype=np.int64).values)
165 |         surgeon.add_job('delete_channels', model.get_layer(name),
166 |                         channels=channels)
167 |     # Delete channels
168 |     return surgeon.operate()
169 | 
170 | 
171 | def get_total_channels(model):
172 |     start = None
173 |     end = None
174 |     channels = 0
175 |     for layer in model.layers[start:end]:
176 |         if layer.__class__.__name__ == 'Conv2D':
177 |             channels += layer.filters
178 |     return channels
179 | 
180 | 
181 | def get_model_apoz(model, generator):
182 |     # Get APoZ
183 |     start = None
184 |     end = None
185 |     apoz = []
186 |     for layer in model.layers[start:end]:
187 |         if layer.__class__.__name__ == 'Conv2D':
188 |             print(layer.name)
189 |             apoz.extend([(layer.name, i, value) for (i, value)
190 |                          in enumerate(get_apoz(model, layer, generator))])
191 | 
192 |     layer_name, index, apoz_value = zip(*apoz)
193 |     apoz_df = pd.DataFrame({'layer': layer_name, 'index': index,
194 |                             'apoz': apoz_value})
195 |     apoz_df = apoz_df.set_index('layer')
196 |     return apoz_df
197 | 
198 | #--------------------------------------------------------------------#
199 | 
200 | 
201 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
202 | for gpu in gpus:
203 |     tf.config.experimental.set_memory_growth(gpu, True)
204 | 
205 | # ----------------------------------------------------#
206 | #   检测精度mAP和pr曲线计算参考视频
207 | #   https://www.bilibili.com/video/BV1zE411u7Vw
208 | # ----------------------------------------------------#
209 | if __name__ == "__main__":
210 |     # 标签的位置
211 |     annotation_path = '2007_train.txt'
212 |     # 获取classes和anchor的位置
213 |     classes_path = 'model_data/new_class.txt'
214 |     anchors_path = 'model_data/yolo_anchors_320.txt'
215 |     # 预训练模型的位置
216 |     weights_path = 'logs_12/ep075-loss8.043-val_loss7.786.h5'
217 |     # 获得classes和anchor
218 |     class_names = get_classes(classes_path)
219 |     anchors = get_anchors(anchors_path)
220 |     # 一共有多少类
221 |     num_classes = len(class_names)
222 |     num_anchors = len(anchors)
223 |     # 训练后的模型保存的位置
224 |     log_dir = 'logs_14/'
225 |     # ----------------------------------------------#
226 |     #   输入的shape大小
227 |     #   显存比较小可以使用416x416
228 |     #   现存比较大可以使用608x608
229 |     # ----------------------------------------------#
230 |     input_shape = (320, 320)
231 |     mosaic = False
232 |     Cosine_scheduler = False
233 |     label_smoothing = 0
234 | 
235 |     # 清除session
236 |     K.clear_session()
237 | 
238 |     # 输入的图像为
239 |     #image_input = Input(shape=(None, None, 3))
240 |     image_input = Input(shape=(320, 320, 3))
241 |     h, w = input_shape
242 | 
243 |     # 创建yolo模型
244 |     print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
245 |     model_body = yolo_body(image_input, num_anchors // 2, num_classes)
246 | 
247 |     model_body.summary()
248 | 
249 |     if not os.path.exists(log_dir):
250 |         os.makedirs(log_dir)
251 |     json_config = model_body.to_json()
252 |     with open(log_dir + 'model_config.json', 'w') as json_file:
253 |         json_file.write(json_config)
254 |     # -------------------------------------------#
255 |     #   权值文件的下载请看README
256 |     # -------------------------------------------#
257 |     print('Load weights {}.'.format(weights_path))
258 |     model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
259 | 
260 |     # y_true为13,13,3,85
261 |     # 26,26,3,85
262 |     y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5)) for l in
263 |               range(2)]
264 | 
265 |     # 输入为*model_body.input, *y_true
266 |     # 输出为model_loss
267 |     loss_input = [*model_body.output, *y_true]
268 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
269 |                         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
270 |                                    'label_smoothing': label_smoothing})(loss_input)
271 | 
272 |     model = Model([model_body.input, *y_true], model_loss)
273 | 
274 |     # 训练参数设置
275 |     logging = TensorBoard(log_dir=log_dir)
276 |     checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
277 |                                  monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
278 |     early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
279 | 
280 |     #model_complexity_param = model_complexity.ModelParametersCallback(log_dir, verbose=1)
281 | 
282 |     # 0.1用于验证，0.9用于训练
283 |     val_split = 0.1
284 |     with open(annotation_path) as f:
285 |         lines = f.readlines()
286 |     np.random.seed(10101)
287 |     np.random.shuffle(lines)
288 |     np.random.seed(None)
289 |     num_val = int(len(lines) * val_split)
290 |     num_train = len(lines) - num_val
291 | 
292 |     freeze_layers = 60
293 |     for i in range(freeze_layers): model_body.layers[i].trainable = False
294 |     print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
295 | 
296 |     # ------------------------------------------------------#
297 |     #   主干特征提取网络特征通用，冻结训练可以加快训练速度
298 |     #   也可以在训练初期防止权值被破坏。
299 |     #   Init_Epoch为起始世代
300 |     #   Freeze_Epoch为冻结训练的世代
301 |     #   Epoch总训练世代
302 |     #   提示OOM或者显存不足请调小Batch_size
303 |     # ------------------------------------------------------#
304 |     if False:
305 |         Init_epoch = 0
306 |         Freeze_epoch = 0
307 |         # batch_size大小，每次喂入多少数据
308 |         batch_size = 16
309 |         # 最大学习率
310 |         learning_rate_base = 1e-3
311 |         if Cosine_scheduler:
312 |             # 预热期
313 |             warmup_epoch = int((Freeze_epoch - Init_epoch) * 0.2)
314 |             # 总共的步长
315 |             total_steps = int((Freeze_epoch - Init_epoch) * num_train / batch_size)
316 |             # 预热步长
317 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
318 |             # 学习率
319 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
320 |                                                    total_steps=total_steps,
321 |                                                    warmup_learning_rate=1e-4,
322 |                                                    warmup_steps=warmup_steps,
323 |                                                    hold_base_rate_steps=num_train,
324 |                                                    min_learn_rate=1e-6
325 |                                                    )
326 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
327 |         else:
328 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
329 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
330 | 
331 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
332 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
333 |                   steps_per_epoch=max(1, num_train // batch_size),
334 |                   validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
335 |                                                  mosaic=False),
336 |                   validation_steps=max(1, num_val // batch_size),
337 |                   epochs=Freeze_epoch,
338 |                   initial_epoch=Init_epoch,
339 |                   callbacks=[logging, checkpoint, reduce_lr, early_stopping])
340 |         model.save_weights(log_dir + 'trained_weights_stage_1.h5')
341 | 
342 |     for i in range(freeze_layers): model_body.layers[i].trainable = True
343 | 
344 |     # 解冻后训练
345 |     if False:
346 |         Freeze_epoch = 0
347 |         Epoch = 1
348 |         # batch_size大小，每次喂入多少数据
349 |         batch_size = 16
350 | 
351 |         # 最大学习率
352 |         learning_rate_base = 1e-5
353 |         if Cosine_scheduler:
354 |             # 预热期
355 |             warmup_epoch = int((Epoch - Freeze_epoch) * 0.2)
356 |             # 总共的步长
357 |             total_steps = int((Epoch - Freeze_epoch) * num_train / batch_size)
358 |             # 预热步长
359 |             warmup_steps = int(warmup_epoch * num_train / batch_size)
360 |             # 学习率
361 |             reduce_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
362 |                                                    total_steps=total_steps,
363 |                                                    warmup_learning_rate=1e-5,
364 |                                                    warmup_steps=warmup_steps,
365 |                                                    hold_base_rate_steps=num_train // 2,
366 |                                                    min_learn_rate=1e-6
367 |                                                    )
368 |             model.compile(optimizer=Adam(), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
369 |         else:
370 |             reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
371 |             model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
372 | 
373 |         print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
374 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
375 |                   steps_per_epoch=max(1, num_train // batch_size),
376 |                   validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
377 |                                                  mosaic=False),
378 |                   validation_steps=max(1, num_val // batch_size),
379 |                   epochs=Epoch,
380 |                   initial_epoch=Freeze_epoch,
381 |                   callbacks=[logging, checkpoint, reduce_lr, early_stopping, model_complexity_param])
382 |         model.save_weights(log_dir + 'last1.h5')
383 |         # pruner.export_model(model_path='pruned_vgg19_cifar10.h5', mask_path='mask_vgg19_cifar10.h5')
384 |         json_config = model.to_json()
385 |         with open(log_dir + 'model_config.json', 'w') as json_file:
386 |             json_file.write(json_config)
387 | 
388 |     # 最大学习率
389 |     learning_rate_base = 1e-3
390 |     batch_size = 16
391 |     output_dir = 'inception_flowers/'
392 |     train_data_dir = output_dir + 'data/train/'
393 |     validation_data_dir = output_dir + 'data/validation/'
394 |     tuned_weights_path = output_dir + 'tuned_weights.h5'
395 |     Epoch = 2
396 |     val_batch_size = 16
397 |     percent_pruning = 2
398 |     total_percent_pruning = 50
399 | 
400 |     validation_generator = data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False)
401 | 
402 |     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
403 |     model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
404 | 
405 |     configure_list = [{
406 |         'sparsity': 0.5,
407 |         'op_types': ['Conv2D']
408 |     }]
409 | 
410 |     configure_dict = {'sparsity': 0.5}
411 |     pruner = FPGMPruner(model, configure_list)
412 | 
413 |     start = None
414 |     end = None
415 |     for layer in model.layers[start:end]:
416 |         if layer.__class__.__name__ == 'Conv2D':
417 |             print(layer.name)
418 |         #    if layer.name == 'conv2d_17':
419 |         #        a = pruner.calc_mask(layer, configure_dict)
420 |     model = pruner.compress_model()
421 |     #prun_a = pruner.compress_model()
422 |     #min_gm_kernels = sorted(prun_a, key=lambda x: x[0])[:10]
423 |     #min_gm_kernels_1 = [x[1] for x in min_gm_kernels]
424 |     #surgeon = Surgeon(model, copy=False)
425 |     #model = pruner.compress_model_1(channels_p=min_gm_kernels_1)
426 |     #a=1
427 | 
428 |     model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
429 |     model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
430 |               steps_per_epoch=max(1, num_train // batch_size),
431 |               validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
432 |                                              mosaic=False),
433 |               validation_steps=max(1, num_val // batch_size),
434 |               epochs=Epoch,
435 |               callbacks=[logging, checkpoint, reduce_lr, early_stopping])
436 | 
437 |     model.save_weights(output_dir + '1' + '.h5')
438 | 
439 |     if False:
440 |         del model
441 |         tf.python.keras.backend.clear_session()
442 |         ops.reset_default_graph()
443 | 
444 |         with open(r'F:\yolo\yolo_fastest_tf2\inception_flowers\model_config.json', 'r') as file:
445 |             model_json1 = file.read()
446 |         new_model = model_from_json(model_json1)
447 |         new_model.load_weights(output_dir + '1' + '.h5', by_name=True, skip_mismatch=True)
448 | 
449 |         y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5)) for l in
450 |                   range(2)]
451 | 
452 |         # 输入为*model_body.input, *y_true
453 |         # 输出为model_loss
454 |         loss_input = [*new_model.output, *y_true]
455 |         model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
456 |                             arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
457 |                                        'label_smoothing': label_smoothing})(loss_input)
458 | 
459 |         model = Model([new_model.input, *y_true], model_loss)
460 | 
461 |         # 训练参数设置
462 |         logging = TensorBoard(log_dir=log_dir)
463 |         checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
464 |                                      monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
465 |         early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
466 | 
467 |         # model_complexity_param = model_complexity.ModelParametersCallback(log_dir, verbose=1)
468 | 
469 |         # 0.1用于验证，0.9用于训练
470 |         val_split = 0.1
471 |         with open(annotation_path) as f:
472 |             lines = f.readlines()
473 |         np.random.seed(10101)
474 |         np.random.shuffle(lines)
475 |         np.random.seed(None)
476 |         num_val = int(len(lines) * val_split)
477 |         num_train = len(lines) - num_val
478 | 
479 |         #model.save_weights(output_dir + '1' + '.h5')
480 |         #json_config = model.to_json()
481 |         #with open(output_dir + 'model_config.json', 'w') as json_file:
482 |         #    json_file.write(json_config)
483 |         #model.save(output_dir + '1' + '.h5')
484 |         #del model
485 |         #tf.python.keras.backend.clear_session()
486 |         #ops.reset_default_graph()
487 |         #model = load_model(output_dir + '1' + '.h5')
488 |         #with open(r'F:\yolo\yolo_fastest_tf2\inception_flowers\model_config.json', 'r') as file:
489 |         #    model_json1 = file.read()
490 |         #new_model = model_from_json(model_json1)
491 |         #new_model.load_weights(output_dir + '1' + '.h5')
492 | 
493 |         model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
494 | 
495 |         if False:
496 |             total_channels = get_total_channels(model)
497 |             n_channels_delete = int(math.floor(percent_pruning / 100 * total_channels))
498 | 
499 |             # Incrementally prune the network, retraining it each time
500 |             percent_pruned = 0
501 |             # If percent_pruned > 0, continue pruning from previous checkpoint
502 |             if percent_pruned > 0:
503 |                 checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned)
504 |                                    + 'percent')
505 |                 model = load_model(output_dir + checkpoint_name + '.h5')
506 | 
507 |             while percent_pruned <= total_percent_pruning:
508 |                 # Prune the model
509 |                 apoz_df = get_model_apoz(model, validation_generator)
510 |                 percent_pruned += percent_pruning
511 |                 print('pruning up to ', str(percent_pruned),
512 |                       '% of the original model weights')
513 |                 model = prune_model(model, apoz_df, n_channels_delete)
514 | 
515 |                 # Clean up tensorflow session after pruning and re-load model
516 |                 checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned)
517 |                                    + 'percent')
518 |                 model.save(output_dir + checkpoint_name + '.h5')
519 |                 del model
520 |                 tensorflow.python.keras.backend.clear_session()
521 |                 tf.reset_default_graph()
522 |                 model = load_model(output_dir + checkpoint_name + '.h5')
523 | 
524 |                 # Re-train the model
525 |                 reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
526 |                 model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
527 |                 checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned)
528 |                                    + 'percent')
529 |                 #csv_logger = CSVLogger(output_dir + checkpoint_name + '.csv')
530 |                 model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
531 |                           steps_per_epoch=max(1, num_train // batch_size),
532 |                           validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
533 |                                                          mosaic=False),
534 |                           validation_steps=max(1, num_val // batch_size),
535 |                           epochs=Epoch,
536 |                           callbacks=[logging, checkpoint, reduce_lr, early_stopping])
537 | 
538 |         model.fit(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
539 |                   steps_per_epoch=max(1, num_train // batch_size),
540 |                   validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes,
541 |                                                  mosaic=False),
542 |                   validation_steps=max(1, num_val // batch_size),
543 |                   epochs=Epoch,
544 |                   callbacks=[logging, checkpoint, reduce_lr, early_stopping])
545 | 


--------------------------------------------------------------------------------