├── .idea
│   ├── .gitignore
│   ├── github.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   └── modules.xml
├── Data_Preprocess
│   ├── ic
│   │   ├── classnum.py
│   │   ├── find.py
│   │   ├── label.txt
│   │   ├── process.py
│   │   ├── process1.py
│   │   └── select.py
│   └── od
│       ├── changelabel.py
│       ├── od_process.py
│       └── pickflower.py
├── Deployment
│   ├── classifiaction_of_30.py
│   ├── convert_to_tflite.py
│   ├── ic_model.tflite
│   ├── object_detection_of_16.py
│   ├── od_model.tflite
│   ├── raspberry
│   │   ├── ic_pi.py
│   │   └── od_pi.py
│   └── server
│       ├── ic_server.py
│       └── od_server.py
├── Document
│   ├── image
│   │   ├── img01.jpg – img06.jpg
│   │   ├── img101.png – img106.png
│   │   ├── img201.png – img218.png
│   │   ├── img301.png – img307.png
│   │   └── img401.png – img404.png
│   ├── part1.md
│   ├── part2.md
│   ├── part3.md
│   ├── part4.md
│   └── 树莓派zero图像分类与目标检测.md
├── Image_Classification
│   ├── Alexnet.py
│   ├── MobileNetV2.py
│   ├── MobileNetV3.py
│   ├── MobileNet_Selfconstruct.py
│   ├── construct_model.py
│   ├── parachange.py
│   ├── plot_model_fig.py
│   └── transfer_learning.py
├── Object_Detection
│   ├── .idea
│   │   ├── .gitignore
│   │   ├── Object_Detection.iml
│   │   ├── inspectionProfiles
│   │   │   ├── Project_Default.xml
│   │   │   └── profiles_settings.xml
│   │   ├── misc.xml
│   │   └── modules.xml
│   ├── dataset_util.py
│   ├── generate_tfrecord.py
│   ├── training
│   │   ├── label_map.pbtxt
│   │   └── pipeline.config
│   ├── xml_to_csv.py
│   └── xml_to_csv_total.py
└── README.md

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/../../../../../../../:\Users\86178\Desktop\深度学习\github\.idea/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/.idea/github.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/Data_Preprocess/ic/classnum.py:
--------------------------------------------------------------------------------
import os

# Count the number of images in each class directory.
path = './final/flower/normal picture/'

for item1 in os.listdir(path):
    path1 = path + item1 + '/'
    print(item1)
    t = 0
    for item2 in os.listdir(path1):
        t += 1
    print(t)
--------------------------------------------------------------------------------
/Data_Preprocess/ic/find.py:
--------------------------------------------------------------------------------
import os
from PIL import Image

# Flag empty files and images smaller than 224x224 in the training set.
path = './fruit_flower/train/'

for item1 in os.listdir(path):
    path1 = path + item1 + '/'
    print(item1)
    for item2 in os.listdir(path1):
        path2 = path1 + item2
        if os.path.getsize(path2) == 0:
            print(path2)
        else:
            img = Image.open(path2)
            if img.size[0] < 224 or img.size[1] < 224:
                print('q', path2)
--------------------------------------------------------------------------------
/Data_Preprocess/ic/label.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sun-Yize/Deep-Learning-On-Raspberry-Pi-Zero/c11001fe28d1213051b03aa0193fd660dba18a6f/Data_Preprocess/ic/label.txt
--------------------------------------------------------------------------------
/Data_Preprocess/ic/process.py:
--------------------------------------------------------------------------------
import os
import random
import shutil
import string
from PIL import Image


def IsValidImage(img_path):
    """Return True if PIL can verify the file as a valid image."""
    bValid = True
    try:
        Image.open(img_path).verify()
    except:
        bValid = False
    return bValid


def transimg(img_path):
    """Re-save a valid image under the same name with a .jpg extension."""
    if IsValidImage(img_path):
        try:
            parts = img_path.rsplit(".", 1)  # avoid shadowing the built-in str
            output_img_path = parts[0] + ".jpg"
            im = Image.open(img_path)
            im.save(output_img_path)
            return True
        except:
            return False
    else:
        return False


path = 'usedata/fruit/normal picture/'
finalpath = 'final/fruit/normal picture/'
for items1 in os.listdir(path):
    if items1 != 'apple':
        continue
    print(items1)
    path1 = path + items1 + '/'
    path_final = finalpath + items1 + '/'
    # create the destination folder, emptying any stale contents
    if not os.path.exists(path_final):
        os.mkdir(path_final)
    if os.listdir(path_final):
        shutil.rmtree(path_final)
        os.mkdir(path_final)
    i = 0
    for items2 in os.listdir(path1):
        print(items2)
        if items2 == 'cir':
            continue
        path2 = path1 + items2 + '/'
        for items3 in os.listdir(path2):
            path3 = path2 + items3 + '/'
            for items4 in os.listdir(path3):
                img_path = path3 + items4
                transimg(img_path)
                # give the file a random temporary name, then copy it out
                # and rename it to a sequential index
                a = ''.join(random.choices(string.ascii_lowercase, k=20))
                os.rename(img_path, path3 + a + '.jpg')
                shutil.copy(path3 + a + '.jpg', path_final)
                os.rename(path_final + a + '.jpg', path_final + str(i) + '.jpg')
                i += 1

# Commented-out variant for the flower set (same logic, one directory level
# shallower, iterating items1/items3/items4 directly):
# path = 'usedata/flower/normal picture/'
# finalpath = 'final/flower/normal picture/'
# class filter: items1 != 'veronica'
".jpg" 82 | # im = Image.open(img_path) 83 | # im.save(output_img_path) 84 | # return True 85 | # except: 86 | # return False 87 | # else: 88 | # return False 89 | # 90 | # 91 | # path = 'usedata/flower/normal picture/' 92 | # finalpath = 'final/flower/normal picture/' 93 | # for items1 in os.listdir(path): 94 | # if items1 != 'veronica': 95 | # continue 96 | # print(items1) 97 | # path1 = path + items1 + '/' 98 | # path_final = finalpath + items1 + '/' 99 | # if not os.path.exists(path_final): 100 | # os.mkdir(path_final) 101 | # if os.listdir(path_final): 102 | # shutil.rmtree(path_final) 103 | # os.mkdir(path_final) 104 | # i = 0 105 | # for items3 in os.listdir(path1): 106 | # path3 = path1 + items3 + '/' 107 | # for items4 in os.listdir(path3): 108 | # img_path = path3 + items4 109 | # transimg(img_path) 110 | # a = ''.join(random.choices(string.ascii_lowercase, k=20)) 111 | # os.rename(img_path, path3+a+'.jpg') 112 | # shutil.copy(path3+a+'.jpg', path_final) 113 | # os.rename(path_final+a+'.jpg', path_final+str(i)+'.jpg') 114 | # i += 1 115 | # 116 | -------------------------------------------------------------------------------- /Data_Preprocess/ic/process1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | 5 | path = './final/flower/normal picture/' 6 | 7 | for item1 in os.listdir(path): 8 | # if item1 != 'apple': 9 | # continue 10 | path1 = path + item1 + '/' 11 | t = 0 12 | print(t) 13 | for item2 in os.listdir(path1): 14 | path2 = path1 + item2 15 | # if os.path.getsize(path2) == 0: 16 | # os.remove(path2) 17 | # print(path2) 18 | # else: 19 | # # img = Image.open(path2) 20 | # # img_array = np.array(img) 21 | # # if np.shape(img_array.shape)[0] != 3: 22 | # # os.remove(path2) 23 | # # print('p', path2) 24 | # # else: 25 | os.rename(path2, path1+str(t)+'.jpg') 26 | t += 1 27 | 28 | 29 | -------------------------------------------------------------------------------- /Data_Preprocess/ic/select.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | 5 | i1 = 15 6 | path = './final/flower/normal picture/' 7 | newpath = './fruit_flower/train/' 8 | testpath = './fruit_flower/test/' 9 | if not os.path.exists(newpath): 10 | os.mkdir(newpath) 11 | if not os.path.exists(testpath): 12 | os.mkdir(testpath) 13 | 14 | for items1 in os.listdir(path): 15 | path1 = path + items1 + '/' 16 | newpath1 = newpath + str(i1) + '/' 17 | testpath1 = testpath + str(i1) + '/' 18 | if not os.path.exists(newpath1): 19 | os.mkdir(newpath1) 20 | if os.listdir(newpath1): 21 | shutil.rmtree(newpath1) 22 | os.mkdir(newpath1) 23 | if not os.path.exists(testpath1): 24 | os.mkdir(testpath1) 25 | if os.listdir(testpath1): 26 | shutil.rmtree(testpath1) 27 | os.mkdir(testpath1) 28 | i1 += 1 29 | i = 0 30 | for items2 in os.listdir(path1): 31 | i += 1 32 | resultList = random.sample(range(0, i), 1100) 33 | for t in range(1100): 34 | if t < 1000: 35 | shutil.copy(path1 + str(resultList[t]) + '.jpg', newpath1) 36 | os.rename(newpath1 + str(resultList[t]) + '.jpg', newpath1 + 'img' + str(t+1) + '.jpg') 37 | else: 38 | shutil.copy(path1 + str(resultList[t]) + '.jpg', testpath1) 39 | os.rename(testpath1 + str(resultList[t]) + '.jpg', testpath1 + 'img' + str(t + 1) + '.jpg') 40 | # print(path1 + str(resultList[1]) + '.jpg') 41 | # print(newpath1) 42 | # shutil.copy(path1 + str(resultList[1]) + '.jpg', newpath1) 43 | 44 | 45 | 
--------------------------------------------------------------------------------
/Data_Preprocess/od/changelabel.py:
--------------------------------------------------------------------------------
import os
import xml.etree.ElementTree as ET

# Earlier variant: fix a misspelled class name in <object><name>
# ('stawberry' -> 'strawberry') across a VOC-format annotation folder:
#
# def changelabelname(inputpath):
#     for file in os.listdir(inputpath):
#         if file.endswith('xml'):
#             file = os.path.join(inputpath, file)
#             tree = ET.parse(file)
#             root = tree.getroot()
#             for object1 in root.findall('object'):
#                 for sku in object1.findall('name'):
#                     if sku.text == 'stawberry':
#                         sku.text = 'strawberry'
#                         # write back as utf-8 to keep Chinese text intact
#                         tree.write(file, encoding='utf-8')
# inputpath = './source/tracking_fruits_dataset/train_dataset/VOC2007/combine'
#
# (Another earlier variant simply appended '.jpg' to the existing <filename> text.)


# Batch-edit VOC-format XML annotations: set each <filename> field to the
# actual file name on disk (basename + '.jpg').
def changelabelname(inputpath):
    p = 0
    listdir = os.listdir(inputpath)
    for file in listdir:
        t = file
        if file.endswith('xml'):
            file = os.path.join(inputpath, file)
            tree = ET.parse(file)
            root = tree.getroot()
            for object1 in root.findall('filename'):
                object1.text = t.split('.')[0] + '.jpg'
            # write back to the original xml, utf-8 to keep Chinese text intact
            tree.write(file, encoding='utf-8')
            p += 1

if __name__ == '__main__':
    inputpath = './newimg/combine'  # replace with your own path
    changelabelname(inputpath)

# Variant for the raw ImageNet layout: rename every <object><name> from the
# synset folder name (items2) to the human-readable category (items1):
#
# def changelabelname(inputpath):
#     for items1 in os.listdir(inputpath):
#         path = inputpath + items1 + '/'
#         for items2 in os.listdir(path):
#             path1 = path + items2 + '/' + 'Annotation' + '/' + items2 + '/'
#             i = 0
#             for file in os.listdir(path1):
#                 i += 1
#                 if file.endswith('xml'):
#                     file = os.path.join(path1, file)
#                     tree = ET.parse(file)
#                     root = tree.getroot()
#                     for object1 in root.findall('object'):
#                         for sku in object1.findall('name'):
#                             if sku.text == items2:
#                                 sku.text = items1
#                                 tree.write(file, encoding='utf-8')
#             print(items1, i)
# inputpath = './imgnet/flower/'
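# Optional check (not part of the original script): after the rewrite, parse a
# few annotations back and confirm that <filename> matches the file on disk.
# The path reuses the inputpath assumed above.
#
# import glob
# for f in glob.glob('./newimg/combine/*.xml')[:5]:
#     root = ET.parse(f).getroot()
#     print(os.path.basename(f), '->', root.find('filename').text)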
--------------------------------------------------------------------------------
/Data_Preprocess/od/od_process.py:
--------------------------------------------------------------------------------
import os
import shutil


# Pair each ImageNet annotation with its image and copy both into a flat
# per-category folder, renamed to sequential indices.
ori1 = './imgnet/fruit/'
ori2 = './newimg/fruit/'
for items1 in os.listdir(ori1):
    path1 = ori1 + items1 + '/'
    newimg = ori2 + items1 + '/'
    if not os.path.exists(newimg):
        os.mkdir(newimg)
    if os.listdir(newimg):
        shutil.rmtree(newimg)
        os.mkdir(newimg)
    for items2 in os.listdir(path1):
        path = path1 + items2 + '/Annotation/' + items2 + '/'
        imgpath = path1 + items2 + '/'
        t = 1
        for xmls in os.listdir(path):
            # skip annotations whose image is missing
            if not os.path.exists(imgpath + xmls.split(sep='.')[0] + '.JPEG'):
                continue
            shutil.copy(path + xmls, newimg)
            os.rename(newimg + xmls, newimg + str(t) + '.xml')
            shutil.copy(imgpath + xmls.split(sep='.')[0] + '.JPEG', newimg)
            os.rename(newimg + xmls.split(sep='.')[0] + '.JPEG', newimg + str(t) + '.jpg')
            t += 1
--------------------------------------------------------------------------------
/Data_Preprocess/od/pickflower.py:
--------------------------------------------------------------------------------
import os
import shutil


# Merge the first half of every flower category's image/xml pairs into one
# combined folder, renamed img<t>.jpg / img<t>.xml.
ori1 = './newimg/flower/'
newimg = './newimg/combine/'
t = 0
if not os.path.exists(newimg):
    os.mkdir(newimg)
if os.listdir(newimg):
    shutil.rmtree(newimg)
    os.mkdir(newimg)
for items1 in os.listdir(ori1):
    path1 = ori1 + items1 + '/'
    i = 0
    for items2 in os.listdir(path1):
        i += 1
    print(int((i / 2) + 1))
    for p in range(1, int((i / 2) + 1)):
        shutil.copy(path1 + str(p) + '.jpg', newimg)
        shutil.copy(path1 + str(p) + '.xml', newimg)
        os.rename(newimg + str(p) + '.jpg', newimg + 'img' + str(t) + '.jpg')
        os.rename(newimg + str(p) + '.xml', newimg + 'img' + str(t) + '.xml')
        t += 1
--------------------------------------------------------------------------------
/Deployment/classifiaction_of_30.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import tensorflow as tf

#### Load an image and resize it to 224x224, scaled to [0, 1]
def load_image(img_path, size=(224, 224)):
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, size) / 255.0
    return img

### Run one image through the TFLite interpreter, return the class index
def evaluate_model(interpreter, test_image):
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)
    interpreter.invoke()
    output = interpreter.tensor(output_index)
    output = np.argmax(output()[0])
    return output

#### Load the converted model file
interpreter = tf.lite.Interpreter(model_path='MobileNetV2.tflite')
interpreter.allocate_tensors()
classlist = ["apple", "banana", "blueberry", "cherry", "durian", "fig", "grape", "lemon", "litchi", "mango", "orange", "pineapple", "plum", "pomegranate", "strawberry", "aster", "begonia", "calla_lily", "chrysanthemum", "cornflower", "corydali", "dahlia", "daisy", "gentian", "mistflower", "nigella", "rose", "sandwort", "sunflower", "veronica"]

#### Capture frames from the camera via OpenCV
capture = cv2.VideoCapture(0)  # 0 = the Raspberry Pi's built-in camera
while True:
    ret, frame = capture.read()
    frame = cv2.flip(frame, 1)
    cv2.imwrite("temp.jpg", frame)
    test = load_image("temp.jpg")
    # classify the frame with the model
    result = evaluate_model(interpreter, test)
    print(classlist[result])
    # show the frame
    cv2.imshow("video", frame)
    c = cv2.waitKey(100)
    # press q to quit
    if c == 113:
        break
cv2.destroyAllWindows()
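# Optional sketch (not part of the original script): on live video the printed
# label can flicker from frame to frame; a majority vote over the last few
# predictions is a cheap way to stabilise it. `history` is a hypothetical
# helper, shown for illustration only:
#
# from collections import Counter, deque
# history = deque(maxlen=5)
# ...inside the loop, instead of printing result directly:
# history.append(result)
# stable = Counter(history).most_common(1)[0][0]
# print(classlist[stable])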
--------------------------------------------------------------------------------
/Deployment/convert_to_tflite.py:
--------------------------------------------------------------------------------
import tensorflow as tf

# Load the trained Keras model and convert it to a TFLite flatbuffer
model = tf.keras.models.load_model("./data/moblie_2.h5")
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tfmodel = converter.convert()
with open("model.tflite", "wb") as f:
    f.write(tfmodel)
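# Optional (not part of the original script): for a device as small as the
# Pi Zero, post-training dynamic-range quantization typically shrinks the
# model roughly 4x at a small accuracy cost. A minimal sketch:
#
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# with open("model_quant.tflite", "wb") as f:
#     f.write(converter.convert())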
--------------------------------------------------------------------------------
/Deployment/ic_model.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sun-Yize/Deep-Learning-On-Raspberry-Pi-Zero/c11001fe28d1213051b03aa0193fd660dba18a6f/Deployment/ic_model.tflite
--------------------------------------------------------------------------------
/Deployment/object_detection_of_16.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import config_util
from object_detection.builders import model_builder
import cv2


# run one preprocessed frame through the TFLite interpreter
# (same helper as in server/od_server.py; it was referenced but not defined here)
def detect(interpreter, input_tensor):
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    preprocessed_image, shapes = detection_model.preprocess(input_tensor)
    interpreter.set_tensor(input_details[0]['index'], preprocessed_image.numpy())
    interpreter.invoke()
    boxes = interpreter.get_tensor(output_details[0]['index'])
    classes = interpreter.get_tensor(output_details[1]['index'])
    scores = interpreter.get_tensor(output_details[2]['index'])
    return boxes, classes, scores


# number of classes the model detects
num_classes = 16
# pipeline config location
pipeline_config = 'pipeline.config'
# label map
category_index = {1: {'id': 1, 'name': 'apple'}, 2: {'id': 2, 'name': 'banana'}, 3: {'id': 3, 'name': 'grape'}, 4: {'id': 4, 'name': 'kiwifruit'}, 5: {'id': 5, 'name': 'mango'}, 6: {'id': 6, 'name': 'orange'}, 7: {'id': 7, 'name': 'pear'}, 8: {'id': 8, 'name': 'stawberry'}, 9: {'id': 9, 'name': 'calla lily'}, 10: {'id': 10, 'name': 'cornflower'}, 11: {'id': 11, 'name': 'corydalis'}, 12: {'id': 12, 'name': 'dahlia'}, 13: {'id': 13, 'name': 'daisy'}, 14: {'id': 14, 'name': 'gentian'}, 15: {'id': 15, 'name': 'nigella'}, 16: {'id': 16, 'name': 'sunflower'}}

# build the detection model from the pipeline config
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(model_config=model_config, is_training=True)

# load the tflite model
interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
label_id_offset = 1
# open the camera
capture = cv2.VideoCapture(0)

while True:
    # capture and preprocess a frame
    ret, frame = capture.read()
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    test = np.expand_dims(frame_rgb, axis=0)
    input_tensor = tf.convert_to_tensor(test, dtype=tf.float32)
    # run the detection model
    boxes, classes, scores = detect(interpreter, input_tensor)
    viz_utils.visualize_boxes_and_labels_on_image_array(
        test[0],
        boxes[0],
        classes[0].astype(np.uint32) + label_id_offset,
        scores[0],
        category_index,
        use_normalized_coordinates=True,
        min_score_thresh=0.8)
    # show the annotated frame
    frame = cv2.cvtColor(test[0], cv2.COLOR_BGR2RGB)
    cv2.imshow("Object detector", frame)
    c = cv2.waitKey(20)
    # press q to quit
    if c == 113:
        break
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/Deployment/od_model.tflite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sun-Yize/Deep-Learning-On-Raspberry-Pi-Zero/c11001fe28d1213051b03aa0193fd660dba18a6f/Deployment/od_model.tflite
--------------------------------------------------------------------------------
/Deployment/raspberry/ic_pi.py:
--------------------------------------------------------------------------------
import requests
import cv2

# public address of the server (ic_server.py listens on port 8087)
url = "http://127.0.0.1:8087/"

capture = cv2.VideoCapture(0)  # 0 = built-in camera
while True:
    ret, frame = capture.read()
    frame = cv2.flip(frame, 1)
    # jpeg-encode the frame and upload it as a multipart file,
    # which is what ic_server.py reads via request.files['file']
    img_encoded = cv2.imencode('.jpg', frame)[1].tobytes()
    response = requests.post(url, files={'file': ('frame.jpg', img_encoded, 'image/jpeg')})
    # the server answers with the predicted class name as plain text
    print(response.text)
    cv2.imshow("video", frame)
    c = cv2.waitKey(20)
    # press q to quit
    if c == 113:
        break
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/Deployment/raspberry/od_pi.py:
--------------------------------------------------------------------------------
import requests
import numpy as np
import cv2


# public address of the server
url = "http://127.0.0.1:8088/"
# content type of the posted jpeg
content_type = 'image/jpeg'
headers = {'content-type': content_type}
# open the camera
capture = cv2.VideoCapture(0)
while True:
    # capture and downscale the frame (saves upload bandwidth)
    ret, frame = capture.read()
    frame = cv2.resize(frame, (160, 120), interpolation=cv2.INTER_CUBIC)
    # jpeg-encode the frame and send it
    img_encoded = cv2.imencode('.jpg', frame)[1]
    imgstring = np.array(img_encoded).tobytes()
    response = requests.post(url, data=imgstring, headers=headers)
    imgstring = np.asarray(bytearray(response.content), dtype="uint8")
    # decode and display the annotated frame returned by the server
    img = cv2.imdecode(imgstring, cv2.IMREAD_COLOR)
    cv2.imshow("video", img)
    c = cv2.waitKey(20)
    # press q to quit
    if c == 113:
        break
cv2.destroyAllWindows()
"rose", "sandwort", "sunflower", "veronica"] 19 | 20 | #### 每次计算都会得到一个0到29的索引值,云服务器会根据索引值索引到类别,返回字符串给树莓派端 21 | @app.route('/', methods=['post']) 22 | def predict(): 23 | upload_file = request.files['file'] 24 | file_name = upload_file.filename 25 | file_path = '/home/ubuntu/inifyy/img' 26 | if upload_file: 27 | file_paths = os.path.join(file_path, file_name) 28 | upload_file.save(file_paths) 29 | test = load_image(file_paths) 30 | result = evaluate_model(interpreter, test) 31 | result = classlist[result] 32 | return result 33 | 34 | if __name__ == '__main__': 35 | app.run(debug=True, host='127.0.0.1', port=8087) -------------------------------------------------------------------------------- /Deployment/server/od_server.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from object_detection.utils import visualization_utils as viz_utils 4 | from object_detection.utils import config_util 5 | from object_detection.builders import model_builder 6 | import cv2 7 | from flask import Flask, request 8 | app = Flask(__name__) 9 | 10 | 11 | # 定义检测函数 12 | def detect(interpreter, input_tensor): 13 | input_details = interpreter.get_input_details() 14 | output_details = interpreter.get_output_details() 15 | preprocessed_image, shapes = detection_model.preprocess(input_tensor) 16 | interpreter.set_tensor(input_details[0]['index'], preprocessed_image.numpy()) 17 | interpreter.invoke() 18 | boxes = interpreter.get_tensor(output_details[0]['index']) 19 | classes = interpreter.get_tensor(output_details[1]['index']) 20 | scores = interpreter.get_tensor(output_details[2]['index']) 21 | return boxes, classes, scores 22 | 23 | 24 | # 模型识别种类个数 25 | num_classes = 16 26 | # 模型位置 27 | pipeline_config = 'pipeline.config' 28 | # 模型标签 29 | category_index = {1: {'id': 1, 'name': 'apple'}, 2: {'id': 2, 'name': 'banana'}, 3: {'id': 3, 'name': 'grape'}, 4: {'id': 4, 'name': 'kiwifruit'}, 5: {'id': 5, 'name': 'mango'}, 6: {'id': 6, 'name': 'orange'}, 7: {'id': 7, 'name': 'pear'}, 8: {'id': 8, 'name': 'stawberry'}, 9: {'id': 9, 'name': 'calla lily'}, 10: {'id': 10, 'name': 'cornflower'}, 11: {'id':11, 'name': 'corydalis'}, 12: {'id': 12, 'name': 'dahlia'}, 13: {'id': 13, 'name': 'daisy'}, 14: {'id': 14, 'name': 'gentian'}, 15: {'id': 15, 'name': 'nigella'}, 16: {'id': 16, 'name': 'sunflower'}} 30 | 31 | # 定义模型 32 | configs = config_util.get_configs_from_pipeline_file(pipeline_config) 33 | model_config = configs['model'] 34 | model_config.ssd.num_classes = num_classes 35 | model_config.ssd.freeze_batchnorm = True 36 | detection_model = model_builder.build(model_config=model_config, is_training=True) 37 | 38 | # 加载tflite文件 39 | interpreter = tf.lite.Interpreter(model_path="model.tflite") 40 | interpreter.allocate_tensors() 41 | label_id_offset = 1 42 | 43 | 44 | # 定义预测函数,用于接受post及预测 45 | @app.route('/', methods=["post"]) 46 | def predict(): 47 | # 解码接收的图像文件 48 | imgstring = np.asarray(bytearray(request.data), dtype="uint8") 49 | img = cv2.imdecode(imgstring, cv2.IMREAD_COLOR) 50 | frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 51 | test = np.expand_dims(frame, axis=0) 52 | # 目标检测 53 | input_tensor = tf.convert_to_tensor(test, dtype=tf.float32) 54 | boxes, classes, scores = detect(interpreter, input_tensor) 55 | viz_utils.visualize_boxes_and_labels_on_image_array( 56 | test[0], 57 | boxes[0], 58 | classes[0].astype(np.uint32) + label_id_offset, 59 | scores[0], 60 | category_index, 61 | use_normalized_coordinates=True, 62 | 
--------------------------------------------------------------------------------
/Document/image/ (img01.jpg – img06.jpg, img101.png – img106.png,
img201.png – img218.png, img301.png – img307.png, img401.png – img404.png):
--------------------------------------------------------------------------------
Each image is stored at
https://raw.githubusercontent.com/Sun-Yize/Deep-Learning-On-Raspberry-Pi-Zero/c11001fe28d1213051b03aa0193fd660dba18a6f/Document/image/<filename>
-------------------------------------------------------------------------------- /Document/part1.md: -------------------------------------------------------------------------------- 1 | ## 一、数据获取及预处理 2 | 3 | ### 1.1 数据的要求及意义 4 | 5 | - 数据集规模较大 6 | - 数据集干净且高质量 7 | 8 | **图像识别**和**目标检测**指利用计算机对图像进行处理、分析和理解,以识别各种不同模式的目标和对象的技术,也是深度学习算法的一种实践应用。 9 | 10 | 而一个成熟的深度学习算法的训练,首先需要大量相关数据来做支撑。 11 | 12 | > 简单介绍一下图像识别领域所常用的卷积神经网络的结构 : 13 | > 14 | > 15 | > 16 | > 1、使用神经网络将数据进行正向传播,计算误差得到损失函数。 17 | > 18 | > 2、通过整个网络的反向传播计算梯度。 19 | > 20 | > 3、用梯度更新网络中的参数或者权重,得到最终算法。 21 | 22 | 由此可以看出,数据的规模与干净程度对图像识别算法的准确度起着至关重要的作用。 23 | 24 | ### 1.2 数据集的选择 25 | 26 | #### 1.2.1 数据简介 27 | 28 | 在本项目中,我们需要训练两种模型,分别是图像分类模型与目标检测模型。 29 | 30 | + 图像分类:预测图像类别的任务被称为图像分类。训练图像分类模型的目的是识别各类图像。 31 | + 目标检测:目标检测可以识别出已知的物体和该物体在图片中的位置,并判断出物体对应的类别。 32 | 33 | 这两种模型在输入模型的训练数据上略有差距,所需的图片数据一共分为两种,普通图片数据和目标检测数据。 34 | 35 | 图像分类模型需要维度相同的图片作为训练集,一般图片均处理成.jpg格式。目标检测模型除了图片之外,还需要边界框的数据,这种数据以.xml格式储存。 36 | 37 | 图像分类与目标检测所需数据类型分别如下所示: 38 | 39 | 40 | 41 | #### 1.2.2 识别种类的确定 42 | 43 | 花卉与水果种类繁多,首先需要确定种类再进行相关图片数据的收集。我们以领域内常用的花卉水果种类为导向进行数据获取,最终确定了以下花卉水果味识别种类。 44 | 45 | 46 | 47 | ### 1.3 数据集获取 48 | 49 | #### 1.3.1 数据集来源 50 | 51 | ##### 1.现有数据集 52 | 53 | 目前在各个图片分类与目标检测的数据集中,已经存在了较为完善的花卉与水果图片。如ImageNet、百度paddle数据库等,我们将各个数据库中的水果和花卉数据进行整合,并形成了我们自己的数据集。 54 | 55 | 在我们的花卉与水果识别项目中,所选用数据集如下所示: 56 | 57 | + ImageNet:http://www.image-net.org/ 58 | + Oxford 102 Flowers:https://www.robots.ox.ac.uk/~vgg/data/flowers/102/ 59 | + Kaggle中数据集 60 | 61 | + Fruit Images for Object Detection:https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection 62 | + Flower Recognition:https://www.kaggle.com/alxmamaev/flowers-recognition 63 | + 百度paddle中数据集 64 | 65 | + PaddleDetection水果目标检测:https://aistudio.baidu.com/aistudio/datasetdetail/34327 66 | + 水果分类模型:https://aistudio.baidu.com/aistudio/datasetdetail/30069 67 | 68 | ##### 2.爬虫获取图片 69 | 70 | 水果及花卉图片属于较为常见的图片,根据关键字搜索可以获得数量较多的图片。 71 | 72 | 我们在获取图片时,使用python爬虫,对google图库以及百度图库进行搜索,并将爬取的图片进行分类。下载好的数据会经过人工筛选,最后放入使用的数据集当中。 73 | 74 | #### 1.3.2 数据集下载 75 | 76 | 在图像识别领域,已经有了很多成熟的数据库,我们可以从中选取自己所需的内容进行下载。 77 | 78 | 这里我们以最经典的ImageNet数据库为例,简单介绍一下此类数据集的下载方式: 79 | 80 | > 在目标检测领域,ImageNet提供了丰富的目标检测原始数据,包括了不同种类的花卉和水果。我们在ImageNet官网上,下载目标检测所需的数据。 81 | > 82 | > ImageNet是一个不可用于商业目的的数据集,用户下载的话首先需要进行注册。 83 | > 84 | > 1、完成注册后,首先搜索需要的图片,这里我们以香蕉banana为例: 85 | > 86 | > 2、点击Downloads,下载对应的数据图片。 87 | > 88 | > 3、除此之外,imagenet还提供了图片的原始网站,我们在下载图片时可以使用爬虫,对原始图片直接进行爬取。首先将网址复制,保存到本地: 89 | > 90 | > 3、接下来我们用如下的代码,逐个下载对应的图片: 91 | > 92 | > ```python 93 | > from urllib import request 94 | > 95 | > urlpath = './text.txt' # 保存的图片网址 96 | > imgpath = './image' # 图片下载后保存地址 97 | > file = open(urlpath, 'r') 98 | > i = 0 99 | > for line in file: 100 | > try: 101 | > temp = imgpath+str(i)+'.jpg' 102 | > request.urlretrieve(line, temp) 103 | > i += 1 104 | > except: 105 | > print("%s timeout " % line) 106 | > pass 107 | > file.close() 108 | > ``` 109 | 110 | #### 1.3.3 python爬虫 111 | 112 | 此外我们的数据更多地来源于爬虫获取。 113 | 114 | 这里我们以谷歌与百度为例,为大家介绍一下如何爬取指定关键字的图片。 115 | 116 | > 第一步先将谷歌查询网址、指定关键字、Chrome驱动程序在电脑中的位置进行变量的定义。 117 | > 118 | > ```python 119 | > base_url_part1 = 'https://www.google.com/search?q=' 120 | > base_url_part2 = '&source=lnms&tbm=isch' 121 | > search_query = '猕猴桃' 122 | > location_driver = '/home/LQ/Downloads/ChromeDriver/chromedriver' 123 | > ``` 124 | > 125 | > 然后用这些变量在函数中初始化实例 126 | > 127 | > ```python 128 | > class Crawler: 129 | > def __init__(self): 130 | > self.url = base_url_part1 + search_query + base_url_part2 131 | > ``` 132 | > 133 | > 接下来我们启动Chrome浏览器驱动,并打开爬取页面 134 
### 1.2 Choosing the dataset

#### 1.2.1 Data overview

In this project we train two models: an image classification model and an object detection model.

+ Image classification: the task of predicting the category of an image. A classification model is trained to recognise each class of image.
+ Object detection: recognising known objects, locating them within the image, and assigning each one a class.

The two models differ slightly in their training inputs, so two kinds of image data are needed: plain pictures and object detection data.

The classification model needs images of identical dimensions, normally saved in .jpg format. The object detection model additionally needs bounding-box data, stored in .xml format.

The data types needed for classification and detection are shown below, respectively.

#### 1.2.2 Choosing the classes

There are many kinds of flowers and fruit, so the classes had to be fixed before collecting images. We oriented the collection around the flower and fruit species commonly used in the field and settled on the recognition classes below.

### 1.3 Obtaining the data

#### 1.3.1 Data sources

##### 1. Existing datasets

Fairly complete flower and fruit images already exist in the various classification and detection datasets, such as ImageNet and the Baidu Paddle collections. We merged the fruit and flower data from several databases into our own dataset.

The datasets used in our flower-and-fruit recognition project:

+ ImageNet: http://www.image-net.org/
+ Oxford 102 Flowers: https://www.robots.ox.ac.uk/~vgg/data/flowers/102/
+ Kaggle datasets
  + Fruit Images for Object Detection: https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection
  + Flower Recognition: https://www.kaggle.com/alxmamaev/flowers-recognition
+ Baidu Paddle datasets
  + PaddleDetection fruit object detection: https://aistudio.baidu.com/aistudio/datasetdetail/34327
  + Fruit classification model: https://aistudio.baidu.com/aistudio/datasetdetail/30069

##### 2. Crawled images

Fruit and flower pictures are fairly common, and keyword searches return them in large numbers.

We used Python crawlers on the Google and Baidu image libraries and sorted the crawled pictures by class. The downloaded data was filtered manually before being added to the working dataset.

#### 1.3.2 Downloading datasets

Many mature databases exist in image recognition, and we can pick out and download what we need.

Taking the classic ImageNet database as an example, the download process looks like this:

> For object detection, ImageNet provides rich raw data, including different kinds of flowers and fruit, which we downloaded from the ImageNet site.
>
> ImageNet is a dataset that may not be used for commercial purposes, and downloading requires registration.
>
> 1. After registering, search for the images you need - here we take banana as the example:
>
> 2. Click Downloads to fetch the corresponding images.
>
> 3. ImageNet also lists the original source URL of every image, so the originals can be crawled directly. First copy the URLs and save them locally:
>
> 4. Then download the images one by one with the following code:
>
> ```python
> from urllib import request
>
> urlpath = './text.txt'   # file with one saved image URL per line
> imgpath = './image/'     # download directory for the images
> file = open(urlpath, 'r')
> i = 0
> for line in file:
>     try:
>         temp = imgpath + str(i) + '.jpg'
>         request.urlretrieve(line, temp)
>         i += 1
>     except:
>         print("%s timeout " % line)
>         pass
> file.close()
> ```

#### 1.3.3 Python crawler

Beyond that, most of our data came from crawling.

Taking Google and Baidu as examples, here is how to crawl images for a given keyword.

> First define the Google query URL, the keyword, and the location of the Chrome driver on your machine (the imports for the pieces used below are gathered here):
>
> ```python
> import os
> import time
> import urllib.request
> from selenium import webdriver
> from selenium.webdriver.chrome.options import Options
> from bs4 import BeautifulSoup as bs
>
> base_url_part1 = 'https://www.google.com/search?q='
> base_url_part2 = '&source=lnms&tbm=isch'
> search_query = '猕猴桃'
> location_driver = '/home/LQ/Downloads/ChromeDriver/chromedriver'
> ```
>
> Then use these variables to initialise the crawler instance:
>
> ```python
> class Crawler:
>     def __init__(self):
>         self.url = base_url_part1 + search_query + base_url_part2
> ```
>
> Next start the Chrome driver and open the page to crawl:
>
> ```python
> def start_brower(self):
>     chrome_options = Options()
>     chrome_options.add_argument("--disable-infobars")
>     driver = webdriver.Chrome(executable_path=location_driver, chrome_options=chrome_options)
>     # maximise the window - each pass only sees the images inside the viewport
>     driver.maximize_window()
>     # open the page to crawl
>     driver.get(self.url)
>     return driver
> ```
>
> Then grab the page source, build a soup object with BeautifulSoup4 to parse the page, extract the image tags with the soup object's findAll function, and download the images we need into a local folder:
>
> ```python
> def downloadImg(self, driver):
>     t = time.localtime(time.time())
>     foldername = str(t.__getattribute__("tm_year")) + "-" + str(t.__getattribute__("tm_mon")) + "-" + \
>                  str(t.__getattribute__("tm_mday"))
>     picpath = '/home/LQ/ImageDownload/%s' % (foldername)
>     if not os.path.exists(picpath): os.makedirs(picpath)
>
>     img_url_dic = {}
>     x = 0
>     pos = 0
>     for i in range(1):  # crawl range
>         pos = i * 500
>         js = "document.documentElement.scrollTop=%d" % pos
>         driver.execute_script(js)
>         time.sleep(1)
>         html_page = driver.page_source
>         soup = bs(html_page, "html.parser")
>         imglist = soup.findAll('img', {'class': 'rg_ic rg_i'})
>         for imgurl in imglist:
>             try:
>                 print(x, end=' ')
>                 if imgurl['src'] not in img_url_dic:
>                     target = '{}/{}.jpg'.format(picpath, x)
>                     img_url_dic[imgurl['src']] = ''
>                     urllib.request.urlretrieve(imgurl['src'], target)
>                     time.sleep(1)
>                     x += 1
>             except KeyError:
>                 print("Error")
>                 continue
> ```
>
> Fetching data from Baidu Images works on the same principle; only the initial variable definitions change:
>
> ```python
> base_url_part1 = 'https://image.baidu.com/search'
> base_url_part2 = '&oq=bagua&rsp=0'
> search_query = '猕猴桃'
> location_driver = 'D:/download/Chrome/Chrome-driver/chromedriver.exe'
> ```

### 1.4 Data preprocessing

Once enough data has been collected, it must be preprocessed before it can be used for training.

We preprocess the image classification data and the object detection data separately.

#### 1.4.1 Image classification data

##### 1. Image format

First we unify the data format to jpg, which makes later reading easier.

Define the image-conversion helpers:

```python
from PIL import Image

def IsValidImage(img_path):
    bValid = True
    try:
        Image.open(img_path).verify()
    except:
        bValid = False
    return bValid

def transimg(img_path):
    if IsValidImage(img_path):
        try:
            parts = img_path.rsplit(".", 1)   # avoid shadowing the built-in str
            output_img_path = parts[0] + ".jpg"
            im = Image.open(img_path)
            im.save(output_img_path)
            return True
        except:
            return False
    else:
        return False
```

Next, taking the fruit data as an example, read the source folder, rename the original data, convert it all to jpg, and store the result in a new folder:

```python
import os
import random
import shutil
import string

# source data folder
path = './data/fruit/'
# destination for the processed images
finalpath = './fruit_flower/'

# walk the class folders one by one
for items1 in os.listdir(path):
    i = 0
    path1 = path + items1 + '/'
    path_final = finalpath + items1 + '/'
    # create the destination folder if needed, emptying any stale contents
    if not os.path.exists(path_final):
        os.mkdir(path_final)
    if os.listdir(path_final):
        shutil.rmtree(path_final)
        os.mkdir(path_final)
    # walk the individual images
    for items2 in os.listdir(path1):
        path2 = path1 + items2 + '/'
        for items3 in os.listdir(path2):
            path3 = path2 + items3 + '/'
            for items4 in os.listdir(path3):
                img_path = path3 + items4
                transimg(img_path)
                a = ''.join(random.choices(string.ascii_lowercase, k=20))
                os.rename(img_path, path3+a+'.jpg')
                shutil.copy(path3+a+'.jpg', path_final)
                os.rename(path_final+a+'.jpg', path_final+str(i)+'.jpg')
                i += 1
```

##### 2. Channel handling

Crawled and downloaded data contain a number of black-and-white pictures. These are single-channel images whose dimensions differ from colour images.

In this step we check whether every image is a colour image; if an image is black-and-white rather than colour, we delete it, guaranteeing that all images have three channels.

The processing code:

```python
import os
import numpy as np
from PIL import Image

# images whose format has already been unified
path = './fruit_flower/'

# walk the class folders one by one
for item1 in os.listdir(path):
    path1 = path + item1 + '/'
    print(item1)
    for item2 in os.listdir(path1):
        path2 = path1 + item2
        # zero-byte files are corrupt downloads
        if os.path.getsize(path2) == 0:
            print(path2)
        else:
            img = Image.open(path2)
            img_array = np.array(img)   # inspect the channel count
            # delete anything that is not a 3-channel colour image
            if np.shape(img_array.shape)[0] != 3:
                print('p', path2)
                os.remove(path2)
```
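Deleting grayscale images discards otherwise usable data. An alternative (a sketch only, not what the project's scripts do) is to convert such images to 3-channel RGB in place with Pillow, with `path2` as in the loop above:

```python
from PIL import Image

img = Image.open(path2)
if img.mode != 'RGB':
    img.convert('RGB').save(path2)   # re-save as a 3-channel RGB jpg
```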
The image cleanup is now essentially complete. The following code prints each class name together with its image count:

```python
# print every class name and the number of images it contains
for item1 in os.listdir(path):
    path1 = path + item1 + '/'
    print(item1)
    t = 0
    for item2 in os.listdir(path1):
        t += 1
    print(t)
```

##### 3. Train/test split

Training requires both a training set and a test set.

We split the data 10:1 into training and test sets: read the data, shuffle it, then copy it out in the chosen proportion:

```python
# folder holding the images
path = './fruit_flower/'
# destination folders for the split
newpath = './fruit_flower/train/'
testpath = './fruit_flower/test/'

# class folders are renamed to sequential numeric labels
i1 = 0

# create the split folders if they do not exist
if not os.path.exists(newpath):
    os.mkdir(newpath)
if not os.path.exists(testpath):
    os.mkdir(testpath)

# walk the class folders one by one
for items1 in os.listdir(path):
    path1 = path + items1 + '/'
    newpath1 = newpath + str(i1) + '/'
    testpath1 = testpath + str(i1) + '/'
    i = 0
    # (re)create the per-class train/test folders
    if not os.path.exists(newpath1):
        os.mkdir(newpath1)
    if os.listdir(newpath1):
        shutil.rmtree(newpath1)
        os.mkdir(newpath1)
    if not os.path.exists(testpath1):
        os.mkdir(testpath1)
    if os.listdir(testpath1):
        shutil.rmtree(testpath1)
        os.mkdir(testpath1)
    i1 += 1
    # count the images, then shuffle by drawing a random index sequence
    for items2 in os.listdir(path1):
        i += 1
    resultList = random.sample(range(0, i), 1100)
    # split 10:1 - the first 1000 go to train, the last 100 to test
    for t in range(1100):
        if t < 1000:
            shutil.copy(path1 + str(resultList[t]) + '.jpg', newpath1)
            os.rename(newpath1 + str(resultList[t]) + '.jpg', newpath1 + 'img' + str(t+1) + '.jpg')
        else:
            shutil.copy(path1 + str(resultList[t]) + '.jpg', testpath1)
            os.rename(testpath1 + str(resultList[t]) + '.jpg', testpath1 + 'img' + str(t + 1) + '.jpg')
```

This gives us the processed image classification data.

We have open-sourced the data on Baidu Netdisk.

Download link: https://pan.baidu.com/s/1N0DybQfNV_4JWCRTYd7SKw (extraction code: 3ey9)
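A quick sanity check (a sketch; paths as in the scripts above) that every class folder ended up with the expected 1000 training and 100 test images:

```python
import os

for split, expected in [('./fruit_flower/train/', 1000), ('./fruit_flower/test/', 100)]:
    for cls in sorted(os.listdir(split)):
        n = len(os.listdir(os.path.join(split, cls)))
        print(split, cls, n, 'OK' if n == expected else 'MISMATCH')
```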
#### 1.4.2 Object detection data

Next we process the object detection data. The later training uses the TensorFlow 2 Object Detection API, so all data is converted to the .tfrecord format, which makes training easier.

We picked 16 flower and fruit classes from ImageNet for the object detection training.

##### 1. Merging the data

We merge the downloaded detection data of each class, pair every image with its annotation file, and put all the processed data into a single folder:

```python
import os
import shutil

# source data folder
path = './newimg/flower/'
# merged output folder
newimg = './newimg/combine/'
t = 0

# create the output folder if needed, emptying any stale contents
if not os.path.exists(newimg):
    os.mkdir(newimg)
if os.listdir(newimg):
    shutil.rmtree(newimg)
    os.mkdir(newimg)

# walk the class folders one by one
for items1 in os.listdir(path):
    path1 = path + items1 + '/'
    i = 0
    for items2 in os.listdir(path1):
        i += 1
    print(int((i/2)+1))
    # each pair p.jpg / p.xml is copied out and renamed img<t>
    for p in range(1, int((i/2)+1)):
        shutil.copy(path1 + str(p) + '.jpg', newimg)
        shutil.copy(path1 + str(p) + '.xml', newimg)
        os.rename(newimg + str(p) + '.jpg', newimg + 'img' + str(t) + '.jpg')
        os.rename(newimg + str(p) + '.xml', newimg + 'img' + str(t) + '.xml')
        t += 1
```

##### 2. TFRecord data

First all the xml annotation information has to be gathered into a csv table, which makes generating the TFRecord data easier.

The following code produces the csv tables:

```python
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

for folder in ['train', 'test']:
    image_path = os.path.join(os.getcwd(), ('images/' + folder))
    xml_df = xml_to_csv(image_path)
    xml_df.to_csv(('images/' + folder + '_labels.csv'), index=None)
    print('Successfully converted xml to csv.')
```

This writes the corresponding csv tables into the folder.

The following code merges all images and annotation files into a single TFRecord file. It draws on the Raccoon Detector Dataset project on GitHub: https://github.com/datitran/raccoon_dataset

The processing code:

```python
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow.compat.v1 as tf
from PIL import Image
import dataset_util
from collections import namedtuple, OrderedDict


flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('image_dir', '', 'Path to the image directory')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS


# map class names to integer ids - extend with one branch per class
def class_text_to_int(row_label):
    if row_label == 'apple':
        return 1
    else:
        return None

def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]

def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), FLAGS.image_dir)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
```

Save the code above as generate_tfrecord.py.

Then run the following commands to generate the training and test data respectively:

```
python generate_tfrecord.py --csv_input=images\train_labels.csv --image_dir=images\train --output_path=train.record
python generate_tfrecord.py --csv_input=images\test_labels.csv --image_dir=images\test --output_path=test.record
```

These commands produce the train.record and test.record files, which are used in the subsequent object detection training.
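The Object Detection API additionally expects a text label map (training/label_map.pbtxt in this repository) whose ids match `class_text_to_int` above. A minimal two-entry sketch of the format, using ids from the deployment scripts:

```
item {
  id: 1
  name: 'apple'
}
item {
  id: 2
  name: 'banana'
}
```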
We have open-sourced the data on Baidu Netdisk.

Object detection dataset: https://pan.baidu.com/s/1IJ6xjvHv_WHje9d3XbtX1Q (extraction code: xnkt)
--------------------------------------------------------------------------------
/Document/part2.md:
--------------------------------------------------------------------------------
## 2. Image Classification Model

### 2.1 Model selection

With the flower-and-fruit dataset in hand, the most important step is choosing a suitable **image recognition model** to learn from it, so that fruit and flowers can be classified.

First we studied the classic models from past ILSVRC competitions and summarised the core ideas and optimisation methods of several networks, laying the groundwork for model selection and tuning.

Because the flower-and-fruit dataset is small relative to ImageNet and only a few dozen classes are needed, using the classic models directly would greatly increase computation time and waste computing resources. Moreover, the final model has to be deployed on a **Raspberry Pi**, so the network structure cannot be too large.

For these two reasons we chose the **lightweight models** currently used on mobile devices, which keep a reasonable recognition accuracy while sharply reducing structure and parameter count, and hence network size.

After comparing three common **lightweight networks** (DenseNet, ShuffleNet, MobileNet), weighing each model's characteristics against the accuracy obtained on the flower-and-fruit dataset, we finally chose **MobileNetV2** as the image recognition model.

Keeping the original model's depthwise separable convolutions and inverted residual structure, we adjusted and optimised the network structure and parameters so that it learns the dataset better, achieving higher accuracy than the stock model.

#### 2.1.1 Studying the classic models

In choosing an image recognition model, we first studied the outstanding classic models from the ILSVRC competitions on the **ImageNet** dataset: the champion and runner-up models of past years, such as AlexNet, VGG19, GoogleNet and ResNet.

**Depth comparison:**

**Top-1 accuracy:**

Among these networks, the models that interested us were VGG, GoogleNet and ResNet. We studied their core ideas, analysed each network's structure along with its strengths and weaknesses, and identified what each network improved over its predecessor, giving us directions for optimising our own network. In summary:

**VGG**

Uses stacks of several consecutive **3x3 convolutions** in place of larger kernels as the convolution size throughout the network.
**优点**:结构简洁,整个网络都使用了同样大小的卷积核尺寸(3x3)和最大池化尺寸(2x2),通过不断加深网络结构可以提升性能 44 | 45 | **缺点**:参数过多,耗费过多计算资源 46 | 47 | 48 | 49 | **GoogleNet** 50 | 51 | 使用多个**inception模块**(如下图)串联从而形成最终的网络。 52 | 53 | inception结构的主要贡献有两个:一是使用1x1的卷积来进行升降维,二是在多个尺寸上同时进行卷积再聚合。 54 | 55 | 56 | 57 | **优点**:增加了网络的深度和宽度,但没有增加计算代价、提升了对网络内部计算资源的利用。 58 | 59 | 60 | 61 | **ResNet** 62 | 63 | 参考了VGG19网络,在其基础上进行了修改,并通过短路机制加入了**残差块**(Residual Block) 64 | 65 | 66 | 67 | **优点**:首次使用了残差块引入恒等快捷连接,直接跳过一个或多个层,解决了深度神经网络的“退化”问题,即给网络叠加更多的层后,性能却快速下降的情况。 68 | 69 | 70 | 71 | #### 2.1.2 轻型网络的选择 72 | 73 | 通过将这些网络应用到我们的花果数据集,得出了较高的准确率,但综合考虑了各个网络参数量、计算时间以及花果分类的数据集后,我们决定选择轻型网络作为最终的模型,原因如下: 74 | 75 | 1. 由于该项目最终需要部署到**树莓派**上,经典网络模型的结构与参数量都过大,无法应用到树莓派 76 | 2. 经典网络模型的应用任务往往是大型分类任务,而本项目最终所需要分类的花果仅几十种,实际应用时导致计算资源与时间的浪费。 77 | 3. **轻型网络**:在保证一定的识别精度的情况下,大大减小网络结构与参数量,从而减少网络规模。适用于手机、树莓派等移动终端中,如DenseNet、ShuffleNet、MobileNet等。 78 | 79 | 80 | 81 | **DenseNet** 82 | 83 | 在ResNet的基础上提出了一个更激进的密集连接机制,互相连接所有的层。 84 | 85 | 网络由多个**dense block**组成,在dense block中每层的输入是前面所有层的输出concat形成的,结构如下: 86 | 87 | 88 | 89 | **优点**:加强了feature的传递,并更有效的利用了它、大大减少了参数的数量 90 | 91 | 92 | 93 | **ShuffleNet** 94 | 95 | 在ResNeXt的基础上,使用**分组逐点卷积**(group pointwise convolution)来代替原来的结构。即通过将卷积运算的输入限制在每个组内,模型的计算量取得了显著的下降。 96 | 97 | 引入了**组间信息交换的机制**。即对于第二层卷积而言,每个卷积核需要同时接收各组的特征作为输入。 98 | 99 | 100 | 101 | **优点**:原创了三种混洗单元,每个单元都是由逐群卷积和信道混洗组成,这种结构极大的降低了计算代价 102 | 103 | 104 | 105 | **MobileNetV1** 106 | 107 | 在VGG的基础上,将其中的标准卷积层替换为**深度可分离卷积**,其计算代价是由深度卷积和逐点卷积两部分。并添加了两个超参数:**瘦身乘子**(width multiplier)其取值范围为0~1,用来减少网络的通道数。另外一个参数为**分辨率乘子**(resolution multiplier),该参数将缩放输入图像的尺寸,尺寸范围为224~128之间。 108 | 109 | 110 | 111 | **优点**:使用了深度可分离卷积,大大减少了参数量。并添加了两个超参数,使得在保证了一定准确度的前提下,网络模型进一步缩小 112 | 113 | 114 | 115 | #### 2.1.3 MobileNetV2模型 116 | 117 | 在V2的网络设计中,除了继续使用V1中的深度可分离卷积之外,还使用了Expansion layer和 Projection layer。 118 | 119 | **projection layer**使用1×1的网络结构,目的是将高维特征映射到低维空间去。 120 | 121 | **Expansion layer**的功能正相反,在使用1×1的网络结构的同时,目的是将低维特征映射到高维空间。其中的一个超参数决定了将维度扩展几倍 122 | 123 | **网络结构**:先通过Expansion layer来扩展维度,之后用深度可分离卷积来提取特征,而后使用Projection layer来压缩数据,让网络重新变小。因为Expansion layer 和 Projection layer都具有可以学习的参数,所以整个网络结构可以学习到如何更好的扩展数据和重新压缩数据。 124 | 125 | 126 | 127 | **优点**:在V1的基础上,使用了**倒残差结构**(Inverted residual block),即使用Pointwise先对特征图进行升维,在升维后接上Relu,减少Relu对特征的破坏。并引入了**特征复用结构**(ResNet bottleneck) 128 | 129 | 130 | 131 | 最终,通过将几种轻型网络应用于花果数据集后,综合**验证集准确率**以及**树莓派模型适用性**,我们选择了**MobileNetV2**为最终的网络模型,并进行了代码的调整与优化,使其更好地适用于本项目中的花果分类。 132 | 133 | 134 | 135 | 136 | ### 2.2 数据准备 137 | 138 | 我们需要使用我们已经预处理好的花卉与水果数据,图片共有30类,其中花卉与水果各有15类。 139 | 140 | 每类水果图片包含了1000张训练集,与100张测试集。数据集总共有33000张图片。 141 | 142 | | 序号 | 英文名 | 中文名 | | 序号 | 英文名 | 中文名 | 143 | | :--: | :---------: | :----: | ---- | :--: | :-----------: | :----: | 144 | | 1 | apple | 苹果 | | 16 | aster | 紫苑 | 145 | | 2 | banana | 香蕉 | | 17 | begonia | 秋海棠 | 146 | | 3 | blueberry | 蓝莓 | | 18 | calla lily | 马蹄莲 | 147 | | 4 | cherry | 樱桃 | | 19 | chrysanthemum | 菊花 | 148 | | 5 | durian | 榴莲 | | 20 | cornflower | 矢车菊 | 149 | | 6 | fig | 无花果 | | 21 | corydali | 紫堇 | 150 | | 7 | grape | 葡萄 | | 22 | dahlia | 大丽花 | 151 | | 8 | lemon | 柠檬 | | 23 | daisy | 雏菊 | 152 | | 9 | litchi | 荔枝 | | 24 | gentian | 龙胆 | 153 | | 10 | mango | 芒果 | | 25 | mistflower | 雾花 | 154 | | 11 | orange | 橙子 | | 26 | nigella | 黑霉菌 | 155 | | 12 | pineapple | 菠萝 | | 27 | rose | 玫瑰 | 156 | | 13 | plum | 李子 | | 28 | sandwort | 沙参 | 157 | | 14 | pomegranate | 石榴 | | 29 | sunflower | 向日葵 | 158 | | 15 | strawberry | 草莓 | | 30 | veronica | 婆婆纳 | 159 | 160 | ### 2.3 Tensorflow2框架搭建 161 | 162 | 我们使用Tensorflow2进行深度学习框架的搭建。理由如下: 163 | 164 | * 
我们在本项目中主要注重模型的最终使用。Tensorflow2在模型部署方面有着相当成熟的API,可以更加快速的进行部署。 165 | 166 | 167 | * Tensorflow2有封装好的深度学习训练API,如tf.keras,能够快速的搭建模型和使用。 168 | 169 | #### 2.3.1 预处理函数 170 | 171 | 首先我们导入需要使用的python包: 172 | 173 | ```python 174 | import tensorflow as tf 175 | from tensorflow.keras import layers, models 176 | ``` 177 | 178 | 接下来我们定义图像处理函数,我们将图片所在文件夹名作为数据的标签,并将所有图片处理为相同的格式大小。 179 | 180 | 图片默认处理为224x224格式大小: 181 | 182 | ```python 183 | def load_image(img_path,size = (224,224)): 184 | # 如果使用的windows系统,需要将sep='/'改为sep='\\' 185 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[-2]), tf.int8) 186 | img = tf.io.read_file(img_path) 187 | img = tf.image.decode_jpeg(img) 188 | img = tf.image.resize(img,size)/255.0 189 | return(img,label) 190 | ``` 191 | 192 | 定义模型训练过程中主要参数: 193 | 194 | ```python 195 | BATCH_SIZE = 100 196 | EPOCHS = 10 197 | ``` 198 | 199 | 导入测试集与训练集,并行化处理数据: 200 | 201 | ```python 202 | # 数据集文件夹所在路径 203 | datapath = "/content/gdrive/MyDrive/data/classification/" 204 | 205 | ds_train = tf.data.Dataset.list_files(datapath+"fruit_flower/train/*/*.jpg") \ 206 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 207 | .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \ 208 | .prefetch(tf.data.experimental.AUTOTUNE) 209 | 210 | ds_test = tf.data.Dataset.list_files(datapath+"fruit_flower/test/*/*.jpg") \ 211 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 212 | .batch(BATCH_SIZE) \ 213 | .prefetch(tf.data.experimental.AUTOTUNE) 214 | ``` 215 | 216 | 将预处理好的图片用matplotlib画图展示: 217 | 218 | ```python 219 | from matplotlib import pyplot as plt 220 | 221 | plt.figure(figsize=(8,8)) 222 | for i,(img,label) in enumerate(ds_train.unbatch().take(9)): 223 | ax=plt.subplot(3,3,i+1) 224 | ax.imshow(img.numpy()) 225 | ax.set_title("label = %d"%label) 226 | ax.set_xticks([]) 227 | ax.set_yticks([]) 228 | plt.show() 229 | ``` 230 | 231 | 232 | 233 | #### 2.3.2 定义MobileNetV2模型 234 | 235 | 使用Keras接口有以下3种方式构建模型:使用Sequential按层顺序构建模型,使用函数式API构建任意结构模型,继承Model基类构建自定义模型。 236 | 237 | 这里我们选择使用函数式API构建模型。 238 | 239 | 我们用Tensorflow2中的tf.keras.applications函数,调用MobileNetV2模型,其中: 240 | 241 | + 输入图片维度设置为(224,224,3) 242 | + 去除掉原有网络的卷积层 243 | + 定义新的卷积层 244 | 245 | ```python 246 | # tf.keras.applications导入模型 247 | Mo = tf.keras.applications.MobileNetV2( 248 | input_shape=(224,224,3), 249 | include_top=False) 250 | Mo.trainable=True 251 | 252 | model = models.Sequential() 253 | model.add(Mo) 254 | model.add(layers.GlobalAveragePooling2D()) 255 | model.add(layers.Flatten()) 256 | model.add(layers.Dense(512, activation='relu')) 257 | model.add(layers.Dropout(rate=0.5)) 258 | model.add(layers.Dense(256, activation='relu')) 259 | model.add(layers.Dropout(rate=0.5)) 260 | model.add(layers.Dense(30, activation='sigmoid')) 261 | model.summary() 262 | ``` 263 | 264 | ``` 265 | Model: "sequential_2" 266 | _________________________________________________________________ 267 | Layer (type) Output Shape Param # 268 | ================================================================= 269 | mobilenetv2_1.00_224 (Functi (None, 7, 7, 1280) 2257984 270 | _________________________________________________________________ 271 | global_average_pooling2d_2 ( (None, 1280) 0 272 | _________________________________________________________________ 273 | flatten_2 (Flatten) (None, 1280) 0 274 | _________________________________________________________________ 275 | dense_6 (Dense) (None, 512) 655872 276 | _________________________________________________________________ 277 | dropout_4 
(Dropout) (None, 512) 0 278 | _________________________________________________________________ 279 | dense_7 (Dense) (None, 256) 131328 280 | _________________________________________________________________ 281 | dropout_5 (Dropout) (None, 256) 0 282 | _________________________________________________________________ 283 | dense_8 (Dense) (None, 30) 7710 284 | ================================================================= 285 | Total params: 3,052,894 286 | Trainable params: 794,910 287 | Non-trainable params: 2,257,984 288 | _________________________________________________________________ 289 | ``` 290 | 291 | #### 2.3.3 训练模型 292 | 293 | 接下来我们开始训练模型,首先设置模型的各个回调函数。其中分别包括: 294 | 295 | + TensorBoard:使用TensorBoard,将训练过程进行可视化。 296 | + ModelCheckpoint:设置模型的检查点,使模型下次训练时从检查点开始。 297 | + EarlyStopping:当训练损失函数连续多轮没有变化时,自动停止训练 298 | + ReduceLROnPlateau:根据训练的迭代次数,逐渐减小学习率,提高学习精度。 299 | 300 | 以上函数的具体代码如下: 301 | 302 | ```python 303 | import datetime 304 | import os 305 | 306 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 307 | callback_dir = datapath+'model/callback/'+stamp 308 | tensorboard_callback = tf.keras.callbacks.TensorBoard(callback_dir, histogram_freq=1) 309 | 310 | checkpoint_path = datapath+'model/checkpoint/'+stamp 311 | model_save = tf.keras.callbacks.ModelCheckpoint( 312 | filepath=checkpoint_path, 313 | verbose=1, 314 | save_weights_only=True, 315 | period=20) 316 | 317 | early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0.001, 318 | patience = 5, mode = 'min', verbose = 1, 319 | restore_best_weights = True) 320 | 321 | reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.3, 322 | patience = 2, min_delta = 0.001, 323 | mode = 'min', verbose = 1) 324 | ``` 325 | 326 | 导入最新的检查点: 327 | 328 | ```python 329 | # 如果是初次训练,则不需要调用 330 | checkpoint_dir = datapath+'model/checkpoint/' 331 | latest = tf.train.latest_checkpoint(checkpoint_dir) 332 | model.load_weights(latest) 333 | ``` 334 | 335 | 定义模型的优化器以及损失函数: 336 | 337 | + 优化器:Adam优化器,即一种对随机目标函数执行一阶梯度优化的算法,该算法基于适应性低阶矩估计。 338 | + 损失函数:稀疏分类交叉熵(Sparse Categorical Crossentropy),多类的对数损失,适用于多类分类问题,且接受稀疏标签。 339 | 340 | 用Tensorflow2内置fit方法开始进行模型训练: 341 | 342 | ```python 343 | model.compile( 344 | optimizer=tf.keras.optimizers.Adam(lr=0.001), 345 | loss=tf.keras.losses.sparse_categorical_crossentropy, 346 | metrics=["accuracy"]) 347 | 348 | history = model.fit(ds_train,epochs=EPOCHS,validation_data=ds_test, 349 | callbacks = [tensorboard_callback, model_save, early_stop, reduce_lr]) 350 | ``` 351 | 352 | ### 2.4 模型评估 353 | 354 | #### 2.4.1 TensorBoard可视化 355 | 356 | TensorBoard可以用于查看训练的进度如何,我们可以在命令行或jupyter notebook中调用TensorBoard,实时查看损失函数的下降情况以及训练的具体进度,这里我们用jupyter notebook进行查看,具体操作如下: 357 | 358 | 首先导入TensorBoard: 359 | 360 | ```python 361 | %load_ext tensorboard 362 | 363 | from tensorboard import notebook 364 | notebook.list() 365 | ``` 366 | 367 | 接着输入训练对应文件夹,启用TensorBoard: 368 | 369 | ```python 370 | notebook.start("--logdir "+datapath+"model/callback/") 371 | ``` 372 | 373 | 374 | 375 | #### 2.4.2 训练正确率查看 376 | 377 | 我们使用python的pandas包,将每次的迭代正确率和损失以表格的形式呈现: 378 | 379 | ```python 380 | import pandas as pd 381 | 382 | dfhistory = pd.DataFrame(history.history) 383 | dfhistory.index = range(1,len(dfhistory) + 1) 384 | dfhistory.index.name = 'epoch' 385 | 386 | print(dfhistory) 387 | ``` 388 | 389 | 390 | 391 | 除此之外,我们将训练中训练集和验证集的正确率与损失函数以图的方式呈现,具体如下: 392 | 393 | ```python 394 | train_metrics = history.history["loss"] 395 | val_metrics = history.history['val_loss'] 396 | epochs = 
range(1, len(train_metrics) + 1) 397 | plt.plot(epochs, train_metrics, 'bo--') 398 | plt.plot(epochs, val_metrics, 'ro-') 399 | plt.title('Training and validation loss') 400 | plt.xlabel("Epochs") 401 | plt.ylabel("loss") 402 | plt.legend(["train_loss", 'val_loss']) 403 | plt.show() 404 | ``` 405 | 406 | 407 | 408 | ```python 409 | train_metrics = history.history["accuracy"] 410 | val_metrics = history.history['val_accuracy'] 411 | epochs = range(1, len(train_metrics) + 1) 412 | plt.plot(epochs, train_metrics, 'bo--') 413 | plt.plot(epochs, val_metrics, 'ro-') 414 | plt.title('Training and validation accuracy') 415 | plt.xlabel("Epochs") 416 | plt.ylabel("accuracy") 417 | plt.legend(["train_accuracy", 'val_accuracy']) 418 | plt.show() 419 | ``` 420 | 421 | 422 | 423 | ### 2.5 模型改进 424 | 425 | 在训练模型时要从模型的实际用途进行出发,我们进一步对训练好的模型进行改进。 426 | 427 | 在接下来的模型改进中,主要从以下两个方面进行考虑: 428 | 429 | + 减小模型的大小,提高模型运算速度 430 | + 提高模型的准确率 431 | 432 | 以上改进分别通过输入图片大小调整和网络参数调整来实现,同时我们还通过迁移学习提高模型训练效率。 433 | 434 | #### 2.5.1 迁移学习 435 | 436 | 迁移学习通过下载以训练好的模型权重,减少模型的训练量,加快模型的训练速度。这里我们使用MobileNetV2在ImageNet上训练的权重作为迁移学习的模型。 437 | 438 | 首先定义模型,并调用迁移学习的参数: 439 | 440 | ```python 441 | Mo = tf.keras.applications.MobileNetV2( 442 | input_shape=(224,224,3), 443 | include_top=False, 444 | weights='imagenet') 445 | 446 | model = models.Sequential() 447 | model.add(Mo) 448 | model.add(layers.GlobalAveragePooling2D()) 449 | model.add(layers.Flatten()) 450 | model.add(layers.Dense(512, activation='relu')) 451 | model.add(layers.Dropout(rate=0.3)) 452 | model.add(layers.Dense(256, activation='relu')) 453 | model.add(layers.Dropout(rate=0.3)) 454 | model.add(layers.Dense(30, activation='sigmoid')) 455 | model.summary() 456 | ``` 457 | 458 | 我们控制模型的前20层为不可训练的参数,20层往后为可训练参数: 459 | 460 | ```python 461 | for layer in Mo.layers[:20]: 462 | layer.trainable=False 463 | for layer in Mo.layers[20:]: 464 | layer.trainable=True 465 | ``` 466 | 467 | 调用fit函数开始训练模型: 468 | 469 | ```python 470 | history = model.fit(ds_train,epochs=EPOCHS,validation_data=ds_test, 471 | callbacks = [tensorboard_callback, model_save, early_stop, reduce_lr]) 472 | ``` 473 | 474 | #### 2.5.2 输入图片大小调整 475 | 476 | 输入图片的大小决定了卷积神经网络的复杂程度,以及参数的多少。对于较多的分类情况下,如ImageNet数据集,使用的是(224,224,3)维度的图片。但在本项目中,图片的分类较少,为30种,可以使用较小矩阵的图片,我们将图片大小调整为(100,100,3),并重新进行训练,具体如下。 477 | 478 | 改变函数,调整图片大小: 479 | 480 | ```python 481 | def load_image(img_path,size = (224,224)): 482 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[8]), tf.int8) 483 | img = tf.io.read_file(img_path) 484 | img = tf.image.decode_jpeg(img) 485 | img = tf.image.resize(img,size)/255.0 486 | return(img,label) 487 | ``` 488 | 489 | 将图片进行可视化: 490 | 491 | ```python 492 | plt.figure(figsize=(8,8)) 493 | for i,(img,label) in enumerate(ds_train.unbatch().take(9)): 494 | ax=plt.subplot(3,3,i+1) 495 | ax.imshow(img.numpy()) 496 | ax.set_title("label = %d"%label) 497 | ax.set_xticks([]) 498 | ax.set_yticks([]) 499 | plt.show() 500 | ``` 501 | 502 | 重新对模型进行训练: 503 | 504 | ```python 505 | history = model.fit(ds_train,epochs=EPOCHS,validation_data=ds_test, 506 | callbacks = [tensorboard_callback, model_save, early_stop, reduce_lr]) 507 | ``` 508 | 509 | #### 2.5.3 网络参数调整 510 | 511 | 卷积神经网络的参数,以及训练时的超参数很大程度决定了模型的好坏,模型的参数可以从以下几点进行调整: 512 | 513 | + 卷积神经网络参数 514 | + 网络层的激活函数,选择relu或sigmod 515 | + dropout层的添加以及参数改变,防止过拟合 516 | + 网络结构调整,删去冗余网络结构,减小计算量 517 | + 训练超参数选择 518 | + 优化器的选择,选择合适的优化器 519 | + 学习率大小的调整,通过减小学习率来提高精度 520 | 521 | 以下我们通过调整学习率,以及dropout层的参数,来对模型进行改进。 522 | 523 | 重新定义模型,调整模型参数: 524 | 525 | 
```python 526 | Mo = tf.keras.applications.MobileNetV2( 527 | input_shape=(224,224,3), 528 | include_top=False) 529 | Mo.trainable=True 530 | 531 | model = models.Sequential() 532 | model.add(Mo) 533 | model.add(layers.GlobalAveragePooling2D()) 534 | model.add(layers.Flatten()) 535 | model.add(layers.Dense(512, activation='relu')) 536 | model.add(layers.Dropout(rate=0.3)) 537 | model.add(layers.Dense(256, activation='relu')) 538 | model.add(layers.Dropout(rate=0.3)) 539 | model.add(layers.Dense(30, activation='sigmoid')) 540 | model.summary() 541 | ``` 542 | 543 | 调整合适的学习率,将学习率从0.001调整至0.0005: 544 | 545 | ```python 546 | model.compile( 547 | optimizer=tf.keras.optimizers.Adam(lr=0.0005), 548 | loss=tf.keras.losses.sparse_categorical_crossentropy, 549 | metrics=["accuracy"]) 550 | ``` 551 | 552 | 调用fit函数训练模型: 553 | 554 | ```python 555 | history = model.fit(ds_train,epochs=EPOCHS,validation_data=ds_test, 556 | callbacks = [tensorboard_callback, model_save, early_stop, reduce_lr]) 557 | ``` 558 | 559 | -------------------------------------------------------------------------------- /Document/part3.md: -------------------------------------------------------------------------------- 1 | ## 三、目标检测模型 2 | 3 | ### 3.1 数据集的选取 4 | 5 | 在目标检测过程中,我们从图像分类的30种分类中,选取了8种水果,8种花卉(共16种),作为我们的目标检测模型训练数据。具体的选取种类如下: 6 | 7 | | 序号 | 英文名 | 中文名 | | 序号 | 英文名 | 中文名 | 8 | | :--: | :--------: | :----: | ---- | :--: | :--------: | :----: | 9 | | 1 | apple | 苹果 | | 9 | calla lily | 马蹄莲 | 10 | | 2 | banana | 香蕉 | | 10 | cornflower | 矢车菊 | 11 | | 3 | grape | 葡萄 | | 11 | corydalis | 延胡索 | 12 | | 4 | kiwifruit | 猕猴桃 | | 12 | dahlia | 大丽花 | 13 | | 5 | mango | 芒果 | | 13 | daisy | 雏菊 | 14 | | 6 | orange | 橘子 | | 14 | gentian | 龙胆 | 15 | | 7 | pear | 梨 | | 15 | nigella | 黑种草 | 16 | | 8 | strawberry | 草莓 | | 16 | sunflower | 向日葵 | 17 | 18 | 19 | ### 3.2 数据集获取 20 | 21 | 在目标检测领域,imagenet提供了丰富的目标检测原始数据,包括了不同种类的花卉和水果。我们在imagenet官网上,下载目标检测所需的数据。 22 | 23 | + 点击imagenet官网:http://www.image-net.org/ 24 | 25 | + 搜索我们需要的数据,这里我们以香蕉banana为例: 26 | 27 | 28 | 29 | + 点击Downloads,分别下载原始图片以及边界框。 30 | 31 | 32 | 33 | 下载好后,我们可以进一步查看我们下载的目标检测数据。LabelImg是目标检测标记图像的工具,它既可以用于标注图片,也可以用于查看目标检测的数据。我们可以在github中下载其最新版本,其github页面上有关于如何安装和使用它的非常清晰的说明。 34 | 35 | [LabelImg说明文档](https://github.com/tzutalin/labelImg) 36 | 37 | [点击下载LabelImg](https://www.dropbox.com/s/tq7zfrcwl44vxan/windows_v1.6.0.zip?dl=1) 38 | 39 | 我们将我们下载的images与Bounding Boxes放入同一个文件夹,并用LabelImg打开,具体效果如下: 40 | 41 | 42 | 43 | 同时我们也可以自己用LabelImg进行数据标注,LabelImg保存一个.xml文件,其中包含每个图像的标签数据。这些.xml文件将用于生成TFRecords,它们是TensorFlow训练的输入之一。 44 | 45 | ### 3.3 目标检测模型选择 46 | 47 | 在当前的目标检测领域中,已有较为成熟的研究成果。目前目标检测主要分为两个领域 48 | 49 | (1)**two-stage方法**,如R-CNN系算法,其主要思路是先通过启发式方法或者CNN网络(RPN)产生一系列稀疏的候选框,然后对这些候选框进行分类与回归,two-stage方法的优势是准确度高 50 | 51 | (2)**one-stage方法**,如Yolo和SSD,其主要思路是均匀地在图片的不同位置进行密集抽样,抽样时可以采用不同尺度和长宽比,然后利用CNN提取特征后直接进行分类与回归,整个过程只需要一步,所以其优势是速度快,但是均匀的密集采样的一个重要缺点是训练比较困难,这主要是因为正样本与负样本极其不均衡,导致模型准确度稍低。 52 | 53 | 因为我们在之后部署到树莓派的过程中,计算能力有限,所以这里我们选择one-stage方法中的SSD,在移动端仍可以保持较快的运算速度。 54 | 55 | 同时,我们选择将SSD与MobileNet相结合,生成以MobileNet为基底的SSD-MobileNetV2网络。具体的网络结构如下: 56 | 57 | 58 | 59 | ### 3.4 Tensorflow2目标检测API 60 | 61 | TensorFlow目标检测API是一个基于TensorFlow的开源框架,可轻松构建,训练和部署对象检测模型。在2020年6月,TensorFlow更新的目标检测,并适用于了TensorFlow2的版本。在本教程内,我们将基于TF2来进行目标检测。 62 | 63 | 首先我们要从github上克隆完整的Tensorflow object detection仓库,在命令行输入如下命令: 64 | 65 | ``` 66 | git clone https://github.com/tensorflow/models 67 | ``` 68 | 69 | 克隆或下载完成后,将models-master重命名为models。 70 | 71 | 接下来,我们的主要操作全部在 models/research/object_detection 中进行。 72 | 73 | 
同时,我们也可以下载一些预训练好的模型,用于之后模型的训练。我们在TensorFlow目标检测的仓库中可以找到对应的模型,在如下地址下载: 74 | 75 | [TF2预训练模型](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md) 76 | 77 | 在本篇文章中,我们主要使用了SSD-MobileNetV2模型,所以点击SSD-MobileNetV2模型进行下载: 78 | 79 | [SSD MobileNet v2 320x320](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz) 80 | 81 | ### 3.5 配置Tensorflow2环境 82 | 83 | 首先,我们需要使用pip安装tensorflow2版本环境。我们在命令行中输入: 84 | 85 | ``` 86 | pip install tensorflow==2.3.0 87 | ``` 88 | 89 | 如果使用的是GPU设备,则安装对应的tensorflow-gpu版本,在安装的同时,系统会自动安装对应的CUDA和cuDNN。 90 | 91 | 等待一段时间安装好后,我们再安装其他必要的软件包,在命令行中输入: 92 | 93 | ``` 94 | pip install tf_slim 95 | pip install lvis 96 | ``` 97 | 98 | 当软件包安装好后,我们需要编译tensorflow目标检测API中的Protobufs,首先切换到对应目录,然后再用protoc命令进行编译: 99 | 100 | ``` 101 | cd models/research 102 | protoc object_detection/protos/*.proto --python_out=. 103 | ``` 104 | 105 | 最后我们还需要配置PYTHONPATH环境变量,输入如下命令进行PYTHONPATH环境变量的配置: 106 | 107 | ``` 108 | export PYTHONPATH=$PYTHONPATH:models/research/:models 109 | ``` 110 | 111 | 完成TensorFlow对象检测API的所有设置并可以使用了。 112 | 113 | ### 3.6 生成TFRecords格式数据 114 | 115 | 接下来我们要生成TFRecords格式的数据,首先保证目标检测的数据已经处理为了如下格式,在文件夹中排列: 116 | 117 | 118 | 119 | 在models/research/object_detection中新建images文件夹。 120 | 121 | 将这些图片中,以10:1的比例分别放入models/research/object_detection/train,models/research/object_detection/test,用于后续的处理。 122 | 123 | 处理好图像后,便开始生成TFRecords了,该记录用作TensorFlow训练模型的输入数据。我们使用本教程github仓库中的xml_to_csv.py和generate_tfrecord.py文件。 124 | 125 | 首先,图像.xml数据将用于创建.csv文件,其中包含了训练集和测试集的所有数据。首先打开object_detection文件夹,在命令行中运行此命令: 126 | 127 | ``` 128 | python xml_to_csv.py 129 | ``` 130 | 131 | 这将在object_detection/images文件夹中创建train_labels.csv和test_labels.csv文件。 132 | 133 | 接下来,在文本编辑器中打开generate_tfrecord.py文件。并用generate_tfrecord.py生成对应的TFRecords文件,执行以下命令: 134 | 135 | ``` 136 | python generate_tfrecord.py --csv_input=images\train_labels.csv --image_dir=images\train --output_path=training/train.record 137 | python generate_tfrecord.py --csv_input=images\test_labels.csv --image_dir=images\test --output_path=training/test.record 138 | ``` 139 | 140 | 执行完命令后,我们便发现在object_detection/training文件夹中有了train.record和test.record文件,这两个文件分别作为之后目标检测的训练集与测试集使用。 141 | 142 | ### 3.7 目标检测模型训练 143 | 144 | 在执行训练之前,我们先介绍一下训练所需的各个文件对应的含义。 145 | 146 | #### 3.7.1 label_map.pbtxt标签图 147 | 148 | 标签图通过定义分类名称和对应的分类编号的映射,来说明训练中每个编号对应的内容是什么。我们将标签图放在models/research/object_detection/training文件夹中,并按照对应的格式写好,具体的格式如下: 149 | 150 | ``` 151 | item { 152 | id: 1 153 | name: 'apple' 154 | } 155 | 156 | item { 157 | id: 2 158 | name: 'banana' 159 | } 160 | 161 | item { 162 | id: 3 163 | name: 'grape' 164 | } 165 | 166 | ... 
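# ……依此类推,共16类,id从1连续编号到16,顺序与3.1节的种类列表一致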
167 | ``` 168 | 169 | 标签映射ID编号应与generate_tfrecord.py文件中定义的编号相同。 170 | 171 | #### 3.7.2 pipeline.config配置 172 | 173 | 我们在训练不同的目标检测模型时,需要对配置文件进行修改,以下是在训练SSD-MobileNetV2模型时所用到的配置文件,具体如下: 174 | 175 | ``` 176 | model { 177 | ssd { 178 | inplace_batchnorm_update: true 179 | freeze_batchnorm: false 180 | num_classes: 16 181 | box_coder { 182 | faster_rcnn_box_coder { 183 | y_scale: 10.0 184 | x_scale: 10.0 185 | height_scale: 5.0 186 | width_scale: 5.0 187 | } 188 | } 189 | matcher { 190 | argmax_matcher { 191 | matched_threshold: 0.5 192 | unmatched_threshold: 0.5 193 | ignore_thresholds: false 194 | negatives_lower_than_unmatched: true 195 | force_match_for_each_row: true 196 | use_matmul_gather: true 197 | } 198 | } 199 | similarity_calculator { 200 | iou_similarity { 201 | } 202 | } 203 | encode_background_as_zeros: true 204 | anchor_generator { 205 | ssd_anchor_generator { 206 | num_layers: 6 207 | min_scale: 0.2 208 | max_scale: 0.95 209 | aspect_ratios: 1.0 210 | aspect_ratios: 2.0 211 | aspect_ratios: 0.5 212 | aspect_ratios: 3.0 213 | aspect_ratios: 0.3333 214 | } 215 | } 216 | image_resizer { 217 | fixed_shape_resizer { 218 | height: 300 219 | width: 300 220 | } 221 | } 222 | box_predictor { 223 | convolutional_box_predictor { 224 | min_depth: 0 225 | max_depth: 0 226 | num_layers_before_predictor: 0 227 | use_dropout: false 228 | dropout_keep_probability: 0.8 229 | kernel_size: 1 230 | box_code_size: 4 231 | apply_sigmoid_to_scores: false 232 | class_prediction_bias_init: -4.6 233 | conv_hyperparams { 234 | activation: RELU_6, 235 | regularizer { 236 | l2_regularizer { 237 | weight: 0.00004 238 | } 239 | } 240 | initializer { 241 | random_normal_initializer { 242 | stddev: 0.01 243 | mean: 0.0 244 | } 245 | } 246 | batch_norm { 247 | train: true, 248 | scale: true, 249 | center: true, 250 | decay: 0.97, 251 | epsilon: 0.001, 252 | } 253 | } 254 | } 255 | } 256 | feature_extractor { 257 | type: 'ssd_mobilenet_v2_keras' 258 | min_depth: 16 259 | depth_multiplier: 1.0 260 | conv_hyperparams { 261 | activation: RELU_6, 262 | regularizer { 263 | l2_regularizer { 264 | weight: 0.00004 265 | } 266 | } 267 | initializer { 268 | truncated_normal_initializer { 269 | stddev: 0.03 270 | mean: 0.0 271 | } 272 | } 273 | batch_norm { 274 | train: true, 275 | scale: true, 276 | center: true, 277 | decay: 0.97, 278 | epsilon: 0.001, 279 | } 280 | } 281 | override_base_feature_extractor_hyperparams: true 282 | } 283 | loss { 284 | classification_loss { 285 | weighted_sigmoid_focal { 286 | alpha: 0.75, 287 | gamma: 2.0 288 | } 289 | } 290 | localization_loss { 291 | weighted_smooth_l1 { 292 | delta: 1.0 293 | } 294 | } 295 | classification_weight: 1.0 296 | localization_weight: 1.0 297 | } 298 | normalize_loss_by_num_matches: true 299 | normalize_loc_loss_by_codesize: true 300 | post_processing { 301 | batch_non_max_suppression { 302 | score_threshold: 1e-8 303 | iou_threshold: 0.6 304 | max_detections_per_class: 100 305 | max_total_detections: 100 306 | } 307 | score_converter: SIGMOID 308 | } 309 | } 310 | } 311 | 312 | train_config: { 313 | fine_tune_checkpoint_version: V2 314 | fine_tune_checkpoint: "models/research/object_detection/ssd_mobilenet_v2_320x320_coco17_tpu-8/checkpoint/ckpt-0" 315 | fine_tune_checkpoint_type: "detection" 316 | batch_size: 256 317 | sync_replicas: true 318 | startup_delay_steps: 0 319 | replicas_to_aggregate: 8 320 | num_steps: 100000 321 | data_augmentation_options { 322 | random_horizontal_flip { 323 | } 324 | } 325 | data_augmentation_options { 326 | ssd_random_crop { 
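# 以上两个data_augmentation_options为训练时的数据增强:随机水平翻转与SSD随机裁剪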
327 | } 328 | } 329 | optimizer { 330 | momentum_optimizer: { 331 | learning_rate: { 332 | cosine_decay_learning_rate { 333 | learning_rate_base: .8 334 | total_steps: 50000 335 | warmup_learning_rate: 0.13333 336 | warmup_steps: 2000 337 | } 338 | } 339 | momentum_optimizer_value: 0.9 340 | } 341 | use_moving_average: false 342 | } 343 | max_number_of_boxes: 100 344 | unpad_groundtruth_tensors: false 345 | } 346 | 347 | train_input_reader: { 348 | label_map_path: "models/research/object_detection/training/label_map.pbtxt" 349 | tf_record_input_reader { 350 | input_path: "models/research/object_detection/training/train.record" 351 | } 352 | } 353 | 354 | eval_config: { 355 | metrics_set: "coco_detection_metrics" 356 | use_moving_averages: false 357 | } 358 | 359 | eval_input_reader: { 360 | label_map_path: "models/research/object_detection/training/label_map.pbtxt" 361 | shuffle: false 362 | num_epochs: 1 363 | tf_record_input_reader { 364 | input_path: "models/research/object_detection/training/test.record" 365 | } 366 | } 367 | ``` 368 | 369 | 该文件基于Tensorflow Object Detection自带的配置文件修改而得,如果需要自己训练模型,可以在models/research/object_detection/configs/tf2中找到对应模型的配置文件,一般主要修改以下几处: 370 | 371 | - 将num_classes更改为分类器要检测的不同对象的数量,本文修改为16。 372 | - 将fine_tune_checkpoint更改为预训练模型的对应路径,如果没有预训练模型,则删除本行。 373 | - 将fine_tune_checkpoint_type修改为:"detection",如果没有预训练模型,则删除本行。 374 | - 在train_input_reader部分中,将input_path和label_map_path更改为: 375 | - input_path:“models/research/object_detection/training/train.record” 376 | - label_map_path:“models/research/object_detection/training/label_map.pbtxt” 377 | - 在eval_input_reader部分中,将input_path和label_map_path更改为: 378 | - input_path:“models/research/object_detection/training/test.record” 379 | - label_map_path:“models/research/object_detection/training/label_map.pbtxt” 380 | 381 | #### 3.7.3 开始训练 382 | 383 | Tensorflow2训练时,主要使用object_detection文件夹下的model_main_tf2.py文件进行数据集的训练。 384 | 385 | model_main_tf2.py文件主要有如下几个输入的选项: 386 | 387 | + pipeline_config_path:输入对应的配置文件位置。 388 | + model_dir:训练模型时对应的文件夹位置。 389 | + checkpoint_every_n:每n步对模型进行一次保存。 390 | + record_summaries:储存模型的训练过程。 391 | 392 | 我们使用如下命令开始对模型进行训练: 393 | 394 | ``` 395 | python3 model_main_tf2.py \ 396 | --logtostderr \ 397 | --model_dir=training \ 398 | --pipeline_config_path=training/pipeline.config \ 399 | --checkpoint_every_n=200 \ 400 | --record_summaries=training 401 | ``` 402 | 403 | 在执行命令后,TensorFlow将初始化训练,初始化过程大约需要1-2分钟。初始化完成后,程序便开始正式训练: 404 | 405 | 406 | 407 | 在训练过程中,命令行会每100步(step)输出一次训练结果。本文部署的SSD-MobileNetV2模型使用NVIDIA Tesla V100显卡进行训练,约5-6小时后结果开始逐步收敛,损失函数最终收敛到0.1以下,目标检测模型可以基本实现正确的检测。 408 | 409 | ### 3.8 TensorBoard查看训练进度 410 | 411 | TensorBoard可以用于实时查看训练的进度,我们可以在命令行或jupyter notebook中调用TensorBoard,实时查看损失函数的下降情况以及训练的具体进度,这里我们用jupyter notebook进行查看,具体操作如下: 412 | 413 | 首先导入TensorBoard: 414 | 415 | ```python 416 | %load_ext tensorboard 417 | 418 | from tensorboard import notebook 419 | notebook.list() 420 | ``` 421 | 422 | 接着输入训练对应文件夹,启用TensorBoard: 423 | 424 | ```python 425 | notebook.start("--logdir models/research/object_detection/training") 426 | ``` 427 | 428 | 我们可以在输出界面看到训练的具体情况: 429 | 430 | 431 | 432 | ### 3.9 导出训练模型 433 | 434 | 接下来我们导出训练好的模型,并将模型转换为tflite格式。 435 | 436 | #### 3.9.1 导出.pb格式模型 437 | 438 | 我们使用object_detection文件夹中的export_tflite_graph_tf2.py文件,先将模型导出: 439 | 440 | ``` 441 | python export_tflite_graph_tf2.py \ 442 | --pipeline_config_path training/pipeline.config \ 443 | --trained_checkpoint_dir training \ 444 | --output_directory trainingModel 445 | ``` 446 | 
导出的模型位于object_detection/trainingModel文件夹,如果不需要部署到单片机或其他移动端设备,则可以对此模型直接使用。 448 | 449 | #### 3.9.2 .pb格式模型转化为.tflite 450 | 451 | tflite是为了将深度学习模型部署在移动端和嵌入式设备的工具包,可以把训练好的模型通过转化、部署和优化三个步骤,达到提升运算速度,减少内存、显存占用的效果。 452 | 453 | 我们需要对已有的模型进行转换,首先使用pip安装tf-nightly: 454 | 455 | ``` 456 | pip install tf-nightly 457 | ``` 458 | 459 | tf-nightly是支持tensorflow2的软件包,并首次被添加到tensorflow 2.3版本。 460 | 461 | 接下来我们使用tflite_convert对模型进行转换,将原有的模型转化为.tflite格式,在命令行中输入如下命令: 462 | 463 | ``` 464 | tflite_convert --saved_model_dir=trainingModel/saved_model --output_file=trainingModel/model.tflite 465 | ``` 466 | 467 | 这时我们可以看到trainingModel文件夹中生成了model.tflite文件,此时我们便可以用此模型部署到树莓派上了。 -------------------------------------------------------------------------------- /Document/part4.md: -------------------------------------------------------------------------------- 1 | ## 四、深度学习部署 2 | 3 | ### 4.1 单片机简介 4 | 5 | #### 4.1.1 硬件简介 6 | 7 | 8 | 9 | ##### 树莓派zero w 10 | 11 | 在用树莓派部署深度学习过程中,我们选用树莓派zero w作为主要设备,树莓派zero w是树莓派系列最为基础的设备,搭载树莓派linux系统下,可以很好的运行程序。同时它还包括了wifi模块与蓝牙模块,方便pc与树莓派之间数据的传输。 12 | 13 | 树莓派zero主要参数如下: 14 | 15 | - 博通 BCM2835 芯片 1GHz ARM11 core 16 | - 512MB LPDDR2 SDRAM 17 | - 一个 micro-SD 卡槽 18 | - 一个 mini-HDMI 接口,支持 1080p 60hz 视频输出 19 | - Micro-USB 接口用于供电和数据传输 20 | - MicroUSB数据线,8G内存的MicroSD卡,用于烧制linux系统镜像 21 | 22 | 树莓派zero相比于其他型号树莓派,性能略有差异,但是仍可以胜任模型部署。 23 | 24 | ##### 摄像头 25 | 26 | 因为在训练模型过程中,我们对图片没有过高要求,仅采用较小像素图片进行训练,所以在实际使用时,我们使用500万像素摄像头进行拍摄,在实际使用中能够充分的发挥其作用。 27 | 28 | ##### 3.5寸显示屏 29 | 30 | 显示屏采用串口外接3.5寸显示屏,主要用于展示图像分类与目标检测的具体结果,屏幕为LCD显示屏,具有触摸功能,可以对系统进行具体的操控。 31 | 32 | #### 4.1.2 软件环境 33 | 34 | 我们使用了**Raspberry Pi OS + python3.7**作为我们的软件环境。 35 | 36 | Raspberry Pi OS环境自带python2.X版本,但是我们深度学习框架需要3.X以上的版本,所以需要在Linux系统中配置python环境。 37 | 38 | 在python官网下载后,选择源码安装,在通过xshell拷贝到linux系统中。通过文件传输将下载的压缩包上传后,通过yum-y命令安装依赖包和tar命令解压源码包。 39 | 40 | ``` 41 | ./configure --prefix=/home/python3 42 | ``` 43 | 44 | 使用该命令为将要添加的python安装环境变量,在建立一个sh文件添加环境变量进去之后重载一下,linux系统下的python环境就配置完成了 45 | 46 | ### 4.2 树莓派环境搭建 47 | 48 | #### 4.2.1 Raspberry Pi OS系统配置 49 | 50 | ##### 1.系统下载 51 | 52 | 我们使用Raspberry Pi Imager在SD卡上进行快速安装,首先在树莓派官网下载Raspberry Pi Imager: 53 | 54 | [Raspberry Pi Imager下载](https://www.raspberrypi.org/software/) 55 | 56 | 57 | 58 | 下载完成后,我们打开安装器,选择Raspberry Pi OS系统,并选择对应的SD卡进行系统安装。 59 | 60 | 61 | 62 | 等待下载结束后,我们便得到了一张装有树莓派系统的SD卡。 63 | 64 | ##### 2.文件配置 65 | 66 | 我们将SD卡插入树莓派,并按照系统提示完成系统的安装: 67 | 68 | 69 | 70 | 接下来我们还需要对系统进行简单的配置。 71 | 72 | + root账户设置 73 | 74 | 首先设置root账户密码: 75 | 76 | ``` 77 | sudo passwd root 78 | ``` 79 | 80 | 接下来我们编辑文件,配置root远程登录的权限: 81 | 82 | ``` 83 | nano /etc/ssh/sshd_config 84 | ``` 85 | 86 | 打开文件后,在文档末尾添加: 87 | 88 | ``` 89 | PermitRootLogin yes 90 | PermitEmptyPasswords no 91 | PasswordAuthentication yes 92 | ``` 93 | 94 | 添加完成后,用ctrl+o 保存,ctrl+x 退出。 95 | 96 | + 摄像头连接树莓派 97 | 98 | 首先将摄像头与树莓派相连,接着在命令行中输入: 99 | 100 | ``` 101 | sudo raspi-config 102 | ``` 103 | 104 | 选择Interface Options—camera,选择yes,将摄像头权限开启,我们便可以使用树莓派进行摄像头拍照了。 105 | 106 | 在命令行执行如下命令: 107 | 108 | ``` 109 | raspistill -t 2000 -o image.jpg 110 | ``` 111 | 112 | 如果看到文件夹中新增了image.jpg文件,则代表配置成功。 113 | 114 | #### 4.2.2 Tensorflow2安装 115 | 116 | tensorflow lite支持树莓派3及以上的版本,如果使用的是以上版本的树莓派,则可以到以下网址进行tensorflow lite的下载和安装。 117 | 118 | [tensorflow lite下载](https://www.tensorflow.org/lite/guide/python?hl=zh-cn) 119 | 120 | 由于树莓派zero不支持tensorflow lite,我们必须下载完整的Tensorflow2包,再从中调用Tensorflow lite模块。 121 | 122 | 以下是树莓派zero安装tensorflow2的具体方法。首先我们需要下载tensorflow2的arm编译版本,在[tensorflow 
arm编译版本下载](https://github.com/lhelontra/tensorflow-on-arm/releases/tag/v2.2.0)可以找到对应支持的版本。 123 | 124 | 因为我们使用的是python3.7,所以我们在树莓派命令行中输入: 125 | 126 | ``` 127 | wget https://github.com/lhelontra/tensorflow-on-arm/releases/download/v2.2.0/tensorflow-2.2.0-cp37-none-linux_armv6l.whl 128 | ``` 129 | 130 | 下载完成后对文件进行重命名: 131 | 132 | ``` 133 | mv tensorflow-2.2.0-cp37-none-linux_armv6l.whl tensorflow-2.2.0-cp37-abi3-linux_armv6l.whl 134 | ``` 135 | 136 | 然后使用pip3安装对应的.whl文件 137 | 138 | ``` 139 | sudo pip3 install tensorflow-2.2.0-cp37-abi3-linux_armv6l.whl 140 | ``` 141 | 142 | 等待程序安装好后,我们便可以在树莓派zero上使用Tensorflow2了。输入如下命令进行测试: 143 | 144 | ``` 145 | python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([100, 100])))" 146 | ``` 147 | 148 | 如果出现了正确的输出,则代表tensorflow2安装成功。 149 | 150 | #### 4.2.3 OpenCV安装 151 | 152 | OpenCV的全称是Open Source Computer Vision Library,是一个跨平台的计算机视觉库,我们利用OpenCV操作树莓派进行拍照和图像的预处理。OpenCV在树莓派zero上的安装方法具体如下。 153 | 154 | 首先在命令行输入以下内容,安装必要的环境配置: 155 | 156 | ``` 157 | sudo apt-get -y install libjpeg-dev libtiff5-dev libjasper-dev libpng12-dev 158 | sudo apt-get -y install libavcodec-dev libavformat-dev libswscale-dev libv4l-dev 159 | sudo apt-get -y install libxvidcore-dev libx264-dev 160 | sudo apt-get -y install qt4-dev-tools libatlas-base-dev 161 | ``` 162 | 163 | 接下来我们使用pip3安装OpenCV: 164 | 165 | ``` 166 | pip3 install opencv-python==3.4.6.27 167 | ``` 168 | 169 | 等待安装成功后,我们便可以使用OpenCV了。 170 | 171 | ### 4.3 树莓派部署模型 172 | 173 | #### 4.3.1 图像分类模型部署 174 | 175 | ##### 1.导出为tensorflow模型 176 | 177 | 模型训练好之后会通过lastest_checkpoint命令导入最后一次训练的参数,checkpoint_dir是运行过程中得到的网络结构和权重值,作为暂时的值存储在文件夹里 178 | 179 | ```python 180 | latest = tf.train.latest_checkpoint(checkpoint_dir) 181 | model.load_weights(latest) 182 | ``` 183 | 184 | 模型结构参数导出后,需要在重新运行一次,运行结果应该与训练过程的最后一次结果相同。 185 | 186 | ```python 187 | model.compile( 188 | optimizer=tf.keras.optimizers.Adam(), 189 | loss=tf.keras.losses.sparse_categorical_crossentropy, 190 | metrics=["accuracy"] 191 | ) 192 | 193 | history = model.fit(ds_train,epochs=500,validation_data=ds_test, 194 | callbacks = [tensorboard_callback, cp_callback]) 195 | 196 | ``` 197 | 198 | 此时的model包括了网络结构和权重参数,可以直接保存为h5文件,这里得到的h5文件大小为28.7M 199 | 200 | ```python 201 | model.save('./data/moblie_2.h5') 202 | ``` 203 | 204 | ##### 2.使用tflite部署 205 | 206 | tflite是谷歌自己的一个轻量级推理库。主要用于移动端。之前的tensorflow mobile就是用的tflite部署方式,tflite使用的思路主要是从预训练的模型转换为tflite模型文件,拿到移动端部署。tflite的源模型可以来自tensorflow的saved model或者frozen model,也可以来自keras。 207 | 208 | ```python 209 | model=tf.keras.models.load_model("./data/moblie_2.h5") 210 | converter = tf.lite.TFLiteConverter.from_keras_model(model) 211 | tfmodel = converter.convert() 212 | open ("model.tflite" , "wb") .write(tfmodel) 213 | ``` 214 | 215 | 通过此代码读取保存的h5文件经过convert处理后转换成tflite文件,此时得到的文件大小只有7.4M,大大的减小了模型大小。 216 | 217 | ##### 3.摄像头拍照 218 | 219 | 通过opencv包打开摄像头进行拍摄 220 | 221 | ```python 222 | import cv2 as cv 223 | def video_demo(): 224 | #0是代表摄像头编号,只有一个的话默认为0 225 | capture=cv.VideoCapture(0) 226 | if not cap.isOpened(): 227 | print("Cannot open camera") 228 | exit() 229 | while(True): 230 | ref,frame=capture.read() 231 | 232 | cv.imshow("1",frame) 233 | #等待30ms显示图像,若过程中按“Esc”退出 234 | c= cv.waitKey(30) & 0xff 235 | if c==27: 236 | capture.release() 237 | break 238 | ``` 239 | 240 | cv.VideoCapture(0)表示读取视频,当输入为0时默认打开的是电脑摄像头。 241 | read函数会返回两个值ref和frame,前者为true的时候表示获取到了图像,后者参数表示截取到的每一张图片。 242 | cv.waitKey(30)&oxff: cv.waitKey(delay)函数如果delay为0就没有返回值,如果delay大于0,如果有按键就返回按键值,如果没有按键就在delay秒后返回-1,0xff的ASCII码为1111 
1111,任何数与它&操作都等于它本身。`Esc`按键的ASCII码为27,所以当c==27时,摄像头释放。 243 | 244 | ```python 245 | video_demo() 246 | cv.destroyAllWindows() 247 | ``` 248 | 249 | 最后通过cv.destroyAllWindows()函数清除所有的方框界面。 250 | 251 | #### 4.3.2 目标检测模型部署 252 | 253 | ##### 1.导入tflite模型 254 | 255 | 首先我们需要在树莓派上下载Tensorflow Object Detection的API包,在树莓派命令行中输入: 256 | 257 | ``` 258 | git clone https://github.com/tensorflow/models 259 | ``` 260 | 261 | 克隆完成后,将克隆的仓库进行重命名: 262 | 263 | ``` 264 | mv models-master models 265 | ``` 266 | 267 | 下载目标检测API必要的软件包: 268 | 269 | ``` 270 | pip3 install tf_slim 271 | pip3 install lvis 272 | ``` 273 | 274 | 导入python的环境路径: 275 | 276 | ``` 277 | export PYTHONPATH=$PYTHONPATH:models/research/:models 278 | ``` 279 | 280 | 接下来我们便可以进行目标检测模型的部署了。部署主要分为两部分,首先是加载tflite模型: 281 | 282 | ```python 283 | import tensorflow as tf 284 | import numpy as np 285 | from object_detection.utils import visualization_utils as viz_utils 286 | from object_detection.utils import config_util 287 | from object_detection.builders import model_builder 288 | import cv2 289 | 290 | 291 | # 模型识别种类个数 292 | num_classes = 16 293 | # 模型位置 294 | pipeline_config = 'pipeline.config' 295 | # 模型标签 296 | category_index = {1: {'id': 1, 'name': 'apple'}, 2: {'id': 2, 'name': 'banana'}, 3: {'id': 3, 'name': 'grape'}, 4: {'id': 4, 'name': 'kiwifruit'}, 5: {'id': 5, 'name': 'mango'}, 6: {'id': 6, 'name': 'orange'}, 7: {'id': 7, 'name': 'pear'}, 8: {'id': 8, 'name': 'stawberry'}, 9: {'id': 9, 'name': 'calla lily'}, 10: {'id': 10, 'name': 'cornflower'}, 11: {'id':11, 'name': 'corydalis'}, 12: {'id': 12, 'name': 'dahlia'}, 13: {'id': 13, 'name': 'daisy'}, 14: {'id': 14, 'name': 'gentian'}, 15: {'id': 15, 'name': 'nigella'}, 16: {'id': 16, 'name': 'sunflower'}} 297 | 298 | # 定义模型 299 | configs = config_util.get_configs_from_pipeline_file(pipeline_config) 300 | model_config = configs['model'] 301 | model_config.ssd.num_classes = num_classes 302 | model_config.ssd.freeze_batchnorm = True 303 | detection_model = model_builder.build(model_config=model_config, is_training=True) 304 | 305 | # 加载tflite文件 306 | interpreter = tf.lite.Interpreter(model_path="model.tflite") 307 | interpreter.allocate_tensors() 308 | label_id_offset = 1 309 | ``` 310 | 311 | ##### 2.OpenCV拍照与展示 312 | 313 | 接下来用OpenCV包进行实时拍照处理,并将拍照结果放入目标检测模型进行检测: 314 | 315 | ```python 316 | # 定义摄像头 317 | capture = cv2.VideoCapture(0) 318 | 319 | while True: 320 | # 拍照并预处理照片 321 | ret, frame = capture.read() 322 | frame = cv2.flip(frame, 1) 323 | frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 324 | test = np.expand_dims(frame_rgb, axis=0) 325 | input_tensor = tf.convert_to_tensor(test, dtype=tf.float32) 326 | # 目标检测模型进行检测 327 | boxes, classes, scores = detect(interpreter, input_tensor) 328 | viz_utils.visualize_boxes_and_labels_on_image_array( 329 | test[0], 330 | boxes[0], 331 | classes[0].astype(np.uint32) + label_id_offset, 332 | scores[0], 333 | category_index, 334 | use_normalized_coordinates=True, 335 | min_score_thresh=0.8) 336 | # 呈现检测结果 337 | frame = cv2.cvtColor(test[0], cv2.COLOR_BGR2RGB) 338 | cv2.imshow("Object detector", frame) 339 | c = cv2.waitKey(20) 340 | # 如果按q键,则终止 341 | if c == 113: 342 | break 343 | cv2.destroyAllWindows() 344 | ``` 345 | 346 | ### 4.4 服务器改进部署方式 347 | 348 | #### 4.4.1 Flask框架的搭建 349 | 350 | Flask是一个使用python编写的轻量级web应用框架,主要用来接收和发送数据。当树莓派端Flask发送Post请求时,Flask可以使用Request包获取传来的数据,并将计算结果作为Post请求的返回值返回给树莓派 351 | 352 | 在服务器中使用Flask框架时,我们需要引入flask包,并定义函数,这样当接收到树莓派请求时,程序便会执行对应的函数,并将结果返回给树莓派。 353 | 354 | 以下是一个简单的Flask框架搭建: 355 | 356 | ```python 357 | from flask import 
Flask 358 | app = Flask(__name__) 359 | 360 | @app.route('/', methods=["post"]) 361 | def index(): 362 | return "hello world!
" 363 | 364 | if __name__ == '__main__': 365 | app.run(host='192.168.1.1', debug=True) 366 | ``` 367 | 368 | 云服务器使用flask时,只需要将端口号对应的函数上面使用装饰器,并在主函数运行主端口号即可。 369 | 370 | ```python 371 | if __name__ == '__main__': 372 | app.run(host='192.168.1.1', debug=True, port=8888) 373 | ``` 374 | 375 | #### 4.4.2 Nginx+uwsgi的配置 376 | 377 | 单纯使用flask框架构造简单,但是器并发性效果较差,我们可以改进部署方式,选用Nginx + uwsgi + flask的部署方式增加稳定性。 378 | 379 | 首先安装Nginx,用如下命令进行安装: 380 | 381 | ``` 382 | apt install nginx 383 | ``` 384 | 385 | 安装完成后对Nginx进行配置,具体的配置因服务器的具体情况而定: 386 | 387 | ``` 388 | server { 389 | listen 80; # 监听端口,http默认80 390 | server_name _; # 填写域名或者公网IP 391 | location / { 392 | include uwsgi_params; # 使用nginx内置的uwsgi配置参数文件 393 | uwsgi_pass 127.0.0.1:8088; # 转发请求到该地址端口 394 | uwsgi_param UWSGI_SCRIPT main:app; # 调用的脚本名称和application变量名 395 | } 396 | } 397 | ``` 398 | 399 | 最后启动Nginx: 400 | 401 | ``` 402 | service nginx start 403 | ``` 404 | 405 | 接下来安装uwsgi: 406 | 407 | ``` 408 | pip install -i https://pypi.tuna.tsinghua.edu.cn/simple uwsgi 409 | ``` 410 | 411 | 查看uwsgi的版本,若显示则表示安装成功: 412 | 413 | ``` 414 | uwsgi --version 415 | ``` 416 | 417 | 接下来我们创建uwsgi的配置文件,在命令行中输入: 418 | 419 | ``` 420 | vim main.ini 421 | ``` 422 | 423 | 将以下内容输入到文本当中,其中wsgi-file为部署模型的python程序名,在文章之后会有程序的具体内容;socket为Nginx的转接地址;threads为同时开启的线程数,如需要同时调试多个模型,请增大线程数: 424 | 425 | ``` 426 | [uwsgi] 427 | master = true 428 | wsgi-file = main.py 429 | callable = app 430 | socket = 127.0.0.1:8001 431 | processes = 4 432 | threads = 2 433 | buffer-size = 32768 434 | ``` 435 | 436 | 全部配置完成后,运行只需要输入: 437 | 438 | ``` 439 | uwsgi main.ini 440 | ``` 441 | 442 | #### 4.4.3 图像分类模型部署 443 | 444 | 单片机运算内存较小,用其自带的运算器计算速度很慢,因此我们可以使用云服务器加持,从树莓派端收集图片,在树莓派端进行图片裁剪后发送给云服务器进行模型导入计算,并返回label值给树莓派。 445 | 446 | ##### 1.树莓派端图像裁剪 447 | 448 | 树莓派端通过调用opencv的摄像头函数采集图像后,进行图像缩放及图像均值化等简单操作,把图像缩放成224*224大小,并用直方图均值化的方法处理光照不均,最后通过端口发送图片给云服务器 449 | 450 | ```python 451 | def load_image(img_path, size=(224, 224)): 452 | img = tf.io.read_file(img_path) 453 | img = tf.image.decode_jpeg(img) 454 | img = tf.image.resize(img, size)/255.0 455 | return img 456 | ``` 457 | 458 | ##### 2.服务器端模型分类 459 | 460 | 云服务器端首先加载之前处理好的tflite模型文件,导入训练好的模型骨架和参数。 461 | 462 | ```python 463 | def evaluate_model(interpreter, test_image): 464 | input_index = interpreter.get_input_details()[0]["index"] 465 | output_index = interpreter.get_output_details()[0]["index"] 466 | test_image = np.expand_dims(test_image, axis=0).astype(np.float32) 467 | interpreter.set_tensor(input_index, test_image) 468 | interpreter.invoke() 469 | output = interpreter.tensor(output_index) 470 | output = np.argmax(output()[0]) 471 | return output 472 | ``` 473 | 474 | 通过8089这个端口号接收到图片后,将图片暂存之文件夹内,并读取该图片放入到预加载好的模型里 475 | 476 | ```python 477 | interpreter = tf.lite.Interpreter(model_path='MobileNetV2.tflite') 478 | interpreter.allocate_tensors() 479 | ``` 480 | 481 | 此模型共分为30类 482 | 483 | ```python 484 | classlist = ["apple", "banana", "blueberry", "cherry", "durian", "fig", "grape", "lemon", "litchi", "mango", "orange", "pineapple", "plum", "pomegranate", "strawberry", "aster", "begonia", "calla_lily", "chrysanthemum", "cornflower", "corydali", "dahlia", "daisy", "gentian", "mistflower", "nigella", "rose", "sandwort", "sunflower", "veronica"] 485 | ``` 486 | 487 | 每次计算都会得到一个0到29的索引值,云服务器会根据索引值索引到类别,返回字符串给树莓派端。 488 | 489 | ```python 490 | @app.route('/', methods=['post']) 491 | def predict(): 492 | upload_file = request.files['file'] 493 | file_name = upload_file.filename 494 | file_path = '/home/ubuntu/inifyy/img' 495 | if upload_file: 496 | 
file_paths = os.path.join(file_path, file_name) 497 | upload_file.save(file_paths) 498 | test = load_image(file_paths) 499 | result = evaluate_model(interpreter, test) 500 | result = classlist[result] 501 | return result 502 | ``` 503 | 504 | #### 4.4.4 目标检测模型部署 505 | 506 | ##### 1.树莓派端数据发送 507 | 508 | 树莓派端首先用OpenCV包进行拍照与图像处理,接着利用requests模块发送post请求,并等待服务器返回运行结果,具体的部署代码如下: 509 | 510 | ```python 511 | import requests 512 | import numpy as np 513 | import cv2 514 | 515 | 516 | # 服务器公网地址 517 | url = "http://127.0.0.1:8088/" 518 | # post图片格式 519 | content_type = 'image/jpeg' 520 | headers = {'content-type': content_type} 521 | # 定义摄像头 522 | capture = cv2.VideoCapture(0) 523 | while True: 524 | # 拍照与图片预处理 525 | ret, frame = capture.read() 526 | frame = cv2.resize(frame, (160, 120), interpolation=cv2.INTER_CUBIC) 527 | # 将图片数据编码并发送 528 | img_encoded = cv2.imencode('.jpg', frame)[1] 529 | imgstring = np.array(img_encoded).tobytes() 530 | response = requests.post(url, data=imgstring, headers=headers) 531 | imgstring = np.asarray(bytearray(response.content), dtype="uint8") 532 | # 展示返回结果 533 | img = cv2.imdecode(imgstring, cv2.IMREAD_COLOR) 534 | cv2.imshow("video", img) 535 | c = cv2.waitKey(20) 536 | # 如果按q键,则终止 537 | if c == 113: 538 | break 539 | cv2.destroyAllWindows() 540 | ``` 541 | 542 | ##### 2.服务器端模型检测 543 | 544 | 服务器端的模型部署与之前在树莓派上部署模型类似,都用到了Tensorflow Object Detection的API。 545 | 546 | 首先我们需要在树莓派上下载Tensorflow Object Detection的API包,在树莓派命令行中输入: 547 | 548 | ``` 549 | git clone https://github.com/tensorflow/models 550 | ``` 551 | 552 | 克隆完成后,将克隆的仓库进行重命名: 553 | 554 | ``` 555 | mv models-master models 556 | ``` 557 | 558 | 下载目标检测API必要的软件包: 559 | 560 | ``` 561 | pip3 install tf_slim 562 | pip3 install lvis 563 | ``` 564 | 565 | 导入python的环境路径: 566 | 567 | ``` 568 | export PYTHONPATH=$PYTHONPATH:models/research/:models 569 | ``` 570 | 571 | 接下来我们便可以进行目标检测模型的部署了,具体部署代码如下: 572 | 573 | ```python 574 | import tensorflow as tf 575 | import numpy as np 576 | from object_detection.utils import visualization_utils as viz_utils 577 | from object_detection.utils import config_util 578 | from object_detection.builders import model_builder 579 | import cv2 580 | from flask import Flask, request 581 | app = Flask(__name__) 582 | 583 | 584 | # 定义检测函数 585 | def detect(interpreter, input_tensor): 586 | input_details = interpreter.get_input_details() 587 | output_details = interpreter.get_output_details() 588 | preprocessed_image, shapes = detection_model.preprocess(input_tensor) 589 | interpreter.set_tensor(input_details[0]['index'], preprocessed_image.numpy()) 590 | interpreter.invoke() 591 | boxes = interpreter.get_tensor(output_details[0]['index']) 592 | classes = interpreter.get_tensor(output_details[1]['index']) 593 | scores = interpreter.get_tensor(output_details[2]['index']) 594 | return boxes, classes, scores 595 | 596 | 597 | # 模型识别种类个数 598 | num_classes = 16 599 | # 模型位置 600 | pipeline_config = 'pipeline.config' 601 | # 模型标签 602 | category_index = {1: {'id': 1, 'name': 'apple'}, 2: {'id': 2, 'name': 'banana'}, 3: {'id': 3, 'name': 'grape'}, 4: {'id': 4, 'name': 'kiwifruit'}, 5: {'id': 5, 'name': 'mango'}, 6: {'id': 6, 'name': 'orange'}, 7: {'id': 7, 'name': 'pear'}, 8: {'id': 8, 'name': 'stawberry'}, 9: {'id': 9, 'name': 'calla lily'}, 10: {'id': 10, 'name': 'cornflower'}, 11: {'id':11, 'name': 'corydalis'}, 12: {'id': 12, 'name': 'dahlia'}, 13: {'id': 13, 'name': 'daisy'}, 14: {'id': 14, 'name': 'gentian'}, 15: {'id': 15, 'name': 'nigella'}, 16: {'id': 16, 'name': 'sunflower'}} 603 | 604 | # 定义模型 605 | 
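# 注:与4.3.2节树莓派端的做法一致,先依据pipeline.config重建SSD模型结构,检测时复用其preprocess方法做预处理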
configs = config_util.get_configs_from_pipeline_file(pipeline_config) 606 | model_config = configs['model'] 607 | model_config.ssd.num_classes = num_classes 608 | model_config.ssd.freeze_batchnorm = True 609 | detection_model = model_builder.build(model_config=model_config, is_training=True) 610 | 611 | # 加载tflite文件 612 | interpreter = tf.lite.Interpreter(model_path="model.tflite") 613 | interpreter.allocate_tensors() 614 | label_id_offset = 1 615 | 616 | 617 | # 定义预测函数,用于接受post及预测 618 | @app.route('/', methods=["post"]) 619 | def predict(): 620 | # 解码接收的图像文件 621 | imgstring = np.asarray(bytearray(request.data), dtype="uint8") 622 | img = cv2.imdecode(imgstring, cv2.IMREAD_COLOR) 623 | frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 624 | test = np.expand_dims(frame, axis=0) 625 | # 目标检测 626 | input_tensor = tf.convert_to_tensor(test, dtype=tf.float32) 627 | boxes, classes, scores = detect(interpreter, input_tensor) 628 | viz_utils.visualize_boxes_and_labels_on_image_array( 629 | test[0], 630 | boxes[0], 631 | classes[0].astype(np.uint32) + label_id_offset, 632 | scores[0], 633 | category_index, 634 | use_normalized_coordinates=True, 635 | min_score_thresh=0.8) 636 | #返回运算结果 637 | frame = cv2.cvtColor(test[0], cv2.COLOR_BGR2RGB) 638 | img_encoded = cv2.imencode('.jpg', frame)[1] 639 | imgstring = np.array(img_encoded).tobytes() 640 | return imgstring 641 | 642 | if __name__ == '__main__': 643 | app.run(debug=True, host='127.0.0.1', port=8088) 644 | ``` -------------------------------------------------------------------------------- /Image_Classification/Alexnet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | import datetime 4 | import os 5 | 6 | 7 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 8 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 9 | 10 | BATCH_SIZE = 50 11 | ds6 = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*") 12 | for file in ds6.take(10): 13 | print(file) 14 | 15 | # labels = tf.constant(["apple", " blueberry", "grape", "mango", " pear", " plum", "watermelon", "banana", "cherry", " lemon", "orange", "pineapple", "strawberry"]) 16 | 17 | 18 | def load_image(img_path,size = (224,224)): 19 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 20 | img = tf.io.read_file(img_path) 21 | img = tf.image.decode_jpeg(img) 22 | img = tf.image.resize(img,size)/255.0 23 | return(img,label) 24 | 25 | 26 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*") \ 27 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 28 | .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \ 29 | .prefetch(tf.data.experimental.AUTOTUNE) 30 | 31 | ds_test = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/test/*/*") \ 32 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 33 | .batch(BATCH_SIZE) \ 34 | .prefetch(tf.data.experimental.AUTOTUNE) 35 | 36 | inputs = layers.Input(shape=(224,224,3)) 37 | x = layers.Conv2D(32,kernel_size=(3,3),padding="same")(inputs) 38 | x = layers.Activation('relu')(x) 39 | 40 | x = layers.Conv2D(32,kernel_size=(3,3),padding="same")(x) 41 | x = layers.Activation('relu')(x) 42 | x = layers.MaxPool2D(pool_size=2,strides=2)(x) 43 | x = layers.Conv2D(64,kernel_size=(3,3),padding="same")(x) 44 | x = layers.Activation('relu')(x) 45 | x = layers.Conv2D(64,kernel_size=(3,3),padding= "same")(x) 46 | x = layers.Activation('relu')(x) 47 | x 
= layers.MaxPool2D(pool_size=2,strides=2)(x) 48 | ### 继续构建卷积层和池化层,这次核数量设置成128 49 | x = layers.Conv2D(128,kernel_size=(3,3),padding="same")(x) 50 | x = layers.Activation('relu')(x) 51 | x = layers.Conv2D(128,kernel_size=(3,3),padding="same")(x) 52 | x = layers.Activation('relu')(x) 53 | x = layers.MaxPool2D()(x) 54 | x = layers.Flatten()(x)##数据扁平化处理 55 | x = layers.Dense(128,activation='relu')(x) 56 | x = layers.Dense(64,activation='relu')(x) 57 | x = layers.Dropout(rate=0.3)(x) 58 | outputs = layers.Dense(30,activation = 'softmax')(x) 59 | 60 | model = models.Model(inputs = inputs,outputs = outputs) 61 | 62 | model.summary() 63 | 64 | 65 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 66 | logdir = os.path.join('data', 'autograph', stamp) 67 | tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1) 68 | model.compile( 69 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), 70 | loss=tf.keras.losses.sparse_categorical_crossentropy, 71 | metrics=["accuracy"] 72 | ) 73 | history = model.fit(ds_train,epochs= 500,validation_data=ds_test, 74 | callbacks = [tensorboard_callback],workers = 4) 75 | 76 | # # 保存权重,该方式仅仅保存权重张量 77 | model.save_weights('./data/tf_model_weights.ckpt',save_format = "tf") 78 | # 79 | # # 保存模型结构与模型参数到文件,该方式保存的模型具有跨平台性便于部署 80 | # 81 | model.save('./data/tf_model_savedmodel', save_format="tf") 82 | print('export saved model.') 83 | 84 | model_loaded = tf.keras.models.load_model('./data/tf_model_savedmodel') 85 | model_loaded.evaluate(ds_test) 86 | -------------------------------------------------------------------------------- /Image_Classification/MobileNetV2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | import datetime 4 | import os 5 | 6 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 7 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 8 | 9 | BATCH_SIZE = 128 10 | EPOCHS = 100 11 | 12 | def load_image(img_path,size = (224,224)): 13 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 14 | img = tf.io.read_file(img_path) 15 | img = tf.image.decode_jpeg(img) 16 | img = tf.image.resize(img,size)/255.0 17 | return(img,label) 18 | 19 | 20 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*.jpg") \ 21 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 22 | .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \ 23 | .prefetch(tf.data.experimental.AUTOTUNE) 24 | 25 | ds_test = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/test/*/*.jpg") \ 26 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 27 | .batch(BATCH_SIZE) \ 28 | .prefetch(tf.data.experimental.AUTOTUNE) 29 | 30 | 31 | # tf.keras.applications导入模型 32 | Mo = tf.keras.applications.MobileNetV2( 33 | input_shape=(224,224,3), 34 | include_top=False) 35 | Mo.trainable=True 36 | 37 | model = models.Sequential() 38 | model.add(Mo) 39 | model.add(layers.GlobalAveragePooling2D()) 40 | model.add(layers.Flatten()) 41 | model.add(layers.Dense(512, activation='relu')) 42 | model.add(layers.Dropout(rate=0.5)) 43 | model.add(layers.Dense(256, activation='relu')) 44 | model.add(layers.Dropout(rate=0.5)) 45 | model.add(layers.Dense(30, activation='sigmoid')) 46 | model.summary() 47 | 48 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 49 | callback_dir = 'model/callback/'+stamp 50 | tensorboard_callback = tf.keras.callbacks.TensorBoard(callback_dir, 
histogram_freq=1) 51 | 52 | checkpoint_path = 'model/checkpoint/'+stamp 53 | model_save = tf.keras.callbacks.ModelCheckpoint( 54 | filepath=checkpoint_path, 55 | verbose=1, 56 | save_weights_only=True, 57 | period=20) 58 | 59 | early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0.001, 60 | patience = 5, mode = 'min', verbose = 1, 61 | restore_best_weights = True) 62 | 63 | reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.3, 64 | patience = 2, min_delta = 0.001, 65 | mode = 'min', verbose = 1) 66 | 67 | 68 | model.compile( 69 | optimizer=tf.keras.optimizers.Adam(lr=0.001), 70 | loss=tf.keras.losses.sparse_categorical_crossentropy, 71 | metrics=["accuracy"]) 72 | 73 | history = model.fit(ds_train,epochs=EPOCHS,validation_data=ds_test, 74 | callbacks = [tensorboard_callback, model_save, early_stop, reduce_lr]) 75 | 76 | history = model.fit(ds_train,epochs= 2000,validation_data=ds_test, 77 | callbacks = [tensorboard_callback, cp_callback]) 78 | 79 | -------------------------------------------------------------------------------- /Image_Classification/MobileNetV3.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import datetime 3 | import os 4 | 5 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 6 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 7 | 8 | BATCH_SIZE = 256 9 | NUM_CLASSES = 30 10 | 11 | 12 | def h_sigmoid(x): 13 | return tf.nn.relu6(x + 3) / 6 14 | 15 | 16 | def h_swish(x): 17 | return x * h_sigmoid(x) 18 | 19 | 20 | class SEBlock(tf.keras.layers.Layer): 21 | def __init__(self, input_channels, r=16): 22 | super(SEBlock, self).__init__() 23 | self.pool = tf.keras.layers.GlobalAveragePooling2D() 24 | self.fc1 = tf.keras.layers.Dense(units=input_channels // r) 25 | self.fc2 = tf.keras.layers.Dense(units=input_channels) 26 | 27 | def call(self, inputs, **kwargs): 28 | branch = self.pool(inputs) 29 | branch = self.fc1(branch) 30 | branch = tf.nn.relu(branch) 31 | branch = self.fc2(branch) 32 | branch = h_sigmoid(branch) 33 | branch = tf.expand_dims(input=branch, axis=1) 34 | branch = tf.expand_dims(input=branch, axis=1) 35 | output = inputs * branch 36 | return output 37 | 38 | 39 | class BottleNeck(tf.keras.layers.Layer): 40 | def __init__(self, in_size, exp_size, out_size, s, is_se_existing, NL, k): 41 | super(BottleNeck, self).__init__() 42 | self.stride = s 43 | self.in_size = in_size 44 | self.out_size = out_size 45 | self.is_se_existing = is_se_existing 46 | self.NL = NL 47 | self.conv1 = tf.keras.layers.Conv2D(filters=exp_size, 48 | kernel_size=(1, 1), 49 | strides=1, 50 | padding="same") 51 | self.bn1 = tf.keras.layers.BatchNormalization() 52 | self.dwconv = tf.keras.layers.DepthwiseConv2D(kernel_size=(k, k), 53 | strides=s, 54 | padding="same") 55 | self.bn2 = tf.keras.layers.BatchNormalization() 56 | self.se = SEBlock(input_channels=exp_size) 57 | self.conv2 = tf.keras.layers.Conv2D(filters=out_size, 58 | kernel_size=(1, 1), 59 | strides=1, 60 | padding="same") 61 | self.bn3 = tf.keras.layers.BatchNormalization() 62 | self.linear = tf.keras.layers.Activation(tf.keras.activations.linear) 63 | 64 | def call(self, inputs, training=None, **kwargs): 65 | x = self.conv1(inputs) 66 | x = self.bn1(x, training=training) 67 | if self.NL == "HS": 68 | x = h_swish(x) 69 | elif self.NL == "RE": 70 | x = tf.nn.relu6(x) 71 | x = self.dwconv(x) 72 | x = self.bn2(x, training=training) 73 | if self.NL == "HS": 74 | x = h_swish(x) 75 | elif self.NL == "RE": 76 | x = 
tf.nn.relu6(x) 77 | if self.is_se_existing: 78 | x = self.se(x) 79 | x = self.conv2(x) 80 | x = self.bn3(x, training=training) 81 | x = self.linear(x) 82 | if self.stride == 1 and self.in_size == self.out_size: 83 | x = tf.keras.layers.add([x, inputs]) 84 | return x 85 | 86 | 87 | class MobileNetV3Small(tf.keras.Model): 88 | def __init__(self): 89 | super(MobileNetV3Small, self).__init__() 90 | self.conv1 = tf.keras.layers.Conv2D(filters=16, 91 | kernel_size=(3, 3), 92 | strides=2, 93 | padding="same") 94 | self.bn1 = tf.keras.layers.BatchNormalization() 95 | self.bneck1 = BottleNeck(in_size=16, exp_size=16, out_size=16, s=2, is_se_existing=True, NL="RE", k=3) 96 | self.bneck2 = BottleNeck(in_size=16, exp_size=72, out_size=24, s=2, is_se_existing=False, NL="RE", k=3) 97 | self.bneck3 = BottleNeck(in_size=24, exp_size=88, out_size=24, s=1, is_se_existing=False, NL="RE", k=3) 98 | self.bneck4 = BottleNeck(in_size=24, exp_size=96, out_size=40, s=2, is_se_existing=True, NL="HS", k=5) 99 | self.bneck5 = BottleNeck(in_size=40, exp_size=240, out_size=40, s=1, is_se_existing=True, NL="HS", k=5) 100 | self.bneck6 = BottleNeck(in_size=40, exp_size=240, out_size=40, s=1, is_se_existing=True, NL="HS", k=5) 101 | self.bneck7 = BottleNeck(in_size=40, exp_size=120, out_size=48, s=1, is_se_existing=True, NL="HS", k=5) 102 | self.bneck8 = BottleNeck(in_size=48, exp_size=144, out_size=48, s=1, is_se_existing=True, NL="HS", k=5) 103 | self.bneck9 = BottleNeck(in_size=48, exp_size=288, out_size=96, s=2, is_se_existing=True, NL="HS", k=5) 104 | self.bneck10 = BottleNeck(in_size=96, exp_size=576, out_size=96, s=1, is_se_existing=True, NL="HS", k=5) 105 | self.bneck11 = BottleNeck(in_size=96, exp_size=576, out_size=96, s=1, is_se_existing=True, NL="HS", k=5) 106 | 107 | self.conv2 = tf.keras.layers.Conv2D(filters=576, 108 | kernel_size=(1, 1), 109 | strides=1, 110 | padding="same") 111 | self.bn2 = tf.keras.layers.BatchNormalization() 112 | self.avgpool = tf.keras.layers.AveragePooling2D(pool_size=(7, 7), 113 | strides=1) 114 | self.conv3 = tf.keras.layers.Conv2D(filters=1280, 115 | kernel_size=(1, 1), 116 | strides=1, 117 | padding="same") 118 | self.conv4 = tf.keras.layers.Conv2D(filters=NUM_CLASSES, 119 | kernel_size=(1, 1), 120 | strides=1, 121 | padding="same", 122 | activation=tf.keras.activations.softmax) 123 | 124 | def call(self, inputs, training=None, mask=None): 125 | x = self.conv1(inputs) 126 | x = self.bn1(x, training=training) 127 | x = h_swish(x) 128 | x = self.bneck1(x, training=training) 129 | x = self.bneck2(x, training=training) 130 | x = self.bneck3(x, training=training) 131 | x = self.bneck4(x, training=training) 132 | x = self.bneck5(x, training=training) 133 | x = self.bneck6(x, training=training) 134 | x = self.bneck7(x, training=training) 135 | x = self.bneck8(x, training=training) 136 | x = self.bneck9(x, training=training) 137 | x = self.bneck10(x, training=training) 138 | x = self.bneck11(x, training=training) 139 | x = self.conv2(x) 140 | x = self.bn2(x, training=training) 141 | x = h_swish(x) 142 | x = self.avgpool(x) 143 | x = self.conv3(x) 144 | x = h_swish(x) 145 | x = self.conv4(x) 146 | return x 147 | 148 | 149 | def compile_model(model): 150 | model.compile( 151 | optimizer=tf.keras.optimizers.Adam(), 152 | loss=tf.keras.losses.sparse_categorical_crossentropy, 153 | metrics=["accuracy"] 154 | ) 155 | return(model) 156 | 157 | 158 | def load_image(img_path,size = (224,224)): 159 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 
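# 上一行:服务器数据路径为/home/group7/dataset/fruit_flower/train/<类别编号>/<文件名>,split('/')后索引6即类别编号文件夹名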
160 | img = tf.io.read_file(img_path) 161 | img = tf.image.decode_jpeg(img) 162 | img = tf.image.resize(img,size)/255.0 163 | return img, label 164 | 165 | 166 | # Use num_parallel_calls to parallelize preprocessing and prefetch to overlap data loading with training 167 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*") \ 168 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 169 | .shuffle(buffer_size=1000).batch(BATCH_SIZE) \ 170 | .prefetch(tf.data.experimental.AUTOTUNE) 171 | 172 | ds_test = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/test/*/*") \ 173 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 174 | .batch(BATCH_SIZE) \ 175 | .prefetch(tf.data.experimental.AUTOTUNE) 176 | 177 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 178 | logdir = os.path.join('data', stamp) 179 | tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1) 180 | 181 | strategy = tf.distribute.MirroredStrategy() 182 | with strategy.scope(): 183 | model = MobileNetV3Small() 184 | model.build(input_shape=(None, 224, 224, 3)) 185 | model.summary() 186 | model = compile_model(model) 187 | 188 | history = model.fit(ds_train, epochs=800, validation_data=ds_test, 189 | callbacks=[tensorboard_callback]) 190 | 191 | # Save the model structure and weights together; the SavedModel format is cross-platform and convenient for deployment 192 | 193 | model.save('./data/model', save_format="tf") 194 | print('export saved model.') 195 | 196 | model_loaded = tf.keras.models.load_model('./data/model') 197 | model_loaded.evaluate(ds_test) 198 | 199 | -------------------------------------------------------------------------------- /Image_Classification/MobileNet_Selfconstruct.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow import keras 3 | import tensorflow.keras.backend as K 4 | from tensorflow.keras import layers, models, Sequential, backend 5 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization, Activation, GlobalAveragePooling2D 6 | from tensorflow.keras.layers import Concatenate, Lambda, Input, ZeroPadding2D, AveragePooling2D, DepthwiseConv2D, Reshape 7 | import datetime 8 | import os 9 | 10 | 11 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 12 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 13 | 14 | BATCH_SIZE = 20 15 | 16 | 17 | def relu6(x): 18 | return K.relu(x, max_value=6) 19 | 20 | 21 | # Round the number of feature channels to a multiple of 8 22 | def make_divisible(v, divisor, min_value=None): 23 | if min_value is None: 24 | min_value = divisor 25 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # // is floor division 26 | if new_v < 0.9 * v: 27 | new_v += divisor 28 | return new_v 29 | 30 | 31 | def pad_size(inputs, kernel_size): 32 | input_size = inputs.shape[1:3] 33 | 34 | if isinstance(kernel_size, int): 35 | kernel_size = (kernel_size, kernel_size) 36 | 37 | if input_size[0] is None: 38 | adjust = (1, 1) 39 | 40 | else: 41 | adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) 42 | 43 | correct = (kernel_size[0] // 2, kernel_size[1] // 2) 44 | 45 | return ((correct[0] - adjust[0], correct[0]), 46 | (correct[1] - adjust[1], correct[1])) 47 | 48 | 49 | def conv_block(x, nb_filter, kernel=(1, 1), stride=(1, 1), name=None): 50 | x = Conv2D(nb_filter, kernel, strides=stride, padding='same', use_bias=False, name=name + '_expand')(x) 51 | x = BatchNormalization(axis=3, name=name + '_expand_BN')(x) 52 | x = Activation(relu6, name=name + '_expand_relu')(x) 53 | 54 | return x 55 | 56 | 57 | def depthwise_res_block(x, nb_filter,
kernel, stride, t, alpha, residual=False, name=None): 58 | input_tensor = x 59 | exp_channels = x.shape[-1] * t # number of expanded (hidden) channels 60 | alpha_channels = int(nb_filter * alpha) # number of projected (output) channels 61 | 62 | x = conv_block(x, exp_channels, (1, 1), (1, 1), name=name) 63 | 64 | if stride[0] == 2: 65 | x = ZeroPadding2D(padding=pad_size(x, 3), name=name + '_pad')(x) 66 | 67 | x = DepthwiseConv2D(kernel, padding='same' if stride[0] == 1 else 'valid', strides=stride, depth_multiplier=1, 68 | use_bias=False, name=name + '_depthwise')(x) 69 | 70 | x = BatchNormalization(axis=3, name=name + '_depthwise_BN')(x) 71 | x = Activation(relu6, name=name + '_depthwise_relu')(x) 72 | 73 | x = Conv2D(alpha_channels, (1, 1), padding='same', use_bias=False, strides=(1, 1), name=name + '_project')(x) 74 | x = BatchNormalization(axis=3, name=name + '_project_BN')(x) 75 | 76 | if residual: 77 | x = layers.add([x, input_tensor], name=name + '_add') 78 | 79 | return x 80 | 81 | 82 | def MobileNetV2(nb_classes, alpha=1., dropout=0): 83 | img_input = Input(shape=(224, 224, 3)) 84 | 85 | first_filter = make_divisible(32 * alpha, 8) 86 | 87 | x = ZeroPadding2D(padding=pad_size(img_input, 3), name='Conv1_pad')(img_input) 88 | x = Conv2D(first_filter, (3, 3), strides=(2, 2), padding='valid', use_bias=False, name='Conv1')(x) 89 | x = BatchNormalization(axis=3, name='bn_Conv1')(x) 90 | x = Activation(relu6, name='Conv1_relu')(x) 91 | 92 | x = DepthwiseConv2D((3, 3), padding='same', strides=(1, 1), depth_multiplier=1, use_bias=False, 93 | name='expanded_conv_depthwise')(x) 94 | x = BatchNormalization(axis=3, name='expanded_conv_depthwise_BN')(x) 95 | x = Activation(relu6, name='expanded_conv_depthwise_relu')(x) 96 | 97 | x = Conv2D(16, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='expanded_conv_project')(x) 98 | x = BatchNormalization(axis=3, name='expanded_conv_project_BN')(x) 99 | 100 | x = depthwise_res_block(x, 24, (3, 3), (2, 2), 6, alpha, residual=False, name='block_1') 101 | 102 | x = depthwise_res_block(x, 24, (3, 3), (1, 1), 6, alpha, residual=True, name='block_2') 103 | 104 | x = depthwise_res_block(x, 32, (3, 3), (2, 2), 6, alpha, residual=False, name='block_3') 105 | 106 | x = depthwise_res_block(x, 32, (3, 3), (1, 1), 6, alpha, residual=True, name='block_4') 107 | 108 | x = depthwise_res_block(x, 32, (3, 3), (1, 1), 6, alpha, residual=True, name='block_5') 109 | 110 | x = depthwise_res_block(x, 64, (3, 3), (2, 2), 6, alpha, residual=False, name='block_6') 111 | 112 | x = depthwise_res_block(x, 64, (3, 3), (1, 1), 6, alpha, residual=True, name='block_7') 113 | 114 | x = depthwise_res_block(x, 64, (3, 3), (1, 1), 6, alpha, residual=True, name='block_8') 115 | 116 | x = depthwise_res_block(x, 64, (3, 3), (1, 1), 6, alpha, residual=True, name='block_9') 117 | 118 | x = depthwise_res_block(x, 96, (3, 3), (1, 1), 6, alpha, residual=False, name='block_10') 119 | 120 | x = depthwise_res_block(x, 96, (3, 3), (1, 1), 6, alpha, residual=True, name='block_11') 121 | 122 | x = depthwise_res_block(x, 96, (3, 3), (1, 1), 6, alpha, residual=True, name='block_12') 123 | 124 | x = depthwise_res_block(x, 160, (3, 3), (2, 2), 6, alpha, residual=False, name='block_13') 125 | 126 | x = depthwise_res_block(x, 160, (3, 3), (1, 1), 6, alpha, residual=True, name='block_14') 127 | 128 | x = depthwise_res_block(x, 160, (3, 3), (1, 1), 6, alpha, residual=True, name='block_15') 129 | 130 | x = depthwise_res_block(x, 320, (3, 3), (1, 1), 6, alpha, residual=False, name='block_16') 131 | 132 | if alpha > 1.0: 133 | last_filter = make_divisible(1280 *
alpha, 8) 134 | else: 135 | last_filter = 1280 136 | 137 | x = Conv2D(last_filter, (1, 1), strides=(1, 1), use_bias=False, name='Conv_1')(x) 138 | x = BatchNormalization(axis=3, name='Conv_1_bn')(x) 139 | x = Activation(relu6, name='out_relu')(x) 140 | 141 | x = GlobalAveragePooling2D()(x) 142 | x = Dense(nb_classes, activation='softmax', use_bias=True, name='Logits')(x) 143 | 144 | model = models.Model(img_input, x, name='MobileNetV2') 145 | 146 | return model 147 | 148 | 149 | def load_image(img_path, size=(224, 224)): 150 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 151 | img = tf.io.read_file(img_path) 152 | img = tf.image.decode_jpeg(img) 153 | img = tf.image.resize(img,size)/255.0 154 | return img, label 155 | 156 | 157 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_data/train/*/*") \ 158 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 159 | .shuffle(buffer_size=1000).batch(BATCH_SIZE) \ 160 | .prefetch(tf.data.experimental.AUTOTUNE) 161 | 162 | ds_test = tf.data.Dataset.list_files("/home/group7/dataset/fruit_data/test/*/*") \ 163 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 164 | .batch(BATCH_SIZE) \ 165 | .prefetch(tf.data.experimental.AUTOTUNE) 166 | 167 | model = MobileNetV2(30, 1.0, 0.2) 168 | model.summary() 169 | 170 | model.compile( 171 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), 172 | loss=tf.keras.losses.sparse_categorical_crossentropy, 173 | metrics=["accuracy"] 174 | ) 175 | 176 | 177 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 178 | logdir = os.path.join('data', 'autograph', stamp) 179 | tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1) 180 | 181 | history = model.fit(ds_train, epochs=2, validation_data=ds_test, 182 | callbacks=[tensorboard_callback], workers=4) 183 | 184 | # Save the model structure and weights together; the SavedModel format is cross-platform and convenient for deployment 185 | 186 | model.save('./data/tf_model_savedmodel1', save_format="tf") 187 | print('export saved model.') 188 | 189 | model_loaded = tf.keras.models.load_model('./data/tf_model_savedmodel1') 190 | model_loaded.evaluate(ds_test) 191 | 192 | -------------------------------------------------------------------------------- /Image_Classification/construct_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | import datetime 4 | import os 5 | 6 | 7 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 8 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 9 | 10 | BATCH_SIZE = 20 11 | ds6 = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*") 12 | for file in ds6.take(10): 13 | print(file) 14 | 15 | # labels = tf.constant(["apple", "blueberry", "grape", "mango", "pear", "plum", "watermelon", "banana", "cherry", "lemon", "orange", "pineapple", "strawberry"]) 16 | 17 | 18 | def load_image(img_path, size=(100, 100)): 19 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 20 | img = tf.io.read_file(img_path) 21 | img = tf.image.decode_jpeg(img) 22 | img = tf.image.resize(img,size)/255.0 23 | return img, label 24 | 25 | 26 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_data/train/*/*") \ 27 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 28 | .shuffle(buffer_size=1000).batch(BATCH_SIZE) \ 29 | .prefetch(tf.data.experimental.AUTOTUNE) 30 | 31 | ds_test =
tf.data.Dataset.list_files("/home/group7/dataset/fruit_data/test/*/*") \ 32 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 33 | .batch(BATCH_SIZE) \ 34 | .prefetch(tf.data.experimental.AUTOTUNE) 35 | 36 | inputs = layers.Input(shape=(100,100,3)) 37 | x = layers.Conv2D(64,kernel_size=(3,3),padding="same")(inputs) 38 | x = layers.Activation('relu')(x) 39 | x = layers.BatchNormalization()(x) 40 | x = layers.MaxPool2D()(x) 41 | x = layers.Conv2D(128,kernel_size=(3,3),padding="same")(x) 42 | x = layers.Activation('relu')(x) 43 | x = layers.BatchNormalization()(x) 44 | x = layers.MaxPool2D()(x) 45 | x = layers.Conv2D(256,kernel_size=(3,3),padding="same")(x) 46 | x = layers.Activation('relu')(x) 47 | x = layers.BatchNormalization()(x) 48 | x = layers.Dropout(rate=0.3)(x) 49 | x = layers.Conv2D(256,kernel_size=(3,3),padding="same")(x) 50 | x = layers.Activation('relu')(x) 51 | x = layers.BatchNormalization()(x) 52 | x = layers.MaxPool2D()(x) 53 | x = layers.Conv2D(512,kernel_size=(3,3),padding="same")(x) 54 | x = layers.Activation('relu')(x) 55 | x = layers.BatchNormalization()(x) 56 | x = layers.Dropout(rate=0.3)(x) 57 | x = layers.Conv2D(512,kernel_size=(3,3),padding="same")(x) 58 | x = layers.Activation('relu')(x) 59 | x = layers.BatchNormalization()(x) 60 | x = layers.MaxPool2D()(x) 61 | x = layers.Flatten()(x) 62 | x = layers.Dense(4096,activation='relu')(x) 63 | x = layers.BatchNormalization()(x) 64 | x = layers.Dropout(rate=0.3)(x) 65 | outputs = layers.Dense(30,activation = 'softmax')(x) 66 | 67 | model = models.Model(inputs = inputs,outputs = outputs) 68 | model.summary() 69 | 70 | model.compile( 71 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), 72 | loss=tf.keras.losses.sparse_categorical_crossentropy, 73 | metrics=["accuracy"] 74 | ) 75 | 76 | 77 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 78 | logdir = os.path.join('data', 'autograph', stamp) 79 | tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1) 80 | 81 | history = model.fit(ds_train,epochs= 2,validation_data=ds_test, 82 | callbacks = [tensorboard_callback],workers = 4) 83 | 84 | # 保存模型结构与模型参数到文件,该方式保存的模型具有跨平台性便于部署 85 | 86 | model.save('./data/tf_model_savedmodel1', save_format="tf") 87 | print('export saved model.') 88 | 89 | model_loaded = tf.keras.models.load_model('./data/tf_model_savedmodel1') 90 | model_loaded.evaluate(ds_test) 91 | -------------------------------------------------------------------------------- /Image_Classification/parachange.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | import os 4 | 5 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 6 | os.environ["CUDA_VISIBLE_DEVICES"]="3" 7 | 8 | BATCH_SIZE = 128 9 | EPOCHS = 20 10 | datapath = '/home/group7/tensorflow_learn/train/' 11 | 12 | def load_image(img_path,size = (224,224)): 13 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 14 | img = tf.io.read_file(img_path) 15 | img = tf.image.decode_jpeg(img) 16 | img = tf.image.resize(img,size)/255.0 17 | return(img,label) 18 | 19 | 20 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*.jpg") \ 21 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 22 | .shuffle(buffer_size = 1000).batch(BATCH_SIZE) \ 23 | .prefetch(tf.data.experimental.AUTOTUNE) 24 | 25 | ds_test = 
tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/test/*/*.jpg") \ 26 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 27 | .batch(BATCH_SIZE) \ 28 | .prefetch(tf.data.experimental.AUTOTUNE) 29 | 30 | # tf.keras.applications导入模型 31 | Mo = tf.keras.applications.MobileNetV2( 32 | input_shape=(224,224,3), 33 | include_top=False) 34 | Mo.trainable=True 35 | 36 | model = models.Sequential() 37 | model.add(Mo) 38 | model.add(layers.GlobalAveragePooling2D()) 39 | model.add(layers.Flatten()) 40 | model.add(layers.Dense(512, activation='relu')) 41 | model.add(layers.Dropout(rate=0.5)) 42 | model.add(layers.Dense(256, activation='relu')) 43 | model.add(layers.Dropout(rate=0.5)) 44 | model.add(layers.Dense(30, activation='sigmoid')) 45 | model.summary() 46 | 47 | import datetime 48 | 49 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 50 | callback_dir = datapath+'model/callback/'+stamp 51 | tensorboard_callback = tf.keras.callbacks.TensorBoard(callback_dir, histogram_freq=1) 52 | 53 | checkpoint_path = datapath+'model/checkpoint/'+stamp 54 | model_save = tf.keras.callbacks.ModelCheckpoint( 55 | filepath=checkpoint_path, 56 | verbose=1, 57 | save_weights_only=True, 58 | period=20) 59 | 60 | early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0.001, 61 | patience = 5, mode = 'min', verbose = 1, 62 | restore_best_weights = True) 63 | 64 | reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.3, 65 | patience = 2, min_delta = 0.001, 66 | mode = 'min', verbose = 1) 67 | 68 | model.compile( 69 | optimizer=tf.keras.optimizers.Adam(lr=0.0005), 70 | loss=tf.keras.losses.sparse_categorical_crossentropy, 71 | metrics=["accuracy"]) 72 | 73 | history = model.fit(ds_train,epochs=EPOCHS,validation_data=ds_test, 74 | callbacks = [tensorboard_callback, model_save, early_stop, reduce_lr]) 75 | 76 | 77 | from matplotlib import pyplot as plt 78 | 79 | plt.figure(1) 80 | train_metrics = history.history["loss"] 81 | val_metrics = history.history['val_loss'] 82 | epochs = range(1, len(train_metrics) + 1) 83 | plt.plot(epochs, train_metrics, 'bo--') 84 | plt.plot(epochs, val_metrics, 'ro-') 85 | plt.title('Training and validation loss') 86 | plt.xlabel("Epochs") 87 | plt.ylabel("loss") 88 | plt.legend(["train_loss", 'val_loss']) 89 | plt.savefig('./test1.jpg') 90 | 91 | plt.figure(2) 92 | train_metrics = history.history["accuracy"] 93 | val_metrics = history.history['val_accuracy'] 94 | epochs = range(1, len(train_metrics) + 1) 95 | plt.plot(epochs, train_metrics, 'bo--') 96 | plt.plot(epochs, val_metrics, 'ro-') 97 | plt.title('Training and validation accuracy') 98 | plt.xlabel("Epochs") 99 | plt.ylabel("accuracy") 100 | plt.legend(["train_accuracy", 'val_accuracy']) 101 | plt.savefig('./test2.jpg') 102 | -------------------------------------------------------------------------------- /Image_Classification/plot_model_fig.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | import os 4 | 5 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 6 | os.environ["CUDA_VISIBLE_DEVICES"]="2" 7 | 8 | BATCH_SIZE = 128 9 | EPOCHS = 20 10 | datapath = '/home/group7/tensorflow_learn/train/' 11 | 12 | def load_image(img_path,size = (224,224)): 13 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 14 | img = tf.io.read_file(img_path) 15 | img = tf.image.decode_jpeg(img) 16 | img = 
tf.image.resize(img,size)/255.0 17 | return img, label 18 | 19 | 20 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*.jpg") \ 21 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 22 | .shuffle(buffer_size=1000).batch(BATCH_SIZE) \ 23 | .prefetch(tf.data.experimental.AUTOTUNE) \ 24 | 25 | ds_test = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/test/*/*.jpg") \ 26 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 27 | .batch(BATCH_SIZE) \ 28 | .prefetch(tf.data.experimental.AUTOTUNE) 29 | 30 | # Load a MobileNetV2 backbone from tf.keras.applications 31 | Mo = tf.keras.applications.MobileNetV2( 32 | input_shape=(224,224,3), 33 | include_top=False) 34 | Mo.trainable=True 35 | 36 | model = models.Sequential() 37 | model.add(Mo) 38 | model.add(layers.GlobalAveragePooling2D()) 39 | model.add(layers.Flatten()) 40 | model.add(layers.Dense(512, activation='relu')) 41 | model.add(layers.Dropout(rate=0.5)) 42 | model.add(layers.Dense(256, activation='relu')) 43 | model.add(layers.Dropout(rate=0.5)) 44 | model.add(layers.Dense(30, activation='softmax')) # softmax, not sigmoid: the 30 classes are mutually exclusive, as sparse categorical crossentropy assumes 45 | model.summary() 46 | 47 | import datetime 48 | 49 | 50 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 51 | callback_dir = datapath+'model/callback/'+stamp 52 | tensorboard_callback = tf.keras.callbacks.TensorBoard(callback_dir, histogram_freq=1) 53 | 54 | checkpoint_path = datapath+'model/checkpoint/'+stamp 55 | model_save = tf.keras.callbacks.ModelCheckpoint( 56 | filepath=checkpoint_path, 57 | verbose=1, 58 | save_weights_only=True, 59 | period=20) 60 | 61 | early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.001, 62 | patience=5, mode='min', verbose=1, 63 | restore_best_weights=True) 64 | 65 | reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.3, 66 | patience=2, min_delta=0.001, 67 | mode='min', verbose=1) 68 | 69 | model.compile( 70 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), 71 | loss=tf.keras.losses.sparse_categorical_crossentropy, 72 | metrics=["accuracy"]) 73 | 74 | history = model.fit(ds_train, epochs=EPOCHS, validation_data=ds_test, 75 | callbacks=[tensorboard_callback, model_save, early_stop, reduce_lr]) 76 | 77 | 78 | from matplotlib import pyplot as plt 79 | 80 | plt.figure(1) 81 | train_metrics = history.history["loss"] 82 | val_metrics = history.history['val_loss'] 83 | epochs = range(1, len(train_metrics) + 1) 84 | plt.plot(epochs, train_metrics, 'bo--') 85 | plt.plot(epochs, val_metrics, 'ro-') 86 | plt.title('Training and validation loss') 87 | plt.xlabel("Epochs") 88 | plt.ylabel("loss") 89 | plt.legend(["train_loss", 'val_loss']) 90 | plt.savefig('./test1.jpg') 91 | 92 | plt.figure(2) 93 | train_metrics = history.history["accuracy"] 94 | val_metrics = history.history['val_accuracy'] 95 | epochs = range(1, len(train_metrics) + 1) 96 | plt.plot(epochs, train_metrics, 'bo--') 97 | plt.plot(epochs, val_metrics, 'ro-') 98 | plt.title('Training and validation accuracy') 99 | plt.xlabel("Epochs") 100 | plt.ylabel("accuracy") 101 | plt.legend(["train_accuracy", 'val_accuracy']) 102 | plt.savefig('./test2.jpg') 103 | -------------------------------------------------------------------------------- /Image_Classification/transfer_learning.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers, models 3 | import datetime 4 | import os 5 | 6 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 7
| os.environ["CUDA_VISIBLE_DEVICES"]="3" 8 | BATCH_SIZE = 128 9 | 10 | 11 | def load_image(img_path, size=(224, 224)): 12 | label = tf.cast(tf.compat.v1.string_to_number(tf.strings.split(img_path, sep='/',)[6]), tf.int8) 13 | img = tf.io.read_file(img_path) 14 | img = tf.image.decode_jpeg(img) 15 | img = tf.image.resize(img,size)/255.0 16 | return img, label 17 | 18 | 19 | ds_train = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/train/*/*.jpg") \ 20 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 21 | .shuffle(buffer_size=1000).batch(BATCH_SIZE) \ 22 | .prefetch(tf.data.experimental.AUTOTUNE) 23 | 24 | ds_test = tf.data.Dataset.list_files("/home/group7/dataset/fruit_flower/test/*/*.jpg") \ 25 | .map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) \ 26 | .batch(BATCH_SIZE) \ 27 | .prefetch(tf.data.experimental.AUTOTUNE) 28 | 29 | Mo = tf.keras.applications.MobileNetV2( 30 | input_shape=(224,224,3), 31 | include_top=False, 32 | weights='imagenet') 33 | 34 | model = models.Sequential() 35 | model.add(Mo) 36 | model.add(layers.GlobalAveragePooling2D()) 37 | model.add(layers.Flatten()) 38 | model.add(layers.Dense(512, activation='relu')) 39 | model.add(layers.Dropout(rate=0.3)) 40 | model.add(layers.Dense(256, activation='relu')) 41 | model.add(layers.Dropout(rate=0.3)) 42 | model.add(layers.Dense(30, activation='softmax')) # softmax, not sigmoid: the 30 classes are mutually exclusive, as sparse categorical crossentropy assumes 43 | model.summary() 44 | 45 | for layer in Mo.layers[:20]: # freeze the first 20 pretrained layers, fine-tune the rest 46 | layer.trainable=False 47 | for layer in Mo.layers[20:]: 48 | layer.trainable=True 49 | 50 | stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 51 | logdir = os.path.join('model', 'autograph', stamp) 52 | tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1) 53 | 54 | checkpoint_dir = os.path.join('model', 'MobileNetV2') 55 | checkpoint_path = os.path.join('model', 'MobileNetV2', stamp) 56 | cp_callback = tf.keras.callbacks.ModelCheckpoint( 57 | filepath=checkpoint_path, 58 | verbose=1, 59 | save_weights_only=True, 60 | period=20) 61 | 62 | 63 | latest = tf.train.latest_checkpoint(checkpoint_dir) 64 | #model.load_weights(latest) 65 | model.compile( 66 | optimizer=tf.keras.optimizers.Adam(), 67 | loss=tf.keras.losses.sparse_categorical_crossentropy, 68 | metrics=["accuracy"] 69 | ) 70 | 71 | history = model.fit(ds_train, epochs=1, validation_data=ds_test, 72 | callbacks=[tensorboard_callback, cp_callback]) 73 | 74 | # Save the model structure and weights together; the SavedModel format is cross-platform and convenient for deployment 75 | 76 | model.save('./data/tf_model_savedmodel', save_format="tf") 77 | print('export saved model.') 78 | 79 | model_loaded = tf.keras.models.load_model('./data/tf_model_savedmodel') 80 | model_loaded.evaluate(ds_test) 81 | 82 | -------------------------------------------------------------------------------- /Object_Detection/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /../../../../../../../../:\Users\86178\Desktop\深度学习\github\Object_Detection\.idea/dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /Object_Detection/.idea/Object_Detection.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | --------------------------------------------------------------------------------
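The SavedModel exports from the classification scripts above are what Deployment/convert_to_tflite.py turns into ic_model.tflite for the Pi Zero. A minimal sketch of that conversion, assuming the SavedModel path used in transfer_learning.py (the quantization option is an illustrative choice, not necessarily the repo's exact setting):

import tensorflow as tf

# Load the exported SavedModel and convert it to a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model('./data/tf_model_savedmodel')
# Post-training quantization shrinks the file, which matters on the Pi Zero.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open('ic_model.tflite', 'wb') as f:
    f.write(tflite_model)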
/Object_Detection/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /Object_Detection/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /Object_Detection/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /Object_Detection/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Object_Detection/dataset_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utility functions for creating TFRecord data sets.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow.compat.v1 as tf 23 | 24 | 25 | def int64_feature(value): 26 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 27 | 28 | 29 | def int64_list_feature(value): 30 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 31 | 32 | 33 | def bytes_feature(value): 34 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 35 | 36 | 37 | def bytes_list_feature(value): 38 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 39 | 40 | 41 | def float_feature(value): 42 | return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) 43 | 44 | 45 | def float_list_feature(value): 46 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 47 | 48 | 49 | def read_examples_list(path): 50 | """Read list of training or validation examples. 51 | 52 | The file is assumed to contain a single example per line where the first 53 | token in the line is an identifier that allows us to find the image and 54 | annotation xml for that example. 55 | 56 | For example, the line: 57 | xyz 3 58 | would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). 59 | 60 | Args: 61 | path: absolute path to examples list file. 62 | 63 | Returns: 64 | list of example identifiers (strings). 65 | """ 66 | with tf.gfile.GFile(path) as fid: 67 | lines = fid.readlines() 68 | return [line.strip().split(' ')[0] for line in lines] 69 | 70 | 71 | def recursive_parse_xml_to_dict(xml): 72 | """Recursively parses XML contents to python dict. 
73 | 74 | We assume that `object` tags are the only ones that can appear 75 | multiple times at the same level of a tree. 76 | 77 | Args: 78 | xml: xml tree obtained by parsing XML file contents using lxml.etree 79 | 80 | Returns: 81 | Python dictionary holding XML contents. 82 | """ 83 | if not xml: 84 | return {xml.tag: xml.text} 85 | result = {} 86 | for child in xml: 87 | child_result = recursive_parse_xml_to_dict(child) 88 | if child.tag != 'object': 89 | result[child.tag] = child_result[child.tag] 90 | else: 91 | if child.tag not in result: 92 | result[child.tag] = [] 93 | result[child.tag].append(child_result[child.tag]) 94 | return {xml.tag: result} 95 | -------------------------------------------------------------------------------- /Object_Detection/generate_tfrecord.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | # From tensorflow/models/ 4 | # Create train data: 5 | python generate_tfrecord.py --csv_input=images/train_labels.csv --image_dir=images/train --output_path=train.record 6 | 7 | # Create test data: 8 | python generate_tfrecord.py --csv_input=images/test_labels.csv --image_dir=images/test --output_path=test.record 9 | """ 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import absolute_import 13 | 14 | import os 15 | import io 16 | import pandas as pd 17 | 18 | from tensorflow.python.framework.versions import VERSION 19 | if VERSION >= "2.0.0a0": 20 | import tensorflow.compat.v1 as tf 21 | else: 22 | import tensorflow as tf 23 | 24 | from PIL import Image 25 | import dataset_util 26 | from collections import namedtuple, OrderedDict 27 | 28 | flags = tf.app.flags 29 | flags.DEFINE_string('csv_input', '', 'Path to the CSV input') 30 | flags.DEFINE_string('image_dir', '', 'Path to the image directory') 31 | flags.DEFINE_string('output_path', '', 'Path to output TFRecord') 32 | FLAGS = flags.FLAGS 33 | 34 | 35 | # TO-DO replace this with label map 36 | def class_text_to_int(row_label): 37 | if row_label == 'apple': 38 | return 1 39 | elif row_label == 'banana': 40 | return 2 41 | elif row_label == 'grape': 42 | return 3 43 | elif row_label == 'kiwifruit': 44 | return 4 45 | elif row_label == 'mango': 46 | return 5 47 | elif row_label == 'orange': 48 | return 6 49 | elif row_label == 'pear': 50 | return 7 51 | elif row_label == 'strawberry': 52 | return 8 53 | else: 54 | None 55 | 56 | 57 | def split(df, group): 58 | data = namedtuple('data', ['filename', 'object']) 59 | gb = df.groupby(group) 60 | return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)] 61 | 62 | 63 | def create_tf_example(group, path): 64 | with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: 65 | encoded_jpg = fid.read() 66 | encoded_jpg_io = io.BytesIO(encoded_jpg) 67 | image = Image.open(encoded_jpg_io) 68 | width, height = image.size 69 | 70 | filename = group.filename.encode('utf8') 71 | image_format = b'jpg' 72 | xmins = [] 73 | xmaxs = [] 74 | ymins = [] 75 | ymaxs = [] 76 | classes_text = [] 77 | classes = [] 78 | 79 | for index, row in group.object.iterrows(): 80 | xmins.append(row['xmin'] / width) 81 | xmaxs.append(row['xmax'] / width) 82 | ymins.append(row['ymin'] / height) 83 | ymaxs.append(row['ymax'] / height) 84 | classes_text.append(row['class'].encode('utf8')) 85 | classes.append(class_text_to_int(row['class'])) 86 | 87 | tf_example = tf.train.Example(features=tf.train.Features(feature={ 88 | 'image/height': 
dataset_util.int64_feature(height), 89 | 'image/width': dataset_util.int64_feature(width), 90 | 'image/filename': dataset_util.bytes_feature(filename), 91 | 'image/source_id': dataset_util.bytes_feature(filename), 92 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 93 | 'image/format': dataset_util.bytes_feature(image_format), 94 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 95 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 96 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 97 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 98 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 99 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 100 | })) 101 | return tf_example 102 | 103 | 104 | def main(_): 105 | writer = tf.python_io.TFRecordWriter(FLAGS.output_path) 106 | path = os.path.join(os.getcwd(), FLAGS.image_dir) 107 | examples = pd.read_csv(FLAGS.csv_input) 108 | grouped = split(examples, 'filename') 109 | for group in grouped: 110 | tf_example = create_tf_example(group, path) 111 | writer.write(tf_example.SerializeToString()) 112 | 113 | writer.close() 114 | output_path = os.path.join(os.getcwd(), FLAGS.output_path) 115 | print('Successfully created the TFRecords: {}'.format(output_path)) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.app.run() 120 | -------------------------------------------------------------------------------- /Object_Detection/training/label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 1 3 | name: 'apple' 4 | } 5 | 6 | item { 7 | id: 2 8 | name: 'banana' 9 | } 10 | 11 | item { 12 | id: 3 13 | name: 'grape' 14 | } 15 | 16 | item { 17 | id: 4 18 | name: 'kiwifruit' 19 | } 20 | 21 | item { 22 | id: 5 23 | name: 'mango' 24 | } 25 | 26 | item { 27 | id: 6 28 | name: 'orange' 29 | } 30 | 31 | item { 32 | id: 7 33 | name: 'pear' 34 | } 35 | 36 | item { 37 | id: 8 38 | name: 'strawberry' 39 | } 40 | 41 | item { 42 | id: 9 43 | name: 'calla lily' 44 | } 45 | 46 | item { 47 | id: 10 48 | name: 'cornflower' 49 | } 50 | 51 | item { 52 | id: 11 53 | name: 'corydali' 54 | } 55 | 56 | item { 57 | id: 12 58 | name: 'dahlia' 59 | } 60 | 61 | item { 62 | id: 13 63 | name: 'daisy' 64 | } 65 | 66 | item { 67 | id: 14 68 | name: 'gentian' 69 | } 70 | 71 | item { 72 | id: 15 73 | name: 'nigella' 74 | } 75 | 76 | item { 77 | id: 16 78 | name: 'sunflower' 79 | } -------------------------------------------------------------------------------- /Object_Detection/training/pipeline.config: -------------------------------------------------------------------------------- 1 | model { 2 | ssd { 3 | inplace_batchnorm_update: true 4 | freeze_batchnorm: false 5 | num_classes: 16 6 | box_coder { 7 | faster_rcnn_box_coder { 8 | y_scale: 10.0 9 | x_scale: 10.0 10 | height_scale: 5.0 11 | width_scale: 5.0 12 | } 13 | } 14 | matcher { 15 | argmax_matcher { 16 | matched_threshold: 0.5 17 | unmatched_threshold: 0.5 18 | ignore_thresholds: false 19 | negatives_lower_than_unmatched: true 20 | force_match_for_each_row: true 21 | use_matmul_gather: true 22 | } 23 | } 24 | similarity_calculator { 25 | iou_similarity { 26 | } 27 | } 28 | encode_background_as_zeros: true 29 | anchor_generator { 30 | ssd_anchor_generator { 31 | num_layers: 6 32 | min_scale: 0.2 33 | max_scale: 0.95 34 | aspect_ratios: 1.0 35 | aspect_ratios: 2.0 36 | aspect_ratios: 0.5 37 | aspect_ratios: 3.0 38 | aspect_ratios: 0.3333 39 | } 40 
| } 41 | image_resizer { 42 | fixed_shape_resizer { 43 | height: 300 44 | width: 300 45 | } 46 | } 47 | box_predictor { 48 | convolutional_box_predictor { 49 | min_depth: 0 50 | max_depth: 0 51 | num_layers_before_predictor: 0 52 | use_dropout: false 53 | dropout_keep_probability: 0.8 54 | kernel_size: 1 55 | box_code_size: 4 56 | apply_sigmoid_to_scores: false 57 | class_prediction_bias_init: -4.6 58 | conv_hyperparams { 59 | activation: RELU_6, 60 | regularizer { 61 | l2_regularizer { 62 | weight: 0.00004 63 | } 64 | } 65 | initializer { 66 | random_normal_initializer { 67 | stddev: 0.01 68 | mean: 0.0 69 | } 70 | } 71 | batch_norm { 72 | train: true, 73 | scale: true, 74 | center: true, 75 | decay: 0.97, 76 | epsilon: 0.001, 77 | } 78 | } 79 | } 80 | } 81 | feature_extractor { 82 | type: 'ssd_mobilenet_v2_keras' 83 | min_depth: 16 84 | depth_multiplier: 1.0 85 | conv_hyperparams { 86 | activation: RELU_6, 87 | regularizer { 88 | l2_regularizer { 89 | weight: 0.00004 90 | } 91 | } 92 | initializer { 93 | truncated_normal_initializer { 94 | stddev: 0.03 95 | mean: 0.0 96 | } 97 | } 98 | batch_norm { 99 | train: true, 100 | scale: true, 101 | center: true, 102 | decay: 0.97, 103 | epsilon: 0.001, 104 | } 105 | } 106 | override_base_feature_extractor_hyperparams: true 107 | } 108 | loss { 109 | classification_loss { 110 | weighted_sigmoid_focal { 111 | alpha: 0.75, 112 | gamma: 2.0 113 | } 114 | } 115 | localization_loss { 116 | weighted_smooth_l1 { 117 | delta: 1.0 118 | } 119 | } 120 | classification_weight: 1.0 121 | localization_weight: 1.0 122 | } 123 | normalize_loss_by_num_matches: true 124 | normalize_loc_loss_by_codesize: true 125 | post_processing { 126 | batch_non_max_suppression { 127 | score_threshold: 1e-8 128 | iou_threshold: 0.6 129 | max_detections_per_class: 100 130 | max_total_detections: 100 131 | } 132 | score_converter: SIGMOID 133 | } 134 | } 135 | } 136 | 137 | train_config: { 138 | fine_tune_checkpoint_version: V2 139 | fine_tune_checkpoint: "models/research/object_detection/ssd_mobilenet_v2_320x320_coco17_tpu-8/checkpoint/ckpt-0" 140 | fine_tune_checkpoint_type: "detection" 141 | batch_size: 256 142 | sync_replicas: true 143 | startup_delay_steps: 0 144 | replicas_to_aggregate: 8 145 | num_steps: 100000 146 | data_augmentation_options { 147 | random_horizontal_flip { 148 | } 149 | } 150 | data_augmentation_options { 151 | ssd_random_crop { 152 | } 153 | } 154 | optimizer { 155 | momentum_optimizer: { 156 | learning_rate: { 157 | cosine_decay_learning_rate { 158 | learning_rate_base: .8 159 | total_steps: 50000 160 | warmup_learning_rate: 0.13333 161 | warmup_steps: 2000 162 | } 163 | } 164 | momentum_optimizer_value: 0.9 165 | } 166 | use_moving_average: false 167 | } 168 | max_number_of_boxes: 100 169 | unpad_groundtruth_tensors: false 170 | } 171 | 172 | train_input_reader: { 173 | label_map_path: "models/research/object_detection/training/label_map.pbtxt" 174 | tf_record_input_reader { 175 | input_path: "models/research/object_detection/training/train.record" 176 | } 177 | } 178 | 179 | eval_config: { 180 | metrics_set: "coco_detection_metrics" 181 | use_moving_averages: false 182 | } 183 | 184 | eval_input_reader: { 185 | label_map_path: "models/research/object_detection/training/label_map.pbtxt" 186 | shuffle: false 187 | num_epochs: 1 188 | tf_record_input_reader { 189 | input_path: "models/research/object_detection/training/test.record" 190 | } 191 | } -------------------------------------------------------------------------------- 
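The pipeline.config above declares num_classes: 16 against training/label_map.pbtxt, while class_text_to_int in generate_tfrecord.py hard-codes only the 8 fruit classes. A minimal sketch of deriving the mapping from the label map instead (this addresses the script's own TO-DO), assuming the TensorFlow Object Detection API is installed:

from object_detection.utils import label_map_util

# Build {'apple': 1, 'banana': 2, ...} straight from the label map so the
# TFRecord labels stay in sync with all 16 classes the pipeline declares.
label_map_dict = label_map_util.get_label_map_dict('training/label_map.pbtxt')

def class_text_to_int(row_label):
    # Returns None for unknown labels, matching the original fall-through behaviour.
    return label_map_dict.get(row_label)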
/Object_Detection/xml_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pandas as pd 4 | import xml.etree.ElementTree as ET 5 | 6 | 7 | def xml_to_csv(path): 8 | xml_list = [] 9 | for xml_file in glob.glob(path + '/*.xml'): 10 | tree = ET.parse(xml_file) 11 | root = tree.getroot() 12 | for member in root.findall('object'): 13 | value = (root.find('filename').text, 14 | int(root.find('size')[0].text), 15 | int(root.find('size')[1].text), 16 | member[0].text, 17 | int(member[4][0].text), 18 | int(member[4][1].text), 19 | int(member[4][2].text), 20 | int(member[4][3].text) 21 | ) 22 | xml_list.append(value) 23 | column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'] 24 | xml_df = pd.DataFrame(xml_list, columns=column_name) 25 | return xml_df 26 | 27 | 28 | def main(): 29 | for folder in ['train', 'test']: 30 | image_path = os.path.join(os.getcwd(), ('images/' + folder)) 31 | xml_df = xml_to_csv(image_path) 32 | xml_df.to_csv(('images/' + folder + '_labels.csv'), index=None) 33 | print('Successfully converted xml to csv.') 34 | 35 | 36 | main() 37 | -------------------------------------------------------------------------------- /Object_Detection/xml_to_csv_total.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import shutil 3 | import os 4 | import glob 5 | import pandas as pd 6 | import xml.etree.ElementTree as ET 7 | 8 | 9 | def xml_to_csv(path): 10 | xml_list = [] 11 | for xml_file in glob.glob(path + '/*.xml'): 12 | tree = ET.parse(xml_file) 13 | root = tree.getroot() 14 | for member in root.findall('object'): 15 | value = (root.find('filename').text, 16 | int(root.find('size')[0].text), 17 | int(root.find('size')[1].text), 18 | member[0].text, 19 | int(member[4][0].text), 20 | int(member[4][1].text), 21 | int(member[4][2].text), 22 | int(member[4][3].text) 23 | ) 24 | xml_list.append(value) 25 | column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'] 26 | xml_df = pd.DataFrame(xml_list, columns=column_name) 27 | return xml_df 28 | 29 | 30 | if __name__ == "__main__": 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument('--data_path', default="./data/", type=str) 33 | args = parser.parse_args() 34 | data_path = args.data_path 35 | 36 | if not data_path.endswith('/'): 37 | data_path = data_path+'/' 38 | if os.path.exists('./images'): 39 | shutil.rmtree('./images') 40 | os.mkdir('./images') 41 | os.mkdir('./images/train') 42 | os.mkdir('./images/test') 43 | 44 | 45 | for i in ['train', 'test']: 46 | f = open(data_path+'ImageSets/Main/'+i+'.txt',"r") 47 | lines = f.readlines() 48 | for file_name in lines: 49 | img = file_name.rstrip('\r\n')+'.jpg' 50 | xml = file_name.rstrip('\r\n')+'.xml' 51 | shutil.copy(data_path+'JPEGImages/'+img, './images/'+i) 52 | shutil.copy(data_path+'Annotations/'+xml, './images/'+i) 53 | 54 | image_path = os.path.join(os.getcwd(), ('images/' + i)) 55 | xml_df = xml_to_csv(image_path) 56 | xml_df.to_csv(('images/' + i + '_labels.csv'), index=None) 57 | print('Successfully converted xml to csv.') 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Image Classification and Object Detection on Raspberry Pi Zero 2 | 3 | > Shandong University (Weihai), Data Science and Artificial Intelligence Experimental Class of 2018 4 | > 5 | > 孙易泽 吴锦程 詹沛 徐潇涵 6 | 7 | **Image Classification and Object Detection on Raspberry Pi Zero** is a deep learning research project whose goal is **real-time recognition and classification** on a Raspberry Pi using deep learning algorithms. 8 | 9 | 10 | 11 | 12 | 13 |
24 | 25 | Running deep learning on the Raspberry Pi involves the following main steps: 26 | 27 | + First, **data acquisition and preprocessing**. Image classification and object detection need large amounts of **clean, high-quality** image data for training, so we collect as much relevant image data as possible from different sources and process it into a form usable for deep learning. 28 | + Next, **image classification**. From the models available in the deep learning literature, we choose one suited to running on a Raspberry Pi, build the training framework with **TensorFlow 2**, and, by repeatedly tuning the model's parameters, train a model that is both accurate and fast. Successive refinements shrink the model while preserving its accuracy. 29 | + The **object detection model** is another focus. We choose a lightweight deep learning model and train it with **TensorFlow 2 Object Detection**, so that it can accurately detect fruit and flowers, correctly locating and classifying several different objects within a single image. 30 | + Finally, **deployment of the image classification and object detection models**: the trained models are deployed to the Raspberry Pi, where the camera feed is processed in real time for live detection. 31 | 32 | Detailed documentation: 33 | 34 | + 1. [Image Classification and Object Detection on Raspberry Pi Zero: Data Acquisition and Preprocessing](https://github.com/Sun-Yize-SDUWH/Deep-Learning-On-Raspberry-Pi-Zero/blob/master/Document/part1.md) 35 | 36 | + 2. [Image Classification and Object Detection on Raspberry Pi Zero: Image Classification Model](https://github.com/Sun-Yize-SDUWH/Deep-Learning-On-Raspberry-Pi-Zero/blob/master/Document/part2.md) 37 | 38 | + 3. [Image Classification and Object Detection on Raspberry Pi Zero: Object Detection Model](https://github.com/Sun-Yize-SDUWH/Deep-Learning-On-Raspberry-Pi-Zero/blob/master/Document/part3.md) 39 | 40 | + 4. [Image Classification and Object Detection on Raspberry Pi Zero: Deep Learning Deployment](https://github.com/Sun-Yize-SDUWH/Deep-Learning-On-Raspberry-Pi-Zero/blob/master/Document/part4.md) 41 | 42 | In addition, related open-source material for this project is available on other platforms: 43 | 44 | [Zhihu column: Image Classification and Object Detection on Raspberry Pi Zero](https://www.zhihu.com/column/c_1326223429637902336) 45 | 46 | [Video on Bilibili](https://www.bilibili.com/video/BV12a4y1n7MW) 47 | 48 | --------------------------------------------------------------------------------