├── README.md
├── convertTrainLabel.py
├── detect.py
├── hubconf.py
├── process_data_yolo.py
├── task05_rnn.ipynb
├── test.py
└── train.py


/README.md:
--------------------------------------------------------------------------------
 1 | ## 任务安排
 2 | 
 3 | 开营时间：02月16日21:00
 4 | 
 5 | - 比赛题目：天池创新大赛：热身赛  布匹缺陷检测，内容：根据给出的布匹图片标注出其中的缺陷
 6 | 
 7 | - 比赛链接：https://tianchi.aliyun.com/competition/entrance/531864/introduction?spm=5176.12281976.0.0.506441a6dTFHF3
 8 | 
 9 | 
10 | ### Task00：熟悉规则（1天）
11 | 
12 | - 组队、修改群昵称。
13 | - 熟悉打卡规则。
14 | - 打卡截止时间：02月18日03:00
15 | 
16 | ### Task01：比赛全流程体验（3天）
17 | 
18 | - 学习如何使用Docker提交代码及比赛上分。
19 | - 记录比赛中遇到的问题，并在学习笔记中插入初始分数截图。
20 | - 打卡截止时间：02月21日03:00
21 | - 学习资料：
22 |   - [Docker环境配置指南！](https://tianchi.aliyun.com/competition/entrance/231759/tab/226)
23 |   - [比赛Docker相关操作](https://github.com/datawhalechina/team-learning-cv/blob/master/DefectDetection/docker%E6%8F%90%E4%BA%A4%E6%95%99%E7%A8%8B.pdf)
24 | 
25 | ### Task02：Baseline学习及改进（5天）
26 | 
27 | - 学习baseline，并提出自己的改进策略，提交代码并更新自己的分数排名。
28 | - 在学习笔记中插入改进baseline之后分数排名截图。
29 | - 打卡截止时间：02月26日03:00
30 | - 学习资料：
31 |   - [Baseline学习及上分技巧](https://github.com/datawhalechina/team-learning-cv/blob/master/DefectDetection/README.md)
32 | 
33 | ### Task03：学习者分享（2天）
34 | 
35 | - 我们根据截图，邀请提分比较多的学习者进行分享。
36 | 
37 | 
38 | ## 文件说明
39 | - code : 存放所有相关代码的文件夹
40 |     - train_data : 存放原始数据文件 guangdong1_round2_train2_20191004_Annotations  guangdong1_round2_train2_20191004_images
41 |     - tcdata: 存放官方测试数据文件，docker 提交后会自动生成
42 |     - data :训练数据路径设置 coco128.yaml中设置训练数据路径
43 |     - models ： 网络相关的代码文件夹
44 |     - weights ： 保存训练模型的文件夹，best.pt last.pt
45 |     - convertTrainLabel.py：将官方的数据集转换成yolo数据的格式 运行生成convertor数据文件夹
46 |     - process_data_yolo.py：滑动窗口处理convertor数据文件夹里面数据，将大图变成1024*1024小图，生成数据文件夹process_data
47 |     - train.py :  训练代码， 运行该函数进行模型的训练，可以得到模型
48 |     - detect.py : 预测代码
49 |     - test.py :测试模型代码
50 |     - run.sh : 预测测试集，生成结果的脚本   sh run.sh
51 |     - train.sh : 训练脚本  sh trian.sh 
52 |  
53 |     
54 | 
55 | 
56 | ## 操作说明
57 | - step1 : 将官方训练数据集解压后放入train_data 文件夹
58 | - step2 : 训练运行  sh train.sh  
59 |     - train.sh 有四步
60 |         -python convertTrainLabel.py
61 |         -python process_data_yolo.py
62 |         -rm -rf ./convertor
63 |         -python train.py
64 | - step3 : 生成结果 sh run.sh
65 | 
66 | ## 思路说明
67 | -本方案采用了yolov5作为baseline
68 | -数据处理：滑动窗口分割训练图片
69 | 
70 | 
71 | ##改进思路
72 | -数据扩增：训练样本扩增随机竖直/水平翻折，色彩空间增强，使缺陷样本均匀
73 | -自适应anchor策略
74 | -适当减少回归框损失的权重
75 | -正负样本类别
76 | -多尺度训练
77 | -空洞卷积替换FPN最后一层
78 | -FPN改进尝试：NAS-FPN、AC-PFN
79 | -Anchor 匹配策略


--------------------------------------------------------------------------------
/convertTrainLabel.py:
--------------------------------------------------------------------------------
  1 | import numpy as np # linear algebra
  2 | import os
  3 | import json
  4 | from tqdm.auto import tqdm
  5 | import shutil as sh
  6 | import cv2
  7 | 
  8 | josn_path = "./train_data/guangdong1_round2_train2_20191004_Annotations/Annotations/anno_train.json"
  9 | image_path = "./train_data/guangdong1_round2_train2_20191004_images/defect/"
 10 | 
 11 | name_list = []
 12 | image_h_list = []
 13 | image_w_list = []
 14 | c_list = []
 15 | w_list = []
 16 | h_list = []
 17 | x_center_list = []
 18 | y_center_list = []
 19 | 
 20 | with open(josn_path, 'r') as f:
 21 |     temps = tqdm(json.loads(f.read()))
 22 |     for temp in temps:
 23 |         # image_w = temp["image_width"]
 24 |         # image_h = temp["image_height"]
 25 |         name = temp["name"].split('.')[0]
 26 |         path = os.path.join(image_path, name, temp["name"])
 27 |         # print('path: ',path)
 28 |         im = cv2.imread(path)
 29 |         sp = im.shape
 30 |         image_h, image_w = sp[0], sp[1]
 31 |         # print("image_h, image_w: ", image_h, image_w)
 32 |         # print("defect_name: ",temp["defect_name"])
 33 |         #bboxs
 34 |         x_l, y_l, x_r, y_r = temp["bbox"]
 35 |         # print(temp["name"], temp["bbox"])
 36 |         if temp["defect_name"]=="沾污":
 37 |             defect_name = '0'
 38 |         elif temp["defect_name"]=="错花":
 39 |             defect_name = '1'
 40 |         elif temp["defect_name"] == "水印":
 41 |             defect_name = '2'
 42 |         elif temp["defect_name"] == "花毛":
 43 |             defect_name = '3'
 44 |         elif temp["defect_name"] == "缝头":
 45 |             defect_name = '4'
 46 |         elif temp["defect_name"] == "缝头印":
 47 |             defect_name = '5'
 48 |         elif temp["defect_name"] == "虫粘":
 49 |             defect_name = '6'
 50 |         elif temp["defect_name"] == "破洞":
 51 |             defect_name = '7'
 52 |         elif temp["defect_name"] == "褶子":
 53 |             defect_name = '8'
 54 |         elif temp["defect_name"] == "织疵":
 55 |             defect_name = '9'
 56 |         elif temp["defect_name"] == "漏印":
 57 |             defect_name = '10'
 58 |         elif temp["defect_name"] == "蜡斑":
 59 |             defect_name = '11'
 60 |         elif temp["defect_name"] == "色差":
 61 |             defect_name = '12'
 62 |         elif temp["defect_name"] == "网折":
 63 |             defect_name = '13'
 64 |         elif temp["defect_name"] == "其他":
 65 |             defect_name = '14'
 66 |         else:
 67 |             defect_name = '15'
 68 |             print("----------------------------------error---------------------------")
 69 |             raise("erro")
 70 |         # print(image_w, image_h)
 71 |         # print(defect_name)
 72 |         x_center = (x_l + x_r)/(2*image_w)
 73 |         y_center = (y_l + y_r)/(2*image_h)
 74 |         w = (x_r - x_l)/(image_w)
 75 |         h = (y_r - y_l)/(image_h)
 76 |         # print(x_center, y_center, w, h)
 77 |         name_list.append(temp["name"])
 78 |         c_list.append(defect_name)
 79 |         image_h_list.append(image_w)
 80 |         image_w_list.append(image_h)
 81 |         x_center_list.append(x_center)
 82 |         y_center_list.append(y_center)
 83 |         w_list.append(w)
 84 |         h_list.append(h)
 85 | 
 86 |     index = list(set(name_list))
 87 |     print(len(index))
 88 |     for fold in [0]:
 89 |         val_index = index[len(index) * fold // 5:len(index) * (fold + 1) // 5]
 90 |         print(len(val_index))
 91 |         for num, name in enumerate(name_list):
 92 |             print(c_list[num], x_center_list[num], y_center_list[num], w_list[num], h_list[num])
 93 |             row = [c_list[num], x_center_list[num], y_center_list[num], w_list[num], h_list[num]]
 94 |             if name in val_index:
 95 |                 path2save = 'val/'
 96 |             else:
 97 |                 path2save = 'train/'
 98 |             # print('convertor\\fold{}\\labels\\'.format(fold) + path2save)
 99 |             # print('convertor\\fold{}/labels\\'.format(fold) + path2save + name.split('.')[0] + ".txt")
100 |             # print("{}/{}".format(image_path, name))
101 |             # print('convertor\\fold{}\\images\\{}\\{}'.format(fold, path2save, name))
102 |             if not os.path.exists('convertor/fold{}/labels/'.format(fold) + path2save):
103 |                 os.makedirs('convertor/fold{}/labels/'.format(fold) + path2save)
104 |             with open('convertor/fold{}/labels/'.format(fold) + path2save + name.split('.')[0] + ".txt", 'a+') as f:
105 |                 for data in row:
106 |                     f.write('{} '.format(data))
107 |                 f.write('\n')
108 |                 if not os.path.exists('convertor/fold{}/images/{}'.format(fold, path2save)):
109 |                     os.makedirs('convertor/fold{}/images/{}'.format(fold, path2save))
110 |                 sh.copy(os.path.join(image_path, name.split('.')[0], name),
111 |                         'convertor/fold{}/images/{}/{}'.format(fold, path2save, name))
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | import torch.backends.cudnn as cudnn
  4 | import json
  5 | import cv2
  6 | import os
  7 | import torch
  8 | from utils import google_utils
  9 | from utils.datasets import *
 10 | from utils.utils import *
 11 | 
 12 | 
 13 | def detect(save_img=False):
 14 |     out, source, weights, view_img, save_txt, imgsz = \
 15 |         opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
 16 |     webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
 17 |     save_dir = opt.save_dir
 18 |     # Initialize
 19 |     device = torch_utils.select_device(opt.device)
 20 |     if os.path.exists(out):
 21 |         shutil.rmtree(out)  # delete output folder
 22 |     os.makedirs(out)  # make new output folder
 23 |     half = device.type != 'cpu'  # half precision only supported on CUDA
 24 | 
 25 |     # Load model
 26 |     google_utils.attempt_download(weights)
 27 |     model = torch.load(weights, map_location=device)['model'].float().eval()  # load FP32 model
 28 |     imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
 29 |     if half:
 30 |         model.half()  # to FP16
 31 | 
 32 |     # Second-stage classifier
 33 |     classify = False
 34 |     if classify:
 35 |         modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
 36 |         modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
 37 |         modelc.to(device).eval()
 38 | 
 39 |     # Set Dataloader
 40 |     vid_path, vid_writer = None, None
 41 |     if webcam:
 42 |         view_img = True
 43 |         cudnn.benchmark = True  # set True to speed up constant image size inference
 44 |         dataset = LoadStreams(source, img_size=imgsz)
 45 |     else:
 46 |         save_img = True
 47 |         dataset = LoadImagesTest(source, img_size=imgsz)
 48 | 
 49 |     # Get names and colors
 50 |     names = model.module.names if hasattr(model, 'module') else model.names
 51 |     colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
 52 | 
 53 |     # Run inference
 54 |     t0 = time.time()
 55 |     img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
 56 |     _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
 57 | 
 58 |     save_json = True
 59 |     result = []
 60 |     for path, img, im0s, vid_cap in dataset:
 61 |         img = torch.from_numpy(img).to(device)
 62 |         img = img.half() if half else img.float()  # uint8 to fp16/32
 63 |         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 64 |         if img.ndimension() == 3:
 65 |             img = img.unsqueeze(0)
 66 | 
 67 |         # Inference
 68 |         t1 = torch_utils.time_synchronized()
 69 |         pred = model(img, augment=opt.augment)[0]
 70 | 
 71 |         # Apply NMS
 72 |         pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
 73 |         t2 = torch_utils.time_synchronized()
 74 | 
 75 |         # Apply Classifier
 76 |         if classify:
 77 |             pred = apply_classifier(pred, modelc, img, im0s)
 78 | 
 79 |         # Process detections
 80 |         for i, det in enumerate(pred):  # detections per image
 81 |             if webcam:  # batch_size >= 1
 82 |                 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
 83 |             else:
 84 |                 p, s, im0 = path, '', im0s
 85 | 
 86 |             save_path = str(Path(out) / Path(p).name)
 87 |             txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
 88 |             s += '%gx%g ' % img.shape[2:]  # print string
 89 |             gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
 90 |             if det is not None and len(det):
 91 |                 # Rescale boxes from img_size to im0 size
 92 |                 det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
 93 | 
 94 |                 # Print results
 95 |                 for c in det[:, -1].unique():
 96 |                     n = (det[:, -1] == c).sum()  # detections per class
 97 |                     s += '%g %ss, ' % (n, names[int(c)])  # add to string
 98 | 
 99 |                 # Write results
100 |                 for *xyxy, conf, cls in det:
101 |                     if save_txt:  # Write to file
102 |                         xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
103 |                         with open(txt_path + '.txt', 'a') as f:
104 |                             f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format
105 | 
106 |                     # write jiang #################
107 |                     if save_json:
108 |                         name = os.path.split(txt_path)[-1]
109 |                         print(name)
110 | 
111 |                         x1, y1, x2, y2 = float(xyxy[0]), float(xyxy[1]), float(xyxy[2]), float(xyxy[3])
112 |                         bbox = [x1, y1, x2, y2]
113 |                         img_name = name
114 |                         conf = float(conf)
115 | 
116 |                         #add solution remove other
117 |                         result.append(
118 |                             {'name': img_name+'.jpg', 'category': int(cls+1), 'bbox': bbox,
119 |                              'score': conf})
120 |                         print("result: ", {'name': img_name+'.jpg', 'category': int(cls+1), 'bbox': bbox,'score': conf})
121 | 
122 |                     if save_img or view_img:  # Add bbox to image
123 |                         label = '%s %.2f' % (names[int(cls)], conf)
124 |                         plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
125 | 
126 |             # Print time (inference + NMS)
127 |             print('%sDone. (%.3fs)' % (s, t2 - t1))
128 | 
129 |             # Stream results
130 |             if view_img:
131 |                 cv2.imshow(p, im0)
132 |                 if cv2.waitKey(1) == ord('q'):  # q to quit
133 |                     raise StopIteration
134 | 
135 |             # Save results (image with detections)
136 |             if save_img:
137 |                 if dataset.mode == 'images':
138 |                     cv2.imwrite(save_path, im0)
139 |                 else:
140 |                     if vid_path != save_path:  # new video
141 |                         vid_path = save_path
142 |                         if isinstance(vid_writer, cv2.VideoWriter):
143 |                             vid_writer.release()  # release previous video writer
144 | 
145 |                         fourcc = 'mp4v'  # output video codec
146 |                         fps = vid_cap.get(cv2.CAP_PROP_FPS)
147 |                         w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
148 |                         h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
149 |                         vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
150 |                     vid_writer.write(im0)
151 | 
152 |     if save_txt or save_img:
153 |         print('Results saved to %s' % os.getcwd() + os.sep + out)
154 |         if platform == 'darwin':  # MacOS
155 |             os.system('open ' + save_path)
156 | 
157 |     if save_json:
158 |         if not os.path.exists(save_dir):
159 |             os.makedirs(save_dir)
160 |         with open(os.path.join(save_dir, "result.json"), 'w') as fp:
161 |             json.dump(result, fp, indent=4, ensure_ascii=False)
162 | 
163 | 
164 |     print('Done. (%.3fs)' % (time.time() - t0))
165 | 
166 | 
167 | 
168 | if __name__ == '__main__':
169 |     parser = argparse.ArgumentParser()
170 |     parser.add_argument('--weights', type=str, default='weights/best.pt', help='model.pt path')
171 |     parser.add_argument('--save_dir', type=str, default='./', help='result save dir')
172 |     # parser.add_argument('--source', type=str, default='convertor/fold0/images/val', help='source')  # file/folder, 0 for webcam
173 |     parser.add_argument('--source', type=str, default='../../data/guangdong1_round2_train_part1_20190924/defect',
174 |                         help='source')  # file/folder, 0 for webcam
175 |     parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
176 |     parser.add_argument('--img-size', type=int, default=1024, help='inference size (pixels)')
177 |     parser.add_argument('--conf-thres', type=float, default=0.04, help='object confidence threshold')
178 |     parser.add_argument('--iou-thres', type=float, default=0.05, help='IOU threshold for NMS')
179 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
180 |     parser.add_argument('--view-img', action='store_true', help='display results')
181 |     parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
182 |     parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
183 |     parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
184 |     parser.add_argument('--augment', action='store_true', help='augmented inference')
185 |     parser.add_argument('--update', action='store_true', help='update all models')
186 |     opt = parser.parse_args()
187 |     print(opt)
188 | 
189 |     with torch.no_grad():
190 |         if opt.update:  # update all models (to fix SourceChangeWarning)
191 |             for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
192 |                 detect()
193 |                 create_pretrained(opt.weights, opt.weights)
194 |         else:
195 |             detect()
196 | 


--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
 1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/
 2 | 
 3 | Usage:
 4 |     import torch
 5 |     model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
 6 | """
 7 | 
 8 | dependencies = ['torch', 'yaml']
 9 | 
10 | import os
11 | 
12 | import torch
13 | 
14 | from models.yolo import Model
15 | from utils import google_utils
16 | 
17 | 
18 | def create(name, pretrained, channels, classes):
19 |     """Creates a specified YOLOv5 model
20 | 
21 |     Arguments:
22 |         name (str): name of model, i.e. 'yolov5s'
23 |         pretrained (bool): load pretrained weights into the model
24 |         channels (int): number of input channels
25 |         classes (int): number of model classes
26 | 
27 |     Returns:
28 |         pytorch model
29 |     """
30 |     config = os.path.join(os.path.dirname(__file__), 'models', '%s.yaml' % name)  # model.yaml path
31 |     model = Model(config, channels, classes)
32 |     if pretrained:
33 |         ckpt = '%s.pt' % name  # checkpoint filename
34 |         google_utils.attempt_download(ckpt)  # download if not found locally
35 |         state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict()  # to FP32
36 |         state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape}  # filter
37 |         model.load_state_dict(state_dict, strict=False)  # load
38 |     return model
39 | 
40 | 
41 | def yolov5s(pretrained=False, channels=3, classes=80):
42 |     """YOLOv5-small model from https://github.com/ultralytics/yolov5
43 | 
44 |     Arguments:
45 |         pretrained (bool): load pretrained weights into the model, default=False
46 |         channels (int): number of input channels, default=3
47 |         classes (int): number of model classes, default=80
48 | 
49 |     Returns:
50 |         pytorch model
51 |     """
52 |     return create('yolov5s', pretrained, channels, classes)
53 | 
54 | 
55 | def yolov5m(pretrained=False, channels=3, classes=80):
56 |     """YOLOv5-medium model from https://github.com/ultralytics/yolov5
57 | 
58 |     Arguments:
59 |         pretrained (bool): load pretrained weights into the model, default=False
60 |         channels (int): number of input channels, default=3
61 |         classes (int): number of model classes, default=80
62 | 
63 |     Returns:
64 |         pytorch model
65 |     """
66 |     return create('yolov5m', pretrained, channels, classes)
67 | 
68 | 
69 | def yolov5l(pretrained=False, channels=3, classes=80):
70 |     """YOLOv5-large model from https://github.com/ultralytics/yolov5
71 | 
72 |     Arguments:
73 |         pretrained (bool): load pretrained weights into the model, default=False
74 |         channels (int): number of input channels, default=3
75 |         classes (int): number of model classes, default=80
76 | 
77 |     Returns:
78 |         pytorch model
79 |     """
80 |     return create('yolov5l', pretrained, channels, classes)
81 | 
82 | 
83 | def yolov5x(pretrained=False, channels=3, classes=80):
84 |     """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5
85 | 
86 |     Arguments:
87 |         pretrained (bool): load pretrained weights into the model, default=False
88 |         channels (int): number of input channels, default=3
89 |         classes (int): number of model classes, default=80
90 | 
91 |     Returns:
92 |         pytorch model
93 |     """
94 |     return create('yolov5x', pretrained, channels, classes)
95 | 


--------------------------------------------------------------------------------
/process_data_yolo.py:
--------------------------------------------------------------------------------
  1 | #-*- coding: utf-8 -*-
  2 | '''
  3 | @use:将图片和对应的xml生成为裁剪后两张的图片及数据集
  4 | '''
  5 | 
  6 | from __future__ import division
  7 | import os.path
  8 | from PIL import Image
  9 | import numpy as np
 10 | import shutil
 11 | import cv2
 12 | from tqdm import tqdm
 13 | 
 14 | ImgPath = './convertor/fold0/images/val/'  #原始图片
 15 | path = './convertor/fold0/labels/val/'  #原始标注
 16 | 
 17 | ProcessedPath = './process_data/'  #生成后数据
 18 | 
 19 | txtfiles = os.listdir(path)
 20 | print(txtfiles)
 21 | #patch img_size
 22 | patch_size = 1024
 23 | #slide window stride
 24 | stride = 600
 25 | 
 26 | txtfiles = tqdm(txtfiles)
 27 | for file in txtfiles: #遍历txt进行操作
 28 |     image_pre, ext = os.path.splitext(file)
 29 |     imgfile = ImgPath + image_pre + '.jpg'
 30 |     txtfile = path + image_pre + '.txt'
 31 |     # if not os.path.isdir(file):  # 判断是否是文件夹，不是文件夹才打开
 32 |     #     print(file)
 33 | 
 34 |     img = cv2.imread(imgfile)
 35 |     sp = img.shape
 36 |     img_h, img_w = sp[0], sp[1]
 37 | 
 38 |     f = open(os.path.join(path, file), "r")
 39 |     lines = f.readlines()
 40 |     savepath_img = ProcessedPath + 'images' + '/val'  #处理完的图片保存路径
 41 |     savepath_txt = ProcessedPath + 'labels' + '/val'  #处理完的图片标签路径
 42 |     if not os.path.exists(savepath_img):
 43 |         os.makedirs(savepath_img)
 44 |     if not os.path.exists(savepath_txt):
 45 |         os.makedirs(savepath_txt)
 46 | 
 47 |     bndbox = []
 48 |     boxname = []
 49 |     for line in lines:
 50 |         c, x_c, y_c, w, h, _ = line.split(' ')
 51 |         c, x_c, y_c, w, h = float(c), float(x_c), float(y_c), float(w), float(h)
 52 |         bndbox.append([x_c, y_c, w, h])
 53 |         boxname.append([c])
 54 |     # print("boxname: ", boxname)
 55 |     # b = bndbox[1]
 56 |     # print(b.nodeName)
 57 |     #a: x起点, b: y起点, w: 宽, h: 高
 58 | 
 59 |     a = []
 60 |     b = []
 61 |     for a_ in range(0, img_w, stride):
 62 |         a.append(a_)
 63 |     for b_ in range(0, img_h, stride):
 64 |         b.append(b_)
 65 | 
 66 | 
 67 |     cropboxes = []
 68 |     for i in a:
 69 |         for j in b:
 70 |             cropboxes.append([i, j, i + patch_size, j + patch_size])
 71 |     i = 1
 72 |     top_size, bottom_size, left_size, right_size = (150, 0, 0, 0)
 73 | 
 74 |     def select(m, n, w, h):
 75 |         # m: x起点, n: y起点, w: 宽, h: 高
 76 |         bbox = []
 77 |         # 查找图片中所有的 box 框
 78 |         for index in range(0, len(bndbox)):
 79 |             boxcls = boxname[index]#获取回归框的类别
 80 |             # print(bndbox[index])
 81 |             # x min
 82 |             x1 = float(bndbox[index][0] * img_w - bndbox[index][2] * img_w/2)
 83 |             # y min
 84 |             y1 = float(bndbox[index][1] * img_h - bndbox[index][3] * img_h/2)
 85 |             # x max
 86 |             x2 = float(bndbox[index][0] * img_w + bndbox[index][2] * img_w/2)
 87 |             # y max
 88 |             y2 = float(bndbox[index][1] * img_h + bndbox[index][3] * img_h/2)
 89 |             # print("the index of the box is", index)
 90 |             # print("the box cls is",boxcls[0])
 91 |             # print("the xy", x1, y1, x2, y2)
 92 |             #如果标记框在第一个范围内则存入bbox[] 并转换成新的格式
 93 |             if x1 >= m and x2 <= m + w and y1 >= n and y2 <= n + h:
 94 |                 a1 = x1 - m
 95 |                 b1 = y1 - n
 96 |                 a2 = x2 - m
 97 |                 b2 = y2 - n
 98 |                 box_w = a2 - a1
 99 |                 box_h = b2 - b1
100 |                 x_c = (a1 + box_w/2)/w
101 |                 y_c = (b1 + box_h/2)/h
102 |                 box_w = box_w / w
103 |                 box_h = box_h / h
104 |                 bbox.append([boxcls[0], x_c, y_c, box_w, box_h])  # 更新后的标记框
105 |         if bbox is not None:
106 |             return bbox
107 |         else:
108 |             return 0
109 | 
110 |     img = Image.open(imgfile)
111 |     for j in range(0, len(cropboxes)):
112 |         # print("the img number is :", j)
113 |         # 获取在 patch 的 box
114 |         Bboxes = select(cropboxes[j][0], cropboxes[j][1], patch_size, patch_size)
115 |         if len(Bboxes):
116 |             with open(savepath_txt + '/' + image_pre + '_' + '{}'.format(j) + '.txt', 'w') as f:
117 |                 for Bbox in Bboxes:
118 |                     for data in Bbox:
119 |                         f.write('{} '.format(data))
120 |                     f.write('\n')
121 | 
122 |         #图片裁剪
123 |             try:
124 |                 cropedimg = img.crop(cropboxes[j])
125 |                 # print(np.array(cropedimg).shape)
126 |                 cropedimg.save(savepath_img + '/' + image_pre + '_' + str(j) + '.jpg')
127 |                 # print("done!")
128 |             except:
129 |                 continue
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/task05_rnn.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "configured-exploration",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "循环神经网络RNN\n",
  9 |     "\n",
 10 |     "1. 计算图\n",
 11 |     "2. RNN\n",
 12 |     "3. 长短时记忆网络\n",
 13 |     "4. 其他RNN\n",
 14 |     "5. RNN主要应用\n",
 15 |     "\n",
 16 |     "\n",
 17 |     "\n",
 18 |     "# 计算图\n",
 19 |     "\n",
 20 |     "计算图的引入是为了后面更方便的表示网络，计算图是描述计算结构的一种图，它的元素包括节点(node)和边(edge)，节点表示变量，可以是标量、矢量、张量等，而边表示的是某个操作，即函数。\n",
 21 |     "\n",
 22 |     "![6.1](./PIC/6/6.1.png)\n",
 23 |     "\n",
 24 |     "下面这个计算图表示复合函数\n",
 25 |     "\n",
 26 |     "![6.2](./PIC/6/6.2.png)\n",
 27 |     "\n",
 28 |     "关于计算图的求导，我们可以用链式法则表示，有下面两种情况。\n",
 29 |     "\n",
 30 |     "- 情况1\n",
 31 |     "\n",
 32 |     "![6.3](./PIC/6/6.3.png)\n",
 33 |     "\n",
 34 |     "- 情况2\n",
 35 |     "\n",
 36 |     "![6.4](./PIC/6/6.4.png)\n",
 37 |     "\n",
 38 |     "求导举例：\n",
 39 |     "\n",
 40 |     "例1\n",
 41 |     "\n",
 42 |     "<img src=\"./PIC/6/6.5.png\" alt=\"6.5\" style=\"zoom:40%;\" />\n",
 43 |     "\n",
 44 |     "- a = 3, b = 1 可以得到 c = 3, d = 2, e = 6\n",
 45 |     "\n",
 46 |     "- $\\frac{\\partial e}{\\partial a} = \\frac{\\partial e}{\\partial c}\\frac{\\partial c}{\\partial a} = d = b + 1 = 2$\n",
 47 |     "- $\\frac{\\partial e}{\\partial b} = \\frac{\\partial e}{\\partial c}\\frac{\\partial c}{\\partial b}+\\frac{\\partial e}{\\partial d}\\frac{\\partial d}{\\partial b} = d + c=b+1+a+b = 5$\n",
 48 |     "\n",
 49 |     "例2\n",
 50 |     "\n",
 51 |     "<img src=\"./PIC/6/6.6.png\" alt=\"6.6\" style=\"zoom:40%;\" />\n",
 52 |     "\n",
 53 |     "$\\frac{\\partial Z}{\\partial X}=\\alpha \\delta+\\alpha \\epsilon+\\alpha \\zeta+\\beta \\delta+\\beta \\epsilon+\\beta \\zeta+\\gamma \\delta+\\gamma \\epsilon+\\gamma \\zeta = (\\alpha +\\beta+\\gamma)(\\delta+\\epsilon+\\zeta) $\n",
 54 |     "\n",
 55 |     "计算图可以很好的表示导数的前向传递和后向传递的过程，比如上面例2，前向传递了$\\frac{\\partial }{\\partial X}$ ，反向传递$\\frac{\\partial }{\\partial Z}$ 。\n",
 56 |     "\n",
 57 |     "<img src=\"./PIC/6/6.7.png\" alt=\"6.7\" style=\"zoom:30%;\" />\n",
 58 |     "\n",
 59 |     "<img src=\"./PIC/6/6.8.png\" alt=\"6.8\" style=\"zoom:30%;\" />\n",
 60 |     "\n",
 61 |     "# 循环神经网络（Recurrent Neural Network）\n",
 62 |     "\n",
 63 |     "上一章我们已经介绍了CNN，可能我们会想这里为什么还需要构建一种新的网络RNN呢？因为现实生活中存在很多序列化结构，我们需要建立一种更优秀的序列数据模型。\n",
 64 |     "\n",
 65 |     "- 文本：字母和词汇的序列\n",
 66 |     "- 语音：音节的序列\n",
 67 |     "- 视频：图像帧的序列\n",
 68 |     "- 时态数据：气象观测数据，股票交易数据、房价数据等\n",
 69 |     "\n",
 70 |     "RNN的发展历程：\n",
 71 |     "\n",
 72 |     "![6.9](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.9.png)\n",
 73 |     "\n",
 74 |     "循环神经网络是一种人工神经网络，它的节点间的连接形成一个遵循时间序列的有向图，它的核心思想是，样本间存在顺序关系，每个样本和它之前的样本存在关联。通过神经网络在时序上的展开，我们能够找到样本之间的序列相关性。\n",
 75 |     "\n",
 76 |     "下面给出RNN的一般结构：\n",
 77 |     "\n",
 78 |     "![6.10](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.10.png)\n",
 79 |     "\n",
 80 |     "其中各个符号的表示：$x_t,s_t,o_t$分别表示的是$t$时刻的输入、记忆和输出，$U,V,W$是RNN的连接权重，$b_s,b_o$是RNN的偏置，$\\sigma,\\varphi$是激活函数，$\\sigma$通常选tanh或sigmoid，$\\varphi$通常选用softmax。\n",
 81 |     "\n",
 82 |     "其中 softmax 函数，用于分类问题的概率计算。本质上是将一个K维的任意实数向量压缩 (映射)成另一个K维的实数向量，其中向量中的每个元素取值都介于(0，1)之间。\n",
 83 |     "$$\n",
 84 |     "\\sigma(\\vec{z})_{i}=\\frac{e^{z_{i}}}{\\sum_{j=1}^{K} e^{z_{j}}}\n",
 85 |     "$$\n",
 86 |     "\n",
 87 |     "### RNN案例\n",
 88 |     "\n",
 89 |     "比如词性标注，\n",
 90 |     "\n",
 91 |     "- 我/n,爱/v购物/n,\n",
 92 |     "- 我/n在/pre华联/n购物/v\n",
 93 |     "\n",
 94 |     "Word Embedding：自然语言处理(NLP)中的 一组语言建模和特征学习技术的统称，其中来自词汇表的单词或短语被映射到实数的向量。比如这里映射到三个向量然后输入：\n",
 95 |     "\n",
 96 |     "<img src=\"./PIC/6/6.11.png\" alt=\"6.11\" style=\"zoom:30%;\" />\n",
 97 |     "\n",
 98 |     "将神经元的输出存到memory中，memory中值会作为下一时刻的输入。在最开始时刻，给定 memory初始值，然后逐次更新memory中的值。\n",
 99 |     "\n",
100 |     "![6.12](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.12.png)\n",
101 |     "\n",
102 |     "![6.13](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.13.png)\n",
103 |     "\n",
104 |     "### RNN的一般结构\n",
105 |     "\n",
106 |     "- Elman Network\n",
107 |     "\n",
108 |     "<img src=\"/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.14.png\" alt=\"6.14\" style=\"zoom:50%;\" />\n",
109 |     "\n",
110 |     "- Jordan Network\n",
111 |     "\n",
112 |     "<img src=\"/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.15.png\" alt=\"6.15\" style=\"zoom:50%;\" />\n",
113 |     "\n",
114 |     "各种不同的RNN结构\n",
115 |     "\n",
116 |     "![6.16](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.16.png)\n",
117 |     "\n",
118 |     "### RNN训练算法 - BPTT\n",
119 |     "\n",
120 |     "我们先来回顾一下BP算法，就是定义损失函数 Loss 来表示输出 $\\hat{y}$ 和真实标签 y 的误差，通过链式法则自顶向下求得 Loss 对网络权重的偏导。沿梯度的反方向更新权重的值， 直到 Loss 收敛。而这里的 BPTT 算法就是加上了时序演化，后面的两个字母 TT 就是 Through Time。\n",
121 |     "\n",
122 |     "<img src=\"./PIC/6/6.17.png\" alt=\"6.17\" style=\"zoom:50%;\" />\n",
123 |     "\n",
124 |     "我们先定义输出函数：\n",
125 |     "$$\n",
126 |     "\\begin{array}{l}s_{t}=\\tanh \\left(U x_{t}+W s_{t-1}\\right) \\\\ \\hat{y}_{t}=\\operatorname{softmax}\\left(V s_{t}\\right)\\end{array}\n",
127 |     "$$\n",
128 |     "再定义损失函数：\n",
129 |     "$$\n",
130 |     "\\begin{aligned} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) =-y_{t} \\log \\hat{y}_{t} \\\\ E(y, \\hat{y}) =\\sum_{t} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) \\\\ =-\\sum_{t} y_{t} \\log \\hat{y}_{t}\\end{aligned}\n",
131 |     "$$\n",
132 |     "<img src=\"./PIC/6/6.18.png\" alt=\"6.18\" style=\"zoom:50%;\" />\n",
133 |     "\n",
134 |     "我们分别求损失函数 E 对 U、V、W的梯度：\n",
135 |     "$$\n",
136 |     "\\begin{array}{l}\\frac{\\partial E}{\\partial V}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial V} \\\\ \\frac{\\partial E}{\\partial W}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial W} \\\\ \\frac{\\partial E}{\\partial U}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial U}\\end{array}\n",
137 |     "$$\n",
138 |     "\n",
139 |     "- 求 E 对 V 的梯度，先求 $E_3$ 对 V 的梯度\n",
140 |     "\n",
141 |     "$$\n",
142 |     "\\begin{aligned} \\frac{\\partial E_{3}}{\\partial V} &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial V} \\\\ &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial z_{3}} \\frac{\\partial z_{3}}{\\partial V} \\end{aligned}\n",
143 |     "$$\n",
144 |     "\n",
145 |     "其中 $z_3 = V s_3$，然后求和即可。\n",
146 |     "\n",
147 |     "- 求 E 对 W 的梯度，先求 $E_3$ 对 W 的梯度\n",
148 |     "\n",
149 |     "$$\n",
150 |     "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial W}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial W} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} \\frac{\\partial s_{k}}{\\partial W} \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}}\\left(\\prod_{j=k+1}^{3} \\frac{\\partial s_{j}}{\\partial s_{j-1}}\\right) \\frac{\\partial s_{k}}{\\partial W}\\end{array}\n",
151 |     "$$\n",
152 |     "\n",
153 |     "其中:  $s_3$ 依赖于 $s_2$，而 $s_2$ 又依赖于 $s_1 $ 和 W ，依赖关系 一直传递到 t = 0 的时刻。因此，当我们计算对于 W 的偏导数时，不能把 $s_2$ 看作是常数项！\n",
154 |     "\n",
155 |     "- 求 E 对 U 的梯度，先求 $E_3$ 对 U 的梯度\n",
156 |     "\n",
157 |     "$$\n",
158 |     "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial W}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial U} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial U}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} \\frac{\\partial s_{k}}{\\partial U}\\end{array}\n",
159 |     "$$\n",
160 |     "\n",
161 |     "# 长短时记忆网络\n",
162 |     "\n",
163 |     "在RNN中，存在一个很重要的问题，就是梯度消失问题，一开始我们不能有效的解决长时依赖问题，其中梯度消失的原因有两个：BPTT算法和激活函数Tanh\n",
164 |     "$$\n",
165 |     "\\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}}\\left(\\prod_{j=k+1}^{3} \\frac{\\partial s_{j}}{\\partial s_{j-1}}\\right) \\frac{\\partial s_{k}}{\\partial W}\n",
166 |     "$$\n",
167 |     "有两种解决方案，分别是ReLU函数和门控RNN(LSTM).\n",
168 |     "\n",
169 |     "### LSTM\n",
170 |     "\n",
171 |     "LSTM，即长短时记忆网络，于1997年被Sepp Hochreiter 和Jürgen Schmidhuber提出来，LSTM是一种用于深度学习领域的人工循环神经网络（RNN）结构。一个LSTM单元由输入门、输出门和遗忘门组成，三个门控制信息进出单元。\n",
172 |     "\n",
173 |     "![6.20](./PIC/6/6.20.png)\n",
174 |     "\n",
175 |     "- LSTM依靠贯穿隐藏层的细胞状态实现隐藏单元之间的信息传递，其中只有少量的线性操作\n",
176 |     "- LSTM引入了“门”机制对细胞状态信息进行添加或删除，由此实现长程记忆\n",
177 |     "- “门”机制由一个Sigmoid激活函数层和一个向量点乘操作组成，Sigmoid层的输出控制了信息传递的比例\n",
178 |     "\n",
179 |     "**遗忘门**：LSTM通过遗忘门(forget gate)实现对细胞状态信息遗忘程度的控制，输出当前状态的遗忘权重，取决于 $h_{t−1}$ 和 $x_t$.\n",
180 |     "$$\n",
181 |     "f_{t}=\\sigma\\left(W_{f} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{f}\\right)\n",
182 |     "$$\n",
183 |     "<img src=\"./PIC/6/6.21.png\" alt=\"6.21\" style=\"zoom:50%;\" />\n",
184 |     "\n",
185 |     "**输入门**：LSTM通过输入门(input gate)实现对细胞状态输入接收程度的控制，输出当前输入信息的接受权重，取决于 $h_{t−1}$ 和 $x_t$.\n",
186 |     "$$\n",
187 |     "\\begin{array}{c}i_{t}=\\sigma\\left(W_{i} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{i}\\right) \\\\ \\tilde{C}_{t}=\\tanh \\left(W_{C} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{C}\\right)\\end{array}\n",
188 |     "$$\n",
189 |     "<img src=\"/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.22.png\" alt=\"6.22\" style=\"zoom:50%;\" />\n",
190 |     "\n",
191 |     "**输出门**：LSTM通过输出门(output gate)实现对细胞状态输出认可程度的控制，输出当前输出信息的认可权重，取决于 $h_{t−1}$ 和 $x_t$.\n",
192 |     "$$\n",
193 |     "o_{t}=\\sigma\\left(W_{o} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{o}\\right)\n",
194 |     "$$\n",
195 |     "<img src=\"./PIC/6/6.23.png\" alt=\"6.23\" style=\"zoom:50%;\" />\n",
196 |     "\n",
197 |     "**状态更新**：“门”机制对细胞状态信息进行添加或删除，由此实现长程记忆。\n",
198 |     "$$\n",
199 |     "\\begin{array}{c}C_{t}=f_{t} * C_{t-1}+i_{t} * \\tilde{C}_{t} \\\\ h_{t}=o_{t} * \\tanh \\left(C_{t}\\right)\\end{array}\n",
200 |     "$$\n",
201 |     "<img src=\"./PIC/6/6.24.png\" alt=\"6.24\" style=\"zoom:50%;\" />\n",
202 |     "\n",
203 |     "下面给出一个标准化的RNN例子\n",
204 |     "\n",
205 |     "```python\n",
206 |     "#构造RNN网络，x的维度5，隐层的维度10,网络的层数2\n",
207 |     "rnn_ seq = nn.RNN(5， 10,2)\n",
208 |     "#构造一个输入序列，长为6，batch是3，特征是5\n",
209 |     "X =V(torch. randn(6， 3，5))\n",
210 |     "#out,ht = rnn_ seq(x， h0) # h0可以指定或者不指定\n",
211 |     "out,ht = rnn_ seq(x)\n",
212 |     "# q1:这里out、ht的size是多少呢? out:6*3*10， ht:2*3*10\n",
213 |     "\n",
214 |     "#输入维度50，隐层100维，两层\n",
215 |     "Lstm_ seq = nn.LSTM(50， 100，num layers=2 )\n",
216 |     "#输入序列seq= 10，batch =3，输入维度=50\n",
217 |     "lstm input = torch. randn(10，3，50)\n",
218 |     "out, (h, c) = lstm_ seq(lstm_ _input) #使用默认的全0隐藏状态\n",
219 |     "```\n",
220 |     "\n",
221 |     "# 其他经典的循环神经网络\n",
222 |     "\n",
223 |     "### Gated Recurrent Unit(GRU)\n",
224 |     "\n",
225 |     "Gated Recurrent Unit (GRU)，是在2014年提出的，可认为是LSTM 的变种，它的细胞状态与隐状态合并，在计算当前时刻新信息的方法和LSTM有 所不同；GRU只包含重置门和更新门；在音乐建模与语音信号建模领域与LSTM具有相似的性能，但是参数更少，只有两个门控。\n",
226 |     "\n",
227 |     "![6.19](./PIC/6/6.19.png)\n",
228 |     "\n",
229 |     "### Peephole LSTM\n",
230 |     "\n",
231 |     "让门层也接受细胞状态的输入，同时考虑隐层信息的输入。\n",
232 |     "\n",
233 |     "<img src=\"./PIC/6/6.25.png\" alt=\"6.25\" style=\"zoom:50%;\" />\n",
234 |     "\n",
235 |     "### Bi-directional RNN(双向RNN) \n",
236 |     "\n",
237 |     "Bi-directional RNN(双向RNN)假设当前t的输出不仅仅和之前的序列有关，并且还与之后的序列有关，例如：完形填空，它由两个RNNs上下叠加在一起组成，输出由这两个RNNs的隐藏层的状态决定。\n",
238 |     "\n",
239 |     "<img src=\"./PIC/6/6.26.png\" alt=\"6.26\" style=\"zoom:50%;\" />\n",
240 |     "\n",
241 |     "<img src=\"./PIC/6/6.27.png\" alt=\"6.27\" style=\"zoom:50%;\" />\n",
242 |     "\n",
243 |     "### Continuous time RNN(CTRNN)\n",
244 |     "\n",
245 |     "CTRNN利用常微分方程系统对输入脉冲序列神经元的影响 进行建模。CTRNN被应用到进化机器人中，用于解决视觉、协作和最 小认知行为等问题。\n",
246 |     "\n",
247 |     "<img src=\"./PIC/6/6.28.png\" alt=\"6.28\" style=\"zoom:50%;\" />\n",
248 |     "\n",
249 |     "# 循环神经网络的主要应用\n",
250 |     "\n",
251 |     "### 语言模型\n",
252 |     "\n",
253 |     "根据之前和当前词预测下一个单词或者字母\n",
254 |     "\n",
255 |     "<img src=\"./PIC/6/6.29.png\" alt=\"6.29\" style=\"zoom:50%;\" />\n",
256 |     "\n",
257 |     "问答系统\n",
258 |     "\n",
259 |     "<img src=\"/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.30.png\" alt=\"6.30\" style=\"zoom:50%;\" />\n",
260 |     "\n",
261 |     "### 自动作曲\n",
262 |     "\n",
263 |     "<img src=\"./PIC/6/6.31.png\" alt=\"6.31\" style=\"zoom:50%;\" />\n",
264 |     "\n",
265 |     "参考：Hang Chu, Raquel Urtasun, Sanja Fidler. Song From PI: A Musically Plausible Network for Pop Music Generation. CoRR abs/1611.03477 (2016)\n",
266 |     "\n",
267 |     "Music AI Lab: **https://musicai.citi.sinica.edu.tw/**\n",
268 |     "\n",
269 |     "<img src=\"./PIC/6/6.32.png\" alt=\"6.32\" style=\"zoom:50%;\" />\n",
270 |     "\n",
271 |     "### 机器翻译\n",
272 |     "\n",
273 |     "将一种语言自动翻译成另一种语言\n",
274 |     "\n",
275 |     "<img src=\"./PIC/6/6.35.png\" alt=\"6.35\" style=\"zoom:50%;\" />\n",
276 |     "\n",
277 |     "### 自动写作\n",
278 |     "\n",
279 |     "根据现有资料自动写作，当前主要包括新闻写作和诗歌创作。主要是基于RNN&LSTM的文本生成技术来实现，需要训练大量同 类文本，结合模板技术。\n",
280 |     "\n",
281 |     "目前主要产品有：腾讯Dreamwriter写稿机器人、今日头条xiaomingbot、第一财经DT稿王(背后是阿里巴巴) 、百度Writing-bots...\n",
282 |     "\n",
283 |     "### 图像描述\n",
284 |     "\n",
285 |     "根据图像形成语言描述\n",
286 |     "\n",
287 |     "<img src=\"./PIC/6/6.33.png\" alt=\"6.33\" style=\"zoom:50%;\" />\n",
288 |     "\n",
289 |     "<img src=\"./PIC/6/6.34.png\" alt=\"6.34\" style=\"zoom:50%;\" />\n",
290 |     "\n"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": null,
296 |    "id": "settled-engine",
297 |    "metadata": {},
298 |    "outputs": [],
299 |    "source": []
300 |   }
301 |  ],
302 |  "metadata": {
303 |   "kernelspec": {
304 |    "display_name": "Python 3",
305 |    "language": "python",
306 |    "name": "python3"
307 |   },
308 |   "language_info": {
309 |    "codemirror_mode": {
310 |     "name": "ipython",
311 |     "version": 3
312 |    },
313 |    "file_extension": ".py",
314 |    "mimetype": "text/x-python",
315 |    "name": "python",
316 |    "nbconvert_exporter": "python",
317 |    "pygments_lexer": "ipython3",
318 |    "version": "3.8.10"
319 |   }
320 |  },
321 |  "nbformat": 4,
322 |  "nbformat_minor": 5
323 | }
324 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import json
  3 | 
  4 | from utils import google_utils
  5 | from utils.datasets import *
  6 | from utils.utils import *
  7 | 
  8 | 
  9 | def test(data,
 10 |          weights=None,
 11 |          batch_size=16,
 12 |          imgsz=640,
 13 |          conf_thres=0.001,
 14 |          iou_thres=0.6,  # for NMS
 15 |          save_json=False,
 16 |          single_cls=False,
 17 |          augment=False,
 18 |          verbose=False,
 19 |          model=None,
 20 |          dataloader=None,
 21 |          merge=False):
 22 |     # Initialize/load model and set device
 23 |     if model is None:
 24 |         training = False
 25 |         merge = opt.merge  # use Merge NMS
 26 |         device = torch_utils.select_device(opt.device, batch_size=batch_size)
 27 | 
 28 |         # Remove previous
 29 |         for f in glob.glob('test_batch*.jpg'):
 30 |             os.remove(f)
 31 | 
 32 |         # Load model
 33 |         google_utils.attempt_download(weights)
 34 |         model = torch.load(weights, map_location=device)['model'].float().fuse().to(device)  # load to FP32
 35 |         imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
 36 | 
 37 |         # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
 38 |         # if device.type != 'cpu' and torch.cuda.device_count() > 1:
 39 |         #     model = nn.DataParallel(model)
 40 | 
 41 |     else:  # called by train.py
 42 |         training = True
 43 |         device = next(model.parameters()).device  # get model device
 44 | 
 45 |     # Half
 46 |     half = device.type != 'cpu' and torch.cuda.device_count() == 1  # half precision only supported on single-GPU
 47 |     if half:
 48 |         model.half()  # to FP16
 49 | 
 50 |     # Configure
 51 |     model.eval()
 52 |     with open(data) as f:
 53 |         data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
 54 |     nc = 1 if single_cls else int(data['nc'])  # number of classes
 55 |     iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
 56 |     niou = iouv.numel()
 57 | 
 58 |     # Dataloader
 59 |     if dataloader is None:  # not training
 60 |         img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
 61 |         _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
 62 |         path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
 63 |         dataloader = create_dataloader(path, imgsz, batch_size, int(max(model.stride)), opt,
 64 |                                        hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
 65 | 
 66 |     seen = 0
 67 |     names = model.names if hasattr(model, 'names') else model.module.names
 68 |     coco91class = coco80_to_coco91_class()
 69 |     s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
 70 |     p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
 71 |     loss = torch.zeros(3, device=device)
 72 |     jdict, stats, ap, ap_class = [], [], [], []
 73 |     for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
 74 |         img = img.to(device)
 75 |         img = img.half() if half else img.float()  # uint8 to fp16/32
 76 |         img /= 255.0  # 0 - 255 to 0.0 - 1.0
 77 |         targets = targets.to(device)
 78 |         nb, _, height, width = img.shape  # batch size, channels, height, width
 79 |         whwh = torch.Tensor([width, height, width, height]).to(device)
 80 | 
 81 |         # Disable gradients
 82 |         with torch.no_grad():
 83 |             # Run model
 84 |             t = torch_utils.time_synchronized()
 85 |             inf_out, train_out = model(img, augment=augment)  # inference and training outputs
 86 |             t0 += torch_utils.time_synchronized() - t
 87 | 
 88 |             # Compute loss
 89 |             if training:  # if model has loss hyperparameters
 90 |                 loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls
 91 | 
 92 |             # Run NMS
 93 |             t = torch_utils.time_synchronized()
 94 |             output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
 95 |             t1 += torch_utils.time_synchronized() - t
 96 | 
 97 |         # Statistics per image
 98 |         for si, pred in enumerate(output):
 99 |             labels = targets[targets[:, 0] == si, 1:]
100 |             nl = len(labels)
101 |             tcls = labels[:, 0].tolist() if nl else []  # target class
102 |             seen += 1
103 | 
104 |             if pred is None:
105 |                 if nl:
106 |                     stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
107 |                 continue
108 | 
109 |             # Append to text file
110 |             # with open('test.txt', 'a') as file:
111 |             #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
112 | 
113 |             # Clip boxes to image bounds
114 |             clip_coords(pred, (height, width))
115 | 
116 |             # Append to pycocotools JSON dictionary
117 |             if save_json:
118 |                 # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
119 |                 image_id = int(Path(paths[si]).stem.split('_')[-1])
120 |                 box = pred[:, :4].clone()  # xyxy
121 |                 scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
122 |                 box = xyxy2xywh(box)  # xywh
123 |                 box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
124 |                 for p, b in zip(pred.tolist(), box.tolist()):
125 |                     jdict.append({'image_id': image_id,
126 |                                   'category_id': coco91class[int(p[5])],
127 |                                   'bbox': [round(x, 3) for x in b],
128 |                                   'score': round(p[4], 5)})
129 | 
130 |             # Assign all predictions as incorrect
131 |             correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
132 |             if nl:
133 |                 detected = []  # target indices
134 |                 tcls_tensor = labels[:, 0]
135 | 
136 |                 # target boxes
137 |                 tbox = xywh2xyxy(labels[:, 1:5]) * whwh
138 | 
139 |                 # Per target class
140 |                 for cls in torch.unique(tcls_tensor):
141 |                     ti = (cls == tcls_tensor).nonzero().view(-1)  # prediction indices
142 |                     pi = (cls == pred[:, 5]).nonzero().view(-1)  # target indices
143 | 
144 |                     # Search for detections
145 |                     if pi.shape[0]:
146 |                         # Prediction to target ious
147 |                         ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices
148 | 
149 |                         # Append detections
150 |                         for j in (ious > iouv[0]).nonzero():
151 |                             d = ti[i[j]]  # detected target
152 |                             if d not in detected:
153 |                                 detected.append(d)
154 |                                 correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
155 |                                 if len(detected) == nl:  # all targets already located in image
156 |                                     break
157 | 
158 |             # Append statistics (correct, conf, pcls, tcls)
159 |             stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
160 | 
161 |         # Plot images
162 |         if batch_i < 1:
163 |             f = 'test_batch%g_gt.jpg' % batch_i  # filename
164 |             plot_images(img, targets, paths, f, names)  # ground truth
165 |             f = 'test_batch%g_pred.jpg' % batch_i
166 |             plot_images(img, output_to_target(output, width, height), paths, f, names)  # predictions
167 | 
168 |     # Compute statistics
169 |     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
170 |     if len(stats):
171 |         p, r, ap, f1, ap_class = ap_per_class(*stats)
172 |         p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
173 |         mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
174 |         nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
175 |     else:
176 |         nt = torch.zeros(1)
177 | 
178 |     # Print results
179 |     pf = '%20s' + '%12.3g' * 6  # print format
180 |     print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
181 | 
182 |     # Print results per class
183 |     if verbose and nc > 1 and len(stats):
184 |         for i, c in enumerate(ap_class):
185 |             print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
186 | 
187 |     # Print speeds
188 |     t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
189 |     if not training:
190 |         print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
191 | 
192 |     # Save JSON
193 |     if save_json and map50 and len(jdict):
194 |         imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
195 |         f = 'detections_val2017_%s_results.json' % \
196 |             (weights.split(os.sep)[-1].replace('.pt', '') if weights else '')  # filename
197 |         print('\nCOCO mAP with pycocotools... saving %s...' % f)
198 |         with open(f, 'w') as file:
199 |             json.dump(jdict, file)
200 | 
201 |         try:
202 |             from pycocotools.coco import COCO
203 |             from pycocotools.cocoeval import COCOeval
204 | 
205 |             # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
206 |             cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
207 |             cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
208 | 
209 |             cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
210 |             cocoEval.params.imgIds = imgIds  # image IDs to evaluate
211 |             cocoEval.evaluate()
212 |             cocoEval.accumulate()
213 |             cocoEval.summarize()
214 |             map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
215 |         except:
216 |             print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
217 |                   'See https://github.com/cocodataset/cocoapi/issues/356')
218 | 
219 |     # Return results
220 |     model.float()  # for training
221 |     maps = np.zeros(nc) + map
222 |     for i, c in enumerate(ap_class):
223 |         maps[c] = ap[i]
224 |     return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
225 | 
226 | 
227 | if __name__ == '__main__':
228 |     parser = argparse.ArgumentParser(prog='test.py')
229 |     parser.add_argument('--weights', type=str, default='weights/yolov5s.pt', help='model.pt path')
230 |     parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
231 |     parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
232 |     parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
233 |     parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
234 |     parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
235 |     parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
236 |     parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
237 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
238 |     parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
239 |     parser.add_argument('--augment', action='store_true', help='augmented inference')
240 |     parser.add_argument('--merge', action='store_true', help='use Merge NMS')
241 |     parser.add_argument('--verbose', action='store_true', help='report mAP by class')
242 |     opt = parser.parse_args()
243 |     opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
244 |     opt.data = check_file(opt.data)  # check file
245 |     print(opt)
246 | 
247 |     # task = 'val', 'test', 'study'
248 |     if opt.task in ['val', 'test']:  # (default) run normally
249 |         test(opt.data,
250 |              opt.weights,
251 |              opt.batch_size,
252 |              opt.img_size,
253 |              opt.conf_thres,
254 |              opt.iou_thres,
255 |              opt.save_json,
256 |              opt.single_cls,
257 |              opt.augment,
258 |              opt.verbose)
259 | 
260 |     elif opt.task == 'study':  # run over a range of settings and save/plot
261 |         for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
262 |             f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem)  # filename to save to
263 |             x = list(range(352, 832, 64))  # x axis
264 |             y = []  # y axis
265 |             for i in x:  # img-size
266 |                 print('\nRunning %s point %s...' % (f, i))
267 |                 r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
268 |                 y.append(r + t)  # results and times
269 |             np.savetxt(f, y, fmt='%10.4g')  # save
270 |         os.system('zip -r study.zip study_*.txt')
271 |         # plot_study_txt(f, x)  # plot
272 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | import torch.distributed as dist
  4 | import torch.nn.functional as F
  5 | import torch.optim as optim
  6 | import torch.optim.lr_scheduler as lr_scheduler
  7 | import torch.utils.data
  8 | import numpy as np
  9 | from torch.utils.tensorboard import SummaryWriter
 10 | import os
 11 | import test  # import test.py to get mAP after each epoch
 12 | from models.yolo import Model
 13 | from utils import google_utils
 14 | from utils.datasets import *
 15 | from utils.utils import *
 16 | 
 17 | mixed_precision = True
 18 | try:  # Mixed precision training https://github.com/NVIDIA/apex
 19 |     from apex import amp
 20 | except:
 21 |     print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
 22 |     mixed_precision = False  # not installed
 23 | 
 24 | wdir = 'weights' + os.sep  # weights dir
 25 | os.makedirs(wdir, exist_ok=True)
 26 | last = wdir + 'last.pt'
 27 | best = wdir + 'best.pt'
 28 | results_file = 'results.txt'
 29 | 
 30 | # Hyperparameters
 31 | hyp = {'lr0': 0.01,  # initial learning rate (SGD=1E-2, Adam=1E-3)
 32 |        'momentum': 0.937,  # SGD momentum
 33 |        'weight_decay': 5e-4,  # optimizer weight decay
 34 |        'giou': 0.05,  # giou loss gain
 35 |        'cls': 0.58,  # cls loss gain
 36 |        'cls_pw': 1.0,  # cls BCELoss positive_weight
 37 |        'obj': 1.0,  # obj loss gain (*=img_size/320 if img_size != 320)
 38 |        'obj_pw': 1.0,  # obj BCELoss positive_weight
 39 |        'iou_t': 0.20,  # iou training threshold
 40 |        'anchor_t': 4.0,  # anchor-multiple threshold
 41 |        'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
 42 |        'hsv_h': 0.014,  # image HSV-Hue augmentation (fraction)
 43 |        'hsv_s': 0.68,  # image HSV-Saturation augmentation (fraction)
 44 |        'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
 45 |        'degrees': 0.0,  # image rotation (+/- deg)
 46 |        'translate': 0.0,  # image translation (+/- fraction)
 47 |        'scale': 0.5,  # image scale (+/- gain)
 48 |        'shear': 0.0}  # image shear (+/- deg)
 49 | print(hyp)
 50 | 
 51 | # Overwrite hyp with hyp*.txt (optional)
 52 | f = glob.glob('hyp*.txt')
 53 | if f:
 54 |     print('Using %s' % f[0])
 55 |     for k, v in zip(hyp.keys(), np.loadtxt(f[0])):
 56 |         hyp[k] = v
 57 | 
 58 | # Print focal loss if gamma > 0
 59 | if hyp['fl_gamma']:
 60 |     print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])
 61 | 
 62 | 
 63 | def train(hyp):
 64 |     epochs = opt.epochs  # 300
 65 |     batch_size = opt.batch_size  # 64
 66 |     weights = opt.weights  # initial training weights
 67 | 
 68 |     # Configure
 69 |     init_seeds(1)
 70 |     with open(opt.data) as f:
 71 |         data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
 72 |     train_path = data_dict['train']
 73 |     test_path = data_dict['val']
 74 |     nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
 75 | 
 76 |     # Remove previous results
 77 |     for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
 78 |         os.remove(f)
 79 | 
 80 |     # Create model
 81 |     model = Model(opt.cfg, nc=data_dict['nc']).to(device)
 82 | 
 83 |     # Image sizes
 84 |     gs = int(max(model.stride))  # grid size (max stride)
 85 |     imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples
 86 | 
 87 |     # Optimizer
 88 |     nbs = 64  # nominal batch size
 89 |     accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
 90 |     hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
 91 |     pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
 92 |     for k, v in model.named_parameters():
 93 |         if v.requires_grad:
 94 |             if '.bias' in k:
 95 |                 pg2.append(v)  # biases
 96 |             elif '.weight' in k and '.bn' not in k:
 97 |                 pg1.append(v)  # apply weight decay
 98 |             else:
 99 |                 pg0.append(v)  # all else
100 | 
101 |     optimizer = optim.Adam(pg0, lr=hyp['lr0']) if opt.adam else \
102 |         optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
103 |     optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
104 |     optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
105 |     # Scheduler https://arxiv.org/pdf/1812.01187.pdf
106 |     lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
107 |     scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
108 |     print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
109 |     del pg0, pg1, pg2
110 | 
111 |     # Load Model
112 |     google_utils.attempt_download(weights)
113 |     start_epoch, best_fitness = 0, 0.0
114 |     if weights.endswith('.pt'):  # pytorch format
115 |         ckpt = torch.load(weights, map_location=device)  # load checkpoint
116 | 
117 |         # load model
118 |         try:
119 |             ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
120 |                              if model.state_dict()[k].shape == v.shape}  # to FP32, filter
121 |             model.load_state_dict(ckpt['model'], strict=False)
122 |         except KeyError as e:
123 |             s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
124 |                 "Please delete or update %s and try again, or use --weights '' to train from scratch." \
125 |                 % (opt.weights, opt.cfg, opt.weights, opt.weights)
126 |             raise KeyError(s) from e
127 | 
128 |         # load optimizer
129 |         if ckpt['optimizer'] is not None:
130 |             optimizer.load_state_dict(ckpt['optimizer'])
131 |             best_fitness = ckpt['best_fitness']
132 | 
133 |         # load results
134 |         if ckpt.get('training_results') is not None:
135 |             with open(results_file, 'w') as file:
136 |                 file.write(ckpt['training_results'])  # write results.txt
137 | 
138 |         # epochs
139 |         start_epoch = ckpt['epoch'] + 1
140 |         if epochs < start_epoch:
141 |             print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
142 |                   (opt.weights, ckpt['epoch'], epochs))
143 |             epochs += ckpt['epoch']  # finetune additional epochs
144 | 
145 |         del ckpt
146 | 
147 |     # Mixed precision training https://github.com/NVIDIA/apex
148 |     if mixed_precision:
149 |         model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
150 | 
151 | 
152 |     scheduler.last_epoch = start_epoch - 1  # do not move
153 |     # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
154 |     # plot_lr_scheduler(optimizer, scheduler, epochs)
155 | 
156 |     # Initialize distributed training
157 |     if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
158 |         dist.init_process_group(backend='nccl',  # distributed backend
159 |                                 init_method='tcp://127.0.0.1:9999',  # init method
160 |                                 world_size=1,  # number of nodes
161 |                                 rank=0)  # node rank
162 |         model = torch.nn.parallel.DistributedDataParallel(model)
163 |         # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
164 | 
165 |     # Trainloader
166 |     dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
167 |                                             hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
168 |     mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
169 |     assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, nc, opt.cfg)
170 | 
171 |     # Testloader
172 |     testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
173 |                                    hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
174 | 
175 |     # Model parameters
176 |     hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
177 |     model.nc = nc  # attach number of classes to model
178 |     model.hyp = hyp  # attach hyperparameters to model
179 |     model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
180 |     model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
181 |     model.names = data_dict['names']
182 | 
183 |     # Class frequency
184 |     labels = np.concatenate(dataset.labels, 0)
185 |     c = torch.tensor(labels[:, 0])  # classes
186 |     # cf = torch.bincount(c.long(), minlength=nc) + 1.
187 |     # model._initialize_biases(cf.to(device))
188 |     if tb_writer:
189 |         plot_labels(labels)
190 |         tb_writer.add_histogram('classes', c, 0)
191 | 
192 |     # Check anchors
193 |     if not opt.noautoanchor:
194 |         check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
195 | 
196 |     # Exponential moving average
197 |     ema = torch_utils.ModelEMA(model)
198 | 
199 |     # Start training
200 |     t0 = time.time()
201 |     nb = len(dataloader)  # number of batches
202 |     n_burn = max(3 * nb, 1e3)  # burn-in iterations, max(3 epochs, 1k iterations)
203 |     maps = np.zeros(nc)  # mAP per class
204 |     results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
205 |     print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
206 |     print('Using %g dataloader workers' % dataloader.num_workers)
207 |     print('Starting training for %g epochs...' % epochs)
208 |     # torch.autograd.set_detect_anomaly(True)
209 |     for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
210 |         model.train()
211 | 
212 |         # Update image weights (optional)
213 |         if dataset.image_weights:
214 |             w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
215 |             image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
216 |             dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx
217 | 
218 |         # Update mosaic border
219 |         # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
220 |         # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
221 | 
222 |         mloss = torch.zeros(4, device=device)  # mean losses
223 |         print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
224 |         pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
225 |         for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
226 |             ni = i + nb * epoch  # number integrated batches (since train start)
227 |             imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
228 | 
229 |             # Burn-in
230 |             if ni <= n_burn:
231 |                 xi = [0, n_burn]  # x interp
232 |                 # model.gr = np.interp(ni, xi, [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
233 |                 accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
234 |                 for j, x in enumerate(optimizer.param_groups):
235 |                     # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
236 |                     x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
237 |                     if 'momentum' in x:
238 |                         x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
239 | 
240 |             # Multi-scale
241 |             if opt.multi_scale:
242 |                 sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
243 |                 sf = sz / max(imgs.shape[2:])  # scale factor
244 |                 if sf != 1:
245 |                     ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
246 |                     imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
247 | 
248 |             # Forward
249 |             pred = model(imgs)
250 | 
251 |             # Loss
252 |             loss, loss_items = compute_loss(pred, targets.to(device), model)
253 |             if not torch.isfinite(loss):
254 |                 print('WARNING: non-finite loss, ending training ', loss_items)
255 |                 return results
256 | 
257 |             # Backward
258 |             if mixed_precision:
259 |                 with amp.scale_loss(loss, optimizer) as scaled_loss:
260 |                     scaled_loss.backward()
261 |             else:
262 |                 loss.backward()
263 | 
264 |             # Optimize
265 |             if ni % accumulate == 0:
266 |                 optimizer.step()
267 |                 optimizer.zero_grad()
268 |                 ema.update(model)
269 | 
270 |             # Print
271 |             mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
272 |             mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
273 |             s = ('%10s' * 2 + '%10.4g' * 6) % (
274 |                 '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
275 |             pbar.set_description(s)
276 | 
277 |             # Plot
278 |             if ni < 3:
279 |                 f = 'train_batch%g.jpg' % ni  # filename
280 |                 result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
281 |                 if tb_writer and result is not None:
282 |                     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
283 |                     # tb_writer.add_graph(model, imgs)  # add model to tensorboard
284 | 
285 |             # end batch ------------------------------------------------------------------------------------------------
286 | 
287 |         # Scheduler
288 |         scheduler.step()
289 | 
290 |         # mAP
291 |         ema.update_attr(model)
292 |         final_epoch = epoch + 1 == epochs
293 |         if not opt.notest or final_epoch:  # Calculate mAP
294 |             results, maps, times = test.test(opt.data,
295 |                                              batch_size=batch_size,
296 |                                              imgsz=imgsz_test,
297 |                                              save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'),
298 |                                              model=ema.ema,
299 |                                              single_cls=opt.single_cls,
300 |                                              dataloader=testloader)
301 | 
302 |         # Write
303 |         with open(results_file, 'a') as f:
304 |             f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
305 |         if len(opt.name) and opt.bucket:
306 |             os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name))
307 | 
308 |         # Tensorboard
309 |         if tb_writer:
310 |             tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
311 |                     'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1',
312 |                     'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
313 |             for x, tag in zip(list(mloss[:-1]) + list(results), tags):
314 |                 tb_writer.add_scalar(tag, x, epoch)
315 | 
316 |         # Update best mAP
317 |         fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
318 |         if fi > best_fitness:
319 |             best_fitness = fi
320 | 
321 |         # Save model
322 |         save = (not opt.nosave) or (final_epoch and not opt.evolve)
323 |         if save:
324 |             with open(results_file, 'r') as f:  # create checkpoint
325 |                 ckpt = {'epoch': epoch,
326 |                         'best_fitness': best_fitness,
327 |                         'training_results': f.read(),
328 |                         'model': ema.ema,
329 |                         'optimizer': None if final_epoch else optimizer.state_dict()}
330 | 
331 |             # Save last, best and delete
332 |             torch.save(ckpt, last)
333 |             if (best_fitness == fi) and not final_epoch:
334 |                 torch.save(ckpt, best)
335 |             del ckpt
336 | 
337 |         # end epoch ----------------------------------------------------------------------------------------------------
338 |     # end training
339 | 
340 |     # Strip optimizers
341 |     n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
342 |     fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
343 |     for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
344 |         if os.path.exists(f1):
345 |             os.rename(f1, f2)  # rename
346 |             ispt = f2.endswith('.pt')  # is *.pt
347 |             strip_optimizer(f2) if ispt else None  # strip optimizer
348 |             os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload
349 | 
350 |     # Finish
351 |     if not opt.evolve:
352 |         plot_results()  # save as results.png
353 |     print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
354 |     dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None
355 |     torch.cuda.empty_cache()
356 |     return results
357 | 
358 | 
359 | if __name__ == '__main__':
360 |     check_git_status()
361 |     parser = argparse.ArgumentParser()
362 |     parser.add_argument('--epochs', type=int, default=1000)
363 |     parser.add_argument('--batch-size', type=int, default=3)
364 |     parser.add_argument('--cfg', type=str, default='models/yolov5x.yaml', help='*.cfg path')
365 |     parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
366 |     parser.add_argument('--img-size', nargs='+', type=int, default=[1024, 1024], help='train,test sizes')
367 |     parser.add_argument('--rect', action='store_true', help='rectangular training')
368 |     parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
369 |     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
370 |     parser.add_argument('--notest', action='store_true', help='only test final epoch')
371 |     parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
372 |     parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
373 |     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
374 |     parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
375 |     parser.add_argument('--weights', type=str, default='weights/best.pt', help='initial weights path')
376 |     parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
377 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
378 |     parser.add_argument('--adam', action='store_true', help='use adam optimizer')
379 |     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%')
380 |     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
381 |     opt = parser.parse_args()
382 |     opt.weights = last if opt.resume and not opt.weights else opt.weights
383 |     opt.cfg = check_file(opt.cfg)  # check file
384 |     opt.data = check_file(opt.data)  # check file
385 |     print(opt)
386 |     opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
387 |     device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
388 |     if device.type == 'cpu':
389 |         mixed_precision = False
390 | 
391 |     # Train
392 |     if not opt.evolve:
393 |         tb_writer = SummaryWriter(comment=opt.name)
394 |         print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
395 |         train(hyp)
396 | 
397 |     # Evolve hyperparameters (optional)
398 |     else:
399 |         tb_writer = None
400 |         opt.notest, opt.nosave = True, True  # only test/save final epoch
401 |         if opt.bucket:
402 |             os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists
403 | 
404 |         for _ in range(10):  # generations to evolve
405 |             if os.path.exists('evolve.txt'):  # if evolve.txt exists: select best hyps and mutate
406 |                 # Select parent(s)
407 |                 parent = 'single'  # parent selection method: 'single' or 'weighted'
408 |                 x = np.loadtxt('evolve.txt', ndmin=2)
409 |                 n = min(5, len(x))  # number of previous results to consider
410 |                 x = x[np.argsort(-fitness(x))][:n]  # top n mutations
411 |                 w = fitness(x) - fitness(x).min()  # weights
412 |                 if parent == 'single' or len(x) == 1:
413 |                     # x = x[random.randint(0, n - 1)]  # random selection
414 |                     x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
415 |                 elif parent == 'weighted':
416 |                     x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
417 | 
418 |                 # Mutate
419 |                 mp, s = 0.9, 0.2  # mutation probability, sigma
420 |                 npr = np.random
421 |                 npr.seed(int(time.time()))
422 |                 g = np.array([1, 1, 1, 1, 1, 1, 1, 0, .1, 1, 0, 1, 1, 1, 1, 1, 1, 1])  # gains
423 |                 ng = len(g)
424 |                 v = np.ones(ng)
425 |                 while all(v == 1):  # mutate until a change occurs (prevent duplicates)
426 |                     v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
427 |                 for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
428 |                     hyp[k] = x[i + 7] * v[i]  # mutate
429 | 
430 |             # Clip to limits
431 |             keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma']
432 |             limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)]
433 |             for k, v in zip(keys, limits):
434 |                 hyp[k] = np.clip(hyp[k], v[0], v[1])
435 | 
436 |             # Train mutation
437 |             results = train(hyp.copy())
438 | 
439 |             # Write mutation results
440 |             print_mutation(hyp, results, opt.bucket)
441 | 
442 |             # Plot results
443 |             # plot_evolution_results(hyp)
444 | 


--------------------------------------------------------------------------------