├── README.md
├── convertTrainLabel.py
├── detect.py
├── hubconf.py
├── process_data_yolo.py
├── task05_rnn.ipynb
├── test.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
1 | ## Task Schedule
2 | 
3 | Camp opens: Feb 16, 21:00
4 | 
5 | - Competition: Tianchi Innovation Competition warm-up round, fabric defect detection. Goal: annotate the defects in the given fabric images.
6 | 
7 | - Competition link: https://tianchi.aliyun.com/competition/entrance/531864/introduction?spm=5176.12281976.0.0.506441a6dTFHF3
8 |
9 |
10 | ### Task00: Get familiar with the rules (1 day)
11 | 
12 | - Form a team and update your group nickname.
13 | - Get familiar with the check-in rules.
14 | - Check-in deadline: Feb 18, 03:00
15 |
16 | ### Task01: Walk through the full competition workflow (3 days)
17 | 
18 | - Learn how to submit code with Docker and climb the leaderboard.
19 | - Record the problems you hit during the competition, and include a screenshot of your initial score in your study notes.
20 | - Check-in deadline: Feb 21, 03:00
21 | - Study materials:
22 |   - [Docker environment setup guide](https://tianchi.aliyun.com/competition/entrance/231759/tab/226)
23 |   - [Docker operations for the competition](https://github.com/datawhalechina/team-learning-cv/blob/master/DefectDetection/docker%E6%8F%90%E4%BA%A4%E6%95%99%E7%A8%8B.pdf)
24 |
25 | ### Task02: Study and improve the baseline (5 days)
26 | 
27 | - Study the baseline, propose your own improvements, submit your code, and update your score and ranking.
28 | - Include a screenshot of your ranking after improving the baseline in your study notes.
29 | - Check-in deadline: Feb 26, 03:00
30 | - Study materials:
31 |   - [Baseline walkthrough and score-boosting tips](https://github.com/datawhalechina/team-learning-cv/blob/master/DefectDetection/README.md)
32 |
33 | ### Task03: Learner sharing (2 days)
34 | 
35 | - Based on the screenshots, we invite the learners with the biggest score gains to share their approach.
36 |
37 |
38 | ## File Description
39 | - code: folder holding all project code
40 | - train_data: raw data folders guangdong1_round2_train2_20191004_Annotations and guangdong1_round2_train2_20191004_images
41 | - tcdata: official test data, generated automatically after the Docker submission
42 | - data: training-data paths, configured in coco128.yaml
43 | - models: network definition code
44 | - weights: folder for trained models (best.pt, last.pt)
45 | - convertTrainLabel.py: converts the official dataset to the YOLO label format (see the sketch below); running it creates the convertor folder
46 | - process_data_yolo.py: slides a window over the images in the convertor folder, cutting the large images into 1024*1024 patches written to process_data
47 | - train.py: training code; run it to train and obtain the model
48 | - detect.py: inference code
49 | - test.py: model evaluation code
50 | - run.sh: script that runs inference on the test set and generates the results (sh run.sh)
51 | - train.sh: training script (sh train.sh)
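
A minimal sketch of the conversion convertTrainLabel.py performs (the function name here is illustrative): each pixel-space box `[x_l, y_l, x_r, y_r]` becomes center/size coordinates normalized by image width and height.

```python
def to_yolo(bbox, image_w, image_h):
    """Convert a pixel-space [x_l, y_l, x_r, y_r] box to normalized YOLO xywh."""
    x_l, y_l, x_r, y_r = bbox
    x_center = (x_l + x_r) / (2 * image_w)  # normalized box-center x
    y_center = (y_l + y_r) / (2 * image_h)  # normalized box-center y
    w = (x_r - x_l) / image_w               # normalized box width
    h = (y_r - y_l) / image_h               # normalized box height
    return x_center, y_center, w, h

# a 100x50 box at the top-left corner of a 2000x1000 image
print(to_yolo([0, 0, 100, 50], 2000, 1000))  # (0.025, 0.025, 0.05, 0.05)
```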
52 |
53 |
54 |
55 |
56 | ## Usage
57 | - step1: unzip the official training dataset into the train_data folder
58 | - step2: start training with sh train.sh
59 |     - train.sh runs four steps:
60 |        - python convertTrainLabel.py
61 |        - python process_data_yolo.py
62 |        - rm -rf ./convertor
63 |        - python train.py
64 | - step3: generate the results with sh run.sh
65 |
66 | ## Approach
67 | - This solution uses YOLOv5 as the baseline
68 | - Data processing: training images are split into patches with a sliding window
69 |
70 |
71 | ## Improvement Ideas
72 | - Data augmentation: random vertical/horizontal flips and color-space augmentation so the defect classes are more evenly represented (see the flip sketch below)
73 | - Adaptive anchor strategy
74 | - Moderately reduce the weight of the box-regression loss
75 | - Rebalance positive/negative samples
76 | - Multi-scale training
77 | - Replace the last FPN layer with dilated convolution
78 | - FPN variants to try: NAS-FPN, AC-FPN
79 | - Anchor matching strategy
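
As a concrete instance of the flip augmentation above (a minimal sketch; YOLOv5's own dataloader augmentation is what training actually uses), a horizontal flip only needs the normalized x-center mirrored:

```python
import cv2

def hflip_sample(image, labels):
    """Horizontally flip an image and its YOLO labels [cls, x_c, y_c, w, h]."""
    flipped = cv2.flip(image, 1)  # flip around the vertical axis
    flipped_labels = [[c, 1.0 - x_c, y_c, w, h] for c, x_c, y_c, w, h in labels]
    return flipped, flipped_labels
```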
--------------------------------------------------------------------------------
/convertTrainLabel.py:
--------------------------------------------------------------------------------
1 | import numpy as np # linear algebra
2 | import os
3 | import json
4 | from tqdm.auto import tqdm
5 | import shutil as sh
6 | import cv2
7 |
8 | json_path = "./train_data/guangdong1_round2_train2_20191004_Annotations/Annotations/anno_train.json"
9 | image_path = "./train_data/guangdong1_round2_train2_20191004_images/defect/"
10 |
11 | name_list = []
12 | image_h_list = []
13 | image_w_list = []
14 | c_list = []
15 | w_list = []
16 | h_list = []
17 | x_center_list = []
18 | y_center_list = []
19 |
20 | with open(json_path, 'r') as f:
21 | temps = tqdm(json.loads(f.read()))
22 | for temp in temps:
23 | # image_w = temp["image_width"]
24 | # image_h = temp["image_height"]
25 | name = temp["name"].split('.')[0]
26 | path = os.path.join(image_path, name, temp["name"])
27 | # print('path: ',path)
28 | im = cv2.imread(path)
29 | sp = im.shape
30 | image_h, image_w = sp[0], sp[1]
31 | # print("image_h, image_w: ", image_h, image_w)
32 | # print("defect_name: ",temp["defect_name"])
33 | #bboxs
34 | x_l, y_l, x_r, y_r = temp["bbox"]
35 | # print(temp["name"], temp["bbox"])
36 | if temp["defect_name"]=="沾污":
37 | defect_name = '0'
38 | elif temp["defect_name"]=="错花":
39 | defect_name = '1'
40 | elif temp["defect_name"] == "水印":
41 | defect_name = '2'
42 | elif temp["defect_name"] == "花毛":
43 | defect_name = '3'
44 | elif temp["defect_name"] == "缝头":
45 | defect_name = '4'
46 | elif temp["defect_name"] == "缝头印":
47 | defect_name = '5'
48 | elif temp["defect_name"] == "虫粘":
49 | defect_name = '6'
50 | elif temp["defect_name"] == "破洞":
51 | defect_name = '7'
52 | elif temp["defect_name"] == "褶子":
53 | defect_name = '8'
54 | elif temp["defect_name"] == "织疵":
55 | defect_name = '9'
56 | elif temp["defect_name"] == "漏印":
57 | defect_name = '10'
58 | elif temp["defect_name"] == "蜡斑":
59 | defect_name = '11'
60 | elif temp["defect_name"] == "色差":
61 | defect_name = '12'
62 | elif temp["defect_name"] == "网折":
63 | defect_name = '13'
64 | elif temp["defect_name"] == "其他":
65 | defect_name = '14'
66 | else:
67 | defect_name = '15'
68 | print("----------------------------------error---------------------------")
69 | raise("erro")
70 | # print(image_w, image_h)
71 | # print(defect_name)
72 | x_center = (x_l + x_r)/(2*image_w)
73 | y_center = (y_l + y_r)/(2*image_h)
74 | w = (x_r - x_l)/(image_w)
75 | h = (y_r - y_l)/(image_h)
76 | # print(x_center, y_center, w, h)
77 | name_list.append(temp["name"])
78 | c_list.append(defect_name)
79 |         image_h_list.append(image_h)
80 |         image_w_list.append(image_w)
81 | x_center_list.append(x_center)
82 | y_center_list.append(y_center)
83 | w_list.append(w)
84 | h_list.append(h)
85 |
86 | index = list(set(name_list))
87 | print(len(index))
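# 5-fold style split over the unique image names: fold 0 holds out the first fifth as validation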
88 | for fold in [0]:
89 | val_index = index[len(index) * fold // 5:len(index) * (fold + 1) // 5]
90 | print(len(val_index))
91 | for num, name in enumerate(name_list):
92 | print(c_list[num], x_center_list[num], y_center_list[num], w_list[num], h_list[num])
93 | row = [c_list[num], x_center_list[num], y_center_list[num], w_list[num], h_list[num]]
94 | if name in val_index:
95 | path2save = 'val/'
96 | else:
97 | path2save = 'train/'
98 | # print('convertor\\fold{}\\labels\\'.format(fold) + path2save)
99 | # print('convertor\\fold{}/labels\\'.format(fold) + path2save + name.split('.')[0] + ".txt")
100 | # print("{}/{}".format(image_path, name))
101 | # print('convertor\\fold{}\\images\\{}\\{}'.format(fold, path2save, name))
102 | if not os.path.exists('convertor/fold{}/labels/'.format(fold) + path2save):
103 | os.makedirs('convertor/fold{}/labels/'.format(fold) + path2save)
104 | with open('convertor/fold{}/labels/'.format(fold) + path2save + name.split('.')[0] + ".txt", 'a+') as f:
105 | for data in row:
106 | f.write('{} '.format(data))
107 | f.write('\n')
108 | if not os.path.exists('convertor/fold{}/images/{}'.format(fold, path2save)):
109 | os.makedirs('convertor/fold{}/images/{}'.format(fold, path2save))
110 | sh.copy(os.path.join(image_path, name.split('.')[0], name),
111 | 'convertor/fold{}/images/{}/{}'.format(fold, path2save, name))
112 |
113 |
114 |
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import shutil
5 | from sys import platform  # used by the MacOS auto-open below
6 | import cv2
7 | import torch
8 | import torch.backends.cudnn as cudnn
9 | 
10 | from utils import google_utils
11 | from utils.datasets import *
12 | from utils.utils import *
13 | def detect(save_img=False):
14 | out, source, weights, view_img, save_txt, imgsz = \
15 | opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
16 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
17 | save_dir = opt.save_dir
18 | # Initialize
19 | device = torch_utils.select_device(opt.device)
20 | if os.path.exists(out):
21 | shutil.rmtree(out) # delete output folder
22 | os.makedirs(out) # make new output folder
23 | half = device.type != 'cpu' # half precision only supported on CUDA
24 |
25 | # Load model
26 | google_utils.attempt_download(weights)
27 | model = torch.load(weights, map_location=device)['model'].float().eval() # load FP32 model
28 | imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
29 | if half:
30 | model.half() # to FP16
31 |
32 | # Second-stage classifier
33 | classify = False
34 | if classify:
35 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize
36 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
37 | modelc.to(device).eval()
38 |
39 | # Set Dataloader
40 | vid_path, vid_writer = None, None
41 | if webcam:
42 | view_img = True
43 | cudnn.benchmark = True # set True to speed up constant image size inference
44 | dataset = LoadStreams(source, img_size=imgsz)
45 | else:
46 | save_img = True
47 | dataset = LoadImagesTest(source, img_size=imgsz)
48 |
49 | # Get names and colors
50 | names = model.module.names if hasattr(model, 'module') else model.names
51 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
52 |
53 | # Run inference
54 | t0 = time.time()
55 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
56 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
57 |
58 | save_json = True
59 | result = []
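    # each entry: {'name': <image>.jpg, 'category': class id + 1, 'bbox': [x1, y1, x2, y2], 'score': confidence}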
60 | for path, img, im0s, vid_cap in dataset:
61 | img = torch.from_numpy(img).to(device)
62 | img = img.half() if half else img.float() # uint8 to fp16/32
63 | img /= 255.0 # 0 - 255 to 0.0 - 1.0
64 | if img.ndimension() == 3:
65 | img = img.unsqueeze(0)
66 |
67 | # Inference
68 | t1 = torch_utils.time_synchronized()
69 | pred = model(img, augment=opt.augment)[0]
70 |
71 | # Apply NMS
72 | pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
73 | t2 = torch_utils.time_synchronized()
74 |
75 | # Apply Classifier
76 | if classify:
77 | pred = apply_classifier(pred, modelc, img, im0s)
78 |
79 | # Process detections
80 | for i, det in enumerate(pred): # detections per image
81 | if webcam: # batch_size >= 1
82 | p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
83 | else:
84 | p, s, im0 = path, '', im0s
85 |
86 | save_path = str(Path(out) / Path(p).name)
87 | txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
88 | s += '%gx%g ' % img.shape[2:] # print string
89 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
90 | if det is not None and len(det):
91 | # Rescale boxes from img_size to im0 size
92 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
93 |
94 | # Print results
95 | for c in det[:, -1].unique():
96 | n = (det[:, -1] == c).sum() # detections per class
97 | s += '%g %ss, ' % (n, names[int(c)]) # add to string
98 |
99 | # Write results
100 | for *xyxy, conf, cls in det:
101 | if save_txt: # Write to file
102 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
103 | with open(txt_path + '.txt', 'a') as f:
104 | f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format
105 |
106 |                     # append this detection to the competition JSON results
107 | if save_json:
108 | name = os.path.split(txt_path)[-1]
109 | print(name)
110 |
111 | x1, y1, x2, y2 = float(xyxy[0]), float(xyxy[1]), float(xyxy[2]), float(xyxy[3])
112 | bbox = [x1, y1, x2, y2]
113 | img_name = name
114 | conf = float(conf)
115 |
116 | #add solution remove other
117 | result.append(
118 | {'name': img_name+'.jpg', 'category': int(cls+1), 'bbox': bbox,
119 | 'score': conf})
120 | print("result: ", {'name': img_name+'.jpg', 'category': int(cls+1), 'bbox': bbox,'score': conf})
121 |
122 | if save_img or view_img: # Add bbox to image
123 | label = '%s %.2f' % (names[int(cls)], conf)
124 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
125 |
126 | # Print time (inference + NMS)
127 | print('%sDone. (%.3fs)' % (s, t2 - t1))
128 |
129 | # Stream results
130 | if view_img:
131 | cv2.imshow(p, im0)
132 | if cv2.waitKey(1) == ord('q'): # q to quit
133 | raise StopIteration
134 |
135 | # Save results (image with detections)
136 | if save_img:
137 | if dataset.mode == 'images':
138 | cv2.imwrite(save_path, im0)
139 | else:
140 | if vid_path != save_path: # new video
141 | vid_path = save_path
142 | if isinstance(vid_writer, cv2.VideoWriter):
143 | vid_writer.release() # release previous video writer
144 |
145 | fourcc = 'mp4v' # output video codec
146 | fps = vid_cap.get(cv2.CAP_PROP_FPS)
147 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
148 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
149 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
150 | vid_writer.write(im0)
151 |
152 | if save_txt or save_img:
153 | print('Results saved to %s' % os.getcwd() + os.sep + out)
154 | if platform == 'darwin': # MacOS
155 | os.system('open ' + save_path)
156 |
157 | if save_json:
158 | if not os.path.exists(save_dir):
159 | os.makedirs(save_dir)
160 | with open(os.path.join(save_dir, "result.json"), 'w') as fp:
161 | json.dump(result, fp, indent=4, ensure_ascii=False)
162 |
163 |
164 | print('Done. (%.3fs)' % (time.time() - t0))
165 |
166 |
167 |
168 | if __name__ == '__main__':
169 | parser = argparse.ArgumentParser()
170 | parser.add_argument('--weights', type=str, default='weights/best.pt', help='model.pt path')
171 | parser.add_argument('--save_dir', type=str, default='./', help='result save dir')
172 | # parser.add_argument('--source', type=str, default='convertor/fold0/images/val', help='source') # file/folder, 0 for webcam
173 | parser.add_argument('--source', type=str, default='../../data/guangdong1_round2_train_part1_20190924/defect',
174 | help='source') # file/folder, 0 for webcam
175 | parser.add_argument('--output', type=str, default='inference/output', help='output folder') # output folder
176 | parser.add_argument('--img-size', type=int, default=1024, help='inference size (pixels)')
177 | parser.add_argument('--conf-thres', type=float, default=0.04, help='object confidence threshold')
178 | parser.add_argument('--iou-thres', type=float, default=0.05, help='IOU threshold for NMS')
179 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
180 | parser.add_argument('--view-img', action='store_true', help='display results')
181 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
182 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
183 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
184 | parser.add_argument('--augment', action='store_true', help='augmented inference')
185 | parser.add_argument('--update', action='store_true', help='update all models')
186 | opt = parser.parse_args()
187 | print(opt)
188 |
189 | with torch.no_grad():
190 | if opt.update: # update all models (to fix SourceChangeWarning)
191 | for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
192 | detect()
193 | create_pretrained(opt.weights, opt.weights)
194 | else:
195 | detect()
196 |
--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/
2 |
3 | Usage:
4 | import torch
5 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
6 | """
7 |
8 | dependencies = ['torch', 'yaml']
9 |
10 | import os
11 |
12 | import torch
13 |
14 | from models.yolo import Model
15 | from utils import google_utils
16 |
17 |
18 | def create(name, pretrained, channels, classes):
19 | """Creates a specified YOLOv5 model
20 |
21 | Arguments:
22 | name (str): name of model, i.e. 'yolov5s'
23 | pretrained (bool): load pretrained weights into the model
24 | channels (int): number of input channels
25 | classes (int): number of model classes
26 |
27 | Returns:
28 | pytorch model
29 | """
30 | config = os.path.join(os.path.dirname(__file__), 'models', '%s.yaml' % name) # model.yaml path
31 | model = Model(config, channels, classes)
32 | if pretrained:
33 | ckpt = '%s.pt' % name # checkpoint filename
34 | google_utils.attempt_download(ckpt) # download if not found locally
35 | state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict() # to FP32
36 | state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter
37 | model.load_state_dict(state_dict, strict=False) # load
38 | return model
39 |
40 |
41 | def yolov5s(pretrained=False, channels=3, classes=80):
42 | """YOLOv5-small model from https://github.com/ultralytics/yolov5
43 |
44 | Arguments:
45 | pretrained (bool): load pretrained weights into the model, default=False
46 | channels (int): number of input channels, default=3
47 | classes (int): number of model classes, default=80
48 |
49 | Returns:
50 | pytorch model
51 | """
52 | return create('yolov5s', pretrained, channels, classes)
53 |
54 |
55 | def yolov5m(pretrained=False, channels=3, classes=80):
56 | """YOLOv5-medium model from https://github.com/ultralytics/yolov5
57 |
58 | Arguments:
59 | pretrained (bool): load pretrained weights into the model, default=False
60 | channels (int): number of input channels, default=3
61 | classes (int): number of model classes, default=80
62 |
63 | Returns:
64 | pytorch model
65 | """
66 | return create('yolov5m', pretrained, channels, classes)
67 |
68 |
69 | def yolov5l(pretrained=False, channels=3, classes=80):
70 | """YOLOv5-large model from https://github.com/ultralytics/yolov5
71 |
72 | Arguments:
73 | pretrained (bool): load pretrained weights into the model, default=False
74 | channels (int): number of input channels, default=3
75 | classes (int): number of model classes, default=80
76 |
77 | Returns:
78 | pytorch model
79 | """
80 | return create('yolov5l', pretrained, channels, classes)
81 |
82 |
83 | def yolov5x(pretrained=False, channels=3, classes=80):
84 | """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5
85 |
86 | Arguments:
87 | pretrained (bool): load pretrained weights into the model, default=False
88 | channels (int): number of input channels, default=3
89 | classes (int): number of model classes, default=80
90 |
91 | Returns:
92 | pytorch model
93 | """
94 | return create('yolov5x', pretrained, channels, classes)
95 |
--------------------------------------------------------------------------------
/process_data_yolo.py:
--------------------------------------------------------------------------------
1 | #-*- coding: utf-8 -*-
2 | '''
3 | @use: crop each image and its labels into sliding-window patches, producing a new cropped dataset
4 | '''
5 |
6 | from __future__ import division
7 | import os.path
8 | from PIL import Image
9 | import numpy as np
10 | import shutil
11 | import cv2
12 | from tqdm import tqdm
13 |
14 | ImgPath = './convertor/fold0/images/val/'  # source images
15 | path = './convertor/fold0/labels/val/'  # source labels
16 | 
17 | ProcessedPath = './process_data/'  # output folder for the processed data
18 |
19 | txtfiles = os.listdir(path)
20 | print(txtfiles)
21 | # patch (crop) size in pixels
22 | patch_size = 1024
23 | # sliding-window stride in pixels
24 | stride = 600
25 |
26 | txtfiles = tqdm(txtfiles)
27 | for file in txtfiles:  # iterate over the label files
28 | image_pre, ext = os.path.splitext(file)
29 | imgfile = ImgPath + image_pre + '.jpg'
30 | txtfile = path + image_pre + '.txt'
31 |     # if not os.path.isdir(file):  # skip directories
32 |     # print(file)
33 |
34 | img = cv2.imread(imgfile)
35 | sp = img.shape
36 | img_h, img_w = sp[0], sp[1]
37 |
38 | f = open(os.path.join(path, file), "r")
39 | lines = f.readlines()
40 |     savepath_img = ProcessedPath + 'images' + '/val'  # output folder for cropped images
41 |     savepath_txt = ProcessedPath + 'labels' + '/val'  # output folder for cropped labels
42 | if not os.path.exists(savepath_img):
43 | os.makedirs(savepath_img)
44 | if not os.path.exists(savepath_txt):
45 | os.makedirs(savepath_txt)
46 |
47 |     bndbox = []
48 |     boxname = []
49 |     for line in lines:
50 |         # labels are written with a trailing space, so splitting yields an empty 6th field
51 |         c, x_c, y_c, w, h, _ = line.split(' ')
52 |         c, x_c, y_c, w, h = float(c), float(x_c), float(y_c), float(w), float(h)
53 |         bndbox.append([x_c, y_c, w, h])
54 |         boxname.append([c])
55 | 
56 |     # a: x origins of the sliding windows, b: y origins
58 |
59 | a = []
60 | b = []
61 | for a_ in range(0, img_w, stride):
62 | a.append(a_)
63 | for b_ in range(0, img_h, stride):
64 | b.append(b_)
65 |
66 |
67 | cropboxes = []
68 | for i in a:
69 | for j in b:
70 | cropboxes.append([i, j, i + patch_size, j + patch_size])
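    # windows at the right/bottom edges extend past the image; PIL's crop pads the missing area with black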
73 |
74 |     def select(m, n, w, h):
75 |         # m: window x origin, n: window y origin, w: window width, h: window height
76 |         bbox = []
77 |         # check every labeled box of the image against this window
78 |         for index in range(0, len(bndbox)):
79 |             boxcls = boxname[index]  # class of this box
80 |             # convert the normalized YOLO box back to pixel corners
81 |             x1 = float(bndbox[index][0] * img_w - bndbox[index][2] * img_w/2)  # x min
82 |             y1 = float(bndbox[index][1] * img_h - bndbox[index][3] * img_h/2)  # y min
83 |             x2 = float(bndbox[index][0] * img_w + bndbox[index][2] * img_w/2)  # x max
84 |             y2 = float(bndbox[index][1] * img_h + bndbox[index][3] * img_h/2)  # y max
85 |             # keep the box only if it lies entirely inside the window, re-normalized to patch coordinates
86 |             if x1 >= m and x2 <= m + w and y1 >= n and y2 <= n + h:
87 |                 a1 = x1 - m
88 |                 b1 = y1 - n
89 |                 a2 = x2 - m
90 |                 b2 = y2 - n
91 |                 box_w = a2 - a1
92 |                 box_h = b2 - b1
93 |                 x_c = (a1 + box_w/2)/w
94 |                 y_c = (b1 + box_h/2)/h
95 |                 box_w = box_w / w
96 |                 box_h = box_h / h
97 |                 bbox.append([boxcls[0], x_c, y_c, box_w, box_h])  # box in patch coordinates
98 |         return bbox  # empty list when no box falls inside this window
109 |
110 |     img = Image.open(imgfile)
111 |     for j in range(0, len(cropboxes)):
112 |         # boxes that fall inside this patch
113 |         Bboxes = select(cropboxes[j][0], cropboxes[j][1], patch_size, patch_size)
114 |         if len(Bboxes):
115 |             with open(savepath_txt + '/' + image_pre + '_' + '{}'.format(j) + '.txt', 'w') as f:
116 |                 for Bbox in Bboxes:
117 |                     for data in Bbox:
118 |                         f.write('{} '.format(data))
119 |                     f.write('\n')
120 | 
121 |         # crop and save the patch
122 |         try:
123 |             cropedimg = img.crop(cropboxes[j])
124 |             cropedimg.save(savepath_img + '/' + image_pre + '_' + str(j) + '.jpg')
125 |         except Exception:
126 |             continue
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
--------------------------------------------------------------------------------
/task05_rnn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "configured-exploration",
6 | "metadata": {},
7 | "source": [
8 | "循环神经网络RNN\n",
9 | "\n",
10 | "1. 计算图\n",
11 | "2. RNN\n",
12 | "3. 长短时记忆网络\n",
13 | "4. 其他RNN\n",
14 | "5. RNN主要应用\n",
15 | "\n",
16 | "\n",
17 | "\n",
18 | "# 计算图\n",
19 | "\n",
20 | "计算图的引入是为了后面更方便的表示网络,计算图是描述计算结构的一种图,它的元素包括节点(node)和边(edge),节点表示变量,可以是标量、矢量、张量等,而边表示的是某个操作,即函数。\n",
21 | "\n",
22 | "\n",
23 | "\n",
24 | "下面这个计算图表示复合函数\n",
25 | "\n",
26 | "\n",
27 | "\n",
28 | "关于计算图的求导,我们可以用链式法则表示,有下面两种情况。\n",
29 | "\n",
30 | "- 情况1\n",
31 | "\n",
32 | "\n",
33 | "\n",
34 | "- 情况2\n",
35 | "\n",
36 | "\n",
37 | "\n",
38 | "求导举例:\n",
39 | "\n",
40 | "例1\n",
41 | "\n",
42 | "
\n",
43 | "\n",
44 | "- a = 3, b = 1 可以得到 c = 3, d = 2, e = 6\n",
45 | "\n",
46 | "- $\\frac{\\partial e}{\\partial a} = \\frac{\\partial e}{\\partial c}\\frac{\\partial c}{\\partial a} = d = b + 1 = 2$\n",
47 | "- $\\frac{\\partial e}{\\partial b} = \\frac{\\partial e}{\\partial c}\\frac{\\partial c}{\\partial b}+\\frac{\\partial e}{\\partial d}\\frac{\\partial d}{\\partial b} = d + c=b+1+a+b = 5$\n",
48 | "\n",
49 | "例2\n",
50 | "\n",
51 | "
\n",
52 | "\n",
53 | "$\\frac{\\partial Z}{\\partial X}=\\alpha \\delta+\\alpha \\epsilon+\\alpha \\zeta+\\beta \\delta+\\beta \\epsilon+\\beta \\zeta+\\gamma \\delta+\\gamma \\epsilon+\\gamma \\zeta = (\\alpha +\\beta+\\gamma)(\\delta+\\epsilon+\\zeta) $\n",
54 | "\n",
55 | "计算图可以很好的表示导数的前向传递和后向传递的过程,比如上面例2,前向传递了$\\frac{\\partial }{\\partial X}$ ,反向传递$\\frac{\\partial }{\\partial Z}$ 。\n",
56 | "\n",
57 | "
\n",
58 | "\n",
59 | "
\n",
60 | "\n",
61 | "# 循环神经网络(Recurrent Neural Network)\n",
62 | "\n",
63 | "上一章我们已经介绍了CNN,可能我们会想这里为什么还需要构建一种新的网络RNN呢?因为现实生活中存在很多序列化结构,我们需要建立一种更优秀的序列数据模型。\n",
64 | "\n",
65 | "- 文本:字母和词汇的序列\n",
66 | "- 语音:音节的序列\n",
67 | "- 视频:图像帧的序列\n",
68 | "- 时态数据:气象观测数据,股票交易数据、房价数据等\n",
69 | "\n",
70 | "RNN的发展历程:\n",
71 | "\n",
72 | "\n",
73 | "\n",
74 | "循环神经网络是一种人工神经网络,它的节点间的连接形成一个遵循时间序列的有向图,它的核心思想是,样本间存在顺序关系,每个样本和它之前的样本存在关联。通过神经网络在时序上的展开,我们能够找到样本之间的序列相关性。\n",
75 | "\n",
76 | "下面给出RNN的一般结构:\n",
77 | "\n",
78 | "\n",
79 | "\n",
80 | "其中各个符号的表示:$x_t,s_t,o_t$分别表示的是$t$时刻的输入、记忆和输出,$U,V,W$是RNN的连接权重,$b_s,b_o$是RNN的偏置,$\\sigma,\\varphi$是激活函数,$\\sigma$通常选tanh或sigmoid,$\\varphi$通常选用softmax。\n",
81 | "\n",
82 | "其中 softmax 函数,用于分类问题的概率计算。本质上是将一个K维的任意实数向量压缩 (映射)成另一个K维的实数向量,其中向量中的每个元素取值都介于(0,1)之间。\n",
83 | "$$\n",
84 | "\\sigma(\\vec{z})_{i}=\\frac{e^{z_{i}}}{\\sum_{j=1}^{K} e^{z_{j}}}\n",
85 | "$$\n",
86 | "\n",
87 | "### RNN案例\n",
88 | "\n",
89 | "比如词性标注,\n",
90 | "\n",
91 | "- 我/n,爱/v购物/n,\n",
92 | "- 我/n在/pre华联/n购物/v\n",
93 | "\n",
94 | "Word Embedding:自然语言处理(NLP)中的 一组语言建模和特征学习技术的统称,其中来自词汇表的单词或短语被映射到实数的向量。比如这里映射到三个向量然后输入:\n",
95 | "\n",
96 | "
\n",
97 | "\n",
98 | "将神经元的输出存到memory中,memory中值会作为下一时刻的输入。在最开始时刻,给定 memory初始值,然后逐次更新memory中的值。\n",
99 | "\n",
100 | "\n",
101 | "\n",
102 | "\n",
103 | "\n",
104 | "### RNN的一般结构\n",
105 | "\n",
106 | "- Elman Network\n",
107 | "\n",
108 | "
\n",
109 | "\n",
110 | "- Jordan Network\n",
111 | "\n",
112 | "
\n",
113 | "\n",
114 | "各种不同的RNN结构\n",
115 | "\n",
116 | "\n",
117 | "\n",
118 | "### RNN训练算法 - BPTT\n",
119 | "\n",
120 | "我们先来回顾一下BP算法,就是定义损失函数 Loss 来表示输出 $\\hat{y}$ 和真实标签 y 的误差,通过链式法则自顶向下求得 Loss 对网络权重的偏导。沿梯度的反方向更新权重的值, 直到 Loss 收敛。而这里的 BPTT 算法就是加上了时序演化,后面的两个字母 TT 就是 Through Time。\n",
121 | "\n",
122 | "
\n",
123 | "\n",
124 | "我们先定义输出函数:\n",
125 | "$$\n",
126 | "\\begin{array}{l}s_{t}=\\tanh \\left(U x_{t}+W s_{t-1}\\right) \\\\ \\hat{y}_{t}=\\operatorname{softmax}\\left(V s_{t}\\right)\\end{array}\n",
127 | "$$\n",
128 | "再定义损失函数:\n",
129 | "$$\n",
130 | "\\begin{aligned} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) =-y_{t} \\log \\hat{y}_{t} \\\\ E(y, \\hat{y}) =\\sum_{t} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) \\\\ =-\\sum_{t} y_{t} \\log \\hat{y}_{t}\\end{aligned}\n",
131 | "$$\n",
132 | "
\n",
133 | "\n",
134 | "我们分别求损失函数 E 对 U、V、W的梯度:\n",
135 | "$$\n",
136 | "\\begin{array}{l}\\frac{\\partial E}{\\partial V}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial V} \\\\ \\frac{\\partial E}{\\partial W}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial W} \\\\ \\frac{\\partial E}{\\partial U}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial U}\\end{array}\n",
137 | "$$\n",
138 | "\n",
139 | "- 求 E 对 V 的梯度,先求 $E_3$ 对 V 的梯度\n",
140 | "\n",
141 | "$$\n",
142 | "\\begin{aligned} \\frac{\\partial E_{3}}{\\partial V} &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial V} \\\\ &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial z_{3}} \\frac{\\partial z_{3}}{\\partial V} \\end{aligned}\n",
143 | "$$\n",
144 | "\n",
145 | "其中 $z_3 = V s_3$,然后求和即可。\n",
146 | "\n",
147 | "- 求 E 对 W 的梯度,先求 $E_3$ 对 W 的梯度\n",
148 | "\n",
149 | "$$\n",
150 | "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial W}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial W} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} \\frac{\\partial s_{k}}{\\partial W} \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}}\\left(\\prod_{j=k+1}^{3} \\frac{\\partial s_{j}}{\\partial s_{j-1}}\\right) \\frac{\\partial s_{k}}{\\partial W}\\end{array}\n",
151 | "$$\n",
152 | "\n",
153 | "其中: $s_3$ 依赖于 $s_2$,而 $s_2$ 又依赖于 $s_1 $ 和 W ,依赖关系 一直传递到 t = 0 的时刻。因此,当我们计算对于 W 的偏导数时,不能把 $s_2$ 看作是常数项!\n",
154 | "\n",
155 | "- 求 E 对 U 的梯度,先求 $E_3$ 对 U 的梯度\n",
156 | "\n",
157 | "$$\n",
158 | "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial W}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial U} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial U}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} \\frac{\\partial s_{k}}{\\partial U}\\end{array}\n",
159 | "$$\n",
160 | "\n",
161 | "# 长短时记忆网络\n",
162 | "\n",
163 | "在RNN中,存在一个很重要的问题,就是梯度消失问题,一开始我们不能有效的解决长时依赖问题,其中梯度消失的原因有两个:BPTT算法和激活函数Tanh\n",
164 | "$$\n",
165 | "\\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}}\\left(\\prod_{j=k+1}^{3} \\frac{\\partial s_{j}}{\\partial s_{j-1}}\\right) \\frac{\\partial s_{k}}{\\partial W}\n",
166 | "$$\n",
167 | "有两种解决方案,分别是ReLU函数和门控RNN(LSTM).\n",
168 | "\n",
169 | "### LSTM\n",
170 | "\n",
171 | "LSTM,即长短时记忆网络,于1997年被Sepp Hochreiter 和Jürgen Schmidhuber提出来,LSTM是一种用于深度学习领域的人工循环神经网络(RNN)结构。一个LSTM单元由输入门、输出门和遗忘门组成,三个门控制信息进出单元。\n",
172 | "\n",
173 | "\n",
174 | "\n",
175 | "- LSTM依靠贯穿隐藏层的细胞状态实现隐藏单元之间的信息传递,其中只有少量的线性操作\n",
176 | "- LSTM引入了“门”机制对细胞状态信息进行添加或删除,由此实现长程记忆\n",
177 | "- “门”机制由一个Sigmoid激活函数层和一个向量点乘操作组成,Sigmoid层的输出控制了信息传递的比例\n",
178 | "\n",
179 | "**遗忘门**:LSTM通过遗忘门(forget gate)实现对细胞状态信息遗忘程度的控制,输出当前状态的遗忘权重,取决于 $h_{t−1}$ 和 $x_t$.\n",
180 | "$$\n",
181 | "f_{t}=\\sigma\\left(W_{f} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{f}\\right)\n",
182 | "$$\n",
183 | "
\n",
184 | "\n",
185 | "**输入门**:LSTM通过输入门(input gate)实现对细胞状态输入接收程度的控制,输出当前输入信息的接受权重,取决于 $h_{t−1}$ 和 $x_t$.\n",
186 | "$$\n",
187 | "\\begin{array}{c}i_{t}=\\sigma\\left(W_{i} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{i}\\right) \\\\ \\tilde{C}_{t}=\\tanh \\left(W_{C} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{C}\\right)\\end{array}\n",
188 | "$$\n",
189 | "
\n",
190 | "\n",
191 | "**输出门**:LSTM通过输出门(output gate)实现对细胞状态输出认可程度的控制,输出当前输出信息的认可权重,取决于 $h_{t−1}$ 和 $x_t$.\n",
192 | "$$\n",
193 | "o_{t}=\\sigma\\left(W_{o} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{o}\\right)\n",
194 | "$$\n",
195 | "
\n",
196 | "\n",
197 | "**状态更新**:“门”机制对细胞状态信息进行添加或删除,由此实现长程记忆。\n",
198 | "$$\n",
199 | "\\begin{array}{c}C_{t}=f_{t} * C_{t-1}+i_{t} * \\tilde{C}_{t} \\\\ h_{t}=o_{t} * \\tanh \\left(C_{t}\\right)\\end{array}\n",
200 | "$$\n",
201 | "
\n",
202 | "\n",
203 | "下面给出一个标准化的RNN例子\n",
204 | "\n",
205 | "```python\n",
206 | "#构造RNN网络,x的维度5,隐层的维度10,网络的层数2\n",
207 | "rnn_ seq = nn.RNN(5, 10,2)\n",
208 | "#构造一个输入序列,长为6,batch是3,特征是5\n",
209 | "X =V(torch. randn(6, 3,5))\n",
210 | "#out,ht = rnn_ seq(x, h0) # h0可以指定或者不指定\n",
211 | "out,ht = rnn_ seq(x)\n",
212 | "# q1:这里out、ht的size是多少呢? out:6*3*10, ht:2*3*10\n",
213 | "\n",
214 | "#输入维度50,隐层100维,两层\n",
215 | "Lstm_ seq = nn.LSTM(50, 100,num layers=2 )\n",
216 | "#输入序列seq= 10,batch =3,输入维度=50\n",
217 | "lstm input = torch. randn(10,3,50)\n",
218 | "out, (h, c) = lstm_ seq(lstm_ _input) #使用默认的全0隐藏状态\n",
219 | "```\n",
220 | "\n",
221 | "# 其他经典的循环神经网络\n",
222 | "\n",
223 | "### Gated Recurrent Unit(GRU)\n",
224 | "\n",
225 | "Gated Recurrent Unit (GRU),是在2014年提出的,可认为是LSTM 的变种,它的细胞状态与隐状态合并,在计算当前时刻新信息的方法和LSTM有 所不同;GRU只包含重置门和更新门;在音乐建模与语音信号建模领域与LSTM具有相似的性能,但是参数更少,只有两个门控。\n",
226 | "\n",
227 | "\n",
228 | "\n",
229 | "### Peephole LSTM\n",
230 | "\n",
231 | "让门层也接受细胞状态的输入,同时考虑隐层信息的输入。\n",
232 | "\n",
233 | "
\n",
234 | "\n",
235 | "### Bi-directional RNN(双向RNN) \n",
236 | "\n",
237 | "Bi-directional RNN(双向RNN)假设当前t的输出不仅仅和之前的序列有关,并且还与之后的序列有关,例如:完形填空,它由两个RNNs上下叠加在一起组成,输出由这两个RNNs的隐藏层的状态决定。\n",
238 | "\n",
239 | "
\n",
240 | "\n",
241 | "
\n",
242 | "\n",
243 | "### Continuous time RNN(CTRNN)\n",
244 | "\n",
245 | "CTRNN利用常微分方程系统对输入脉冲序列神经元的影响 进行建模。CTRNN被应用到进化机器人中,用于解决视觉、协作和最 小认知行为等问题。\n",
246 | "\n",
247 | "
\n",
248 | "\n",
249 | "# 循环神经网络的主要应用\n",
250 | "\n",
251 | "### 语言模型\n",
252 | "\n",
253 | "根据之前和当前词预测下一个单词或者字母\n",
254 | "\n",
255 | "
\n",
256 | "\n",
257 | "问答系统\n",
258 | "\n",
259 | "
\n",
260 | "\n",
261 | "### 自动作曲\n",
262 | "\n",
263 | "
\n",
264 | "\n",
265 | "参考:Hang Chu, Raquel Urtasun, Sanja Fidler. Song From PI: A Musically Plausible Network for Pop Music Generation. CoRR abs/1611.03477 (2016)\n",
266 | "\n",
267 | "Music AI Lab: **https://musicai.citi.sinica.edu.tw/**\n",
268 | "\n",
269 | "
\n",
270 | "\n",
271 | "### 机器翻译\n",
272 | "\n",
273 | "将一种语言自动翻译成另一种语言\n",
274 | "\n",
275 | "
\n",
276 | "\n",
277 | "### 自动写作\n",
278 | "\n",
279 | "根据现有资料自动写作,当前主要包括新闻写作和诗歌创作。主要是基于RNN&LSTM的文本生成技术来实现,需要训练大量同 类文本,结合模板技术。\n",
280 | "\n",
281 | "目前主要产品有:腾讯Dreamwriter写稿机器人、今日头条xiaomingbot、第一财经DT稿王(背后是阿里巴巴) 、百度Writing-bots...\n",
282 | "\n",
283 | "### 图像描述\n",
284 | "\n",
285 | "根据图像形成语言描述\n",
286 | "\n",
287 | "
\n",
288 | "\n",
289 | "
\n",
290 | "\n"
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": null,
296 | "id": "settled-engine",
297 | "metadata": {},
298 | "outputs": [],
299 | "source": []
300 | }
301 | ],
302 | "metadata": {
303 | "kernelspec": {
304 | "display_name": "Python 3",
305 | "language": "python",
306 | "name": "python3"
307 | },
308 | "language_info": {
309 | "codemirror_mode": {
310 | "name": "ipython",
311 | "version": 3
312 | },
313 | "file_extension": ".py",
314 | "mimetype": "text/x-python",
315 | "name": "python",
316 | "nbconvert_exporter": "python",
317 | "pygments_lexer": "ipython3",
318 | "version": "3.8.10"
319 | }
320 | },
321 | "nbformat": 4,
322 | "nbformat_minor": 5
323 | }
324 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 |
4 | from utils import google_utils
5 | from utils.datasets import *
6 | from utils.utils import *
7 |
8 |
9 | def test(data,
10 | weights=None,
11 | batch_size=16,
12 | imgsz=640,
13 | conf_thres=0.001,
14 | iou_thres=0.6, # for NMS
15 | save_json=False,
16 | single_cls=False,
17 | augment=False,
18 | verbose=False,
19 | model=None,
20 | dataloader=None,
21 | merge=False):
22 | # Initialize/load model and set device
23 | if model is None:
24 | training = False
25 | merge = opt.merge # use Merge NMS
26 | device = torch_utils.select_device(opt.device, batch_size=batch_size)
27 |
28 | # Remove previous
29 | for f in glob.glob('test_batch*.jpg'):
30 | os.remove(f)
31 |
32 | # Load model
33 | google_utils.attempt_download(weights)
34 | model = torch.load(weights, map_location=device)['model'].float().fuse().to(device) # load to FP32
35 | imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
36 |
37 | # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
38 | # if device.type != 'cpu' and torch.cuda.device_count() > 1:
39 | # model = nn.DataParallel(model)
40 |
41 | else: # called by train.py
42 | training = True
43 | device = next(model.parameters()).device # get model device
44 |
45 | # Half
46 | half = device.type != 'cpu' and torch.cuda.device_count() == 1 # half precision only supported on single-GPU
47 | if half:
48 | model.half() # to FP16
49 |
50 | # Configure
51 | model.eval()
52 | with open(data) as f:
53 | data = yaml.load(f, Loader=yaml.FullLoader) # model dict
54 | nc = 1 if single_cls else int(data['nc']) # number of classes
55 | iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
56 | niou = iouv.numel()
57 |
58 | # Dataloader
59 | if dataloader is None: # not training
60 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
61 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
62 | path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
63 | dataloader = create_dataloader(path, imgsz, batch_size, int(max(model.stride)), opt,
64 | hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]
65 |
66 | seen = 0
67 | names = model.names if hasattr(model, 'names') else model.module.names
68 | coco91class = coco80_to_coco91_class()
69 | s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
70 | p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
71 | loss = torch.zeros(3, device=device)
72 | jdict, stats, ap, ap_class = [], [], [], []
73 | for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
74 | img = img.to(device)
75 | img = img.half() if half else img.float() # uint8 to fp16/32
76 | img /= 255.0 # 0 - 255 to 0.0 - 1.0
77 | targets = targets.to(device)
78 | nb, _, height, width = img.shape # batch size, channels, height, width
79 | whwh = torch.Tensor([width, height, width, height]).to(device)
80 |
81 | # Disable gradients
82 | with torch.no_grad():
83 | # Run model
84 | t = torch_utils.time_synchronized()
85 | inf_out, train_out = model(img, augment=augment) # inference and training outputs
86 | t0 += torch_utils.time_synchronized() - t
87 |
88 | # Compute loss
89 | if training: # if model has loss hyperparameters
90 | loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls
91 |
92 | # Run NMS
93 | t = torch_utils.time_synchronized()
94 | output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
95 | t1 += torch_utils.time_synchronized() - t
96 |
97 | # Statistics per image
98 | for si, pred in enumerate(output):
99 | labels = targets[targets[:, 0] == si, 1:]
100 | nl = len(labels)
101 | tcls = labels[:, 0].tolist() if nl else [] # target class
102 | seen += 1
103 |
104 | if pred is None:
105 | if nl:
106 | stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
107 | continue
108 |
109 | # Append to text file
110 | # with open('test.txt', 'a') as file:
111 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
112 |
113 | # Clip boxes to image bounds
114 | clip_coords(pred, (height, width))
115 |
116 | # Append to pycocotools JSON dictionary
117 | if save_json:
118 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
119 | image_id = int(Path(paths[si]).stem.split('_')[-1])
120 | box = pred[:, :4].clone() # xyxy
121 | scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
122 | box = xyxy2xywh(box) # xywh
123 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
124 | for p, b in zip(pred.tolist(), box.tolist()):
125 | jdict.append({'image_id': image_id,
126 | 'category_id': coco91class[int(p[5])],
127 | 'bbox': [round(x, 3) for x in b],
128 | 'score': round(p[4], 5)})
129 |
130 | # Assign all predictions as incorrect
131 | correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
132 | if nl:
133 | detected = [] # target indices
134 | tcls_tensor = labels[:, 0]
135 |
136 | # target boxes
137 | tbox = xywh2xyxy(labels[:, 1:5]) * whwh
138 |
139 | # Per target class
140 | for cls in torch.unique(tcls_tensor):
141 |                 ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
142 |                 pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices
143 |
144 | # Search for detections
145 | if pi.shape[0]:
146 | # Prediction to target ious
147 | ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
148 |
149 | # Append detections
150 | for j in (ious > iouv[0]).nonzero():
151 | d = ti[i[j]] # detected target
152 | if d not in detected:
153 | detected.append(d)
154 | correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
155 | if len(detected) == nl: # all targets already located in image
156 | break
157 |
158 | # Append statistics (correct, conf, pcls, tcls)
159 | stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
160 |
161 | # Plot images
162 | if batch_i < 1:
163 | f = 'test_batch%g_gt.jpg' % batch_i # filename
164 | plot_images(img, targets, paths, f, names) # ground truth
165 | f = 'test_batch%g_pred.jpg' % batch_i
166 | plot_images(img, output_to_target(output, width, height), paths, f, names) # predictions
167 |
168 | # Compute statistics
169 | stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
170 | if len(stats):
171 | p, r, ap, f1, ap_class = ap_per_class(*stats)
172 | p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
173 | mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
174 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
175 | else:
176 | nt = torch.zeros(1)
177 |
178 | # Print results
179 | pf = '%20s' + '%12.3g' * 6 # print format
180 | print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
181 |
182 | # Print results per class
183 | if verbose and nc > 1 and len(stats):
184 | for i, c in enumerate(ap_class):
185 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
186 |
187 | # Print speeds
188 | t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple
189 | if not training:
190 | print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
191 |
192 | # Save JSON
193 | if save_json and map50 and len(jdict):
194 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
195 | f = 'detections_val2017_%s_results.json' % \
196 | (weights.split(os.sep)[-1].replace('.pt', '') if weights else '') # filename
197 | print('\nCOCO mAP with pycocotools... saving %s...' % f)
198 | with open(f, 'w') as file:
199 | json.dump(jdict, file)
200 |
201 | try:
202 | from pycocotools.coco import COCO
203 | from pycocotools.cocoeval import COCOeval
204 |
205 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
206 | cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO ground truth api
207 | cocoDt = cocoGt.loadRes(f) # initialize COCO pred api
208 |
209 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
210 | cocoEval.params.imgIds = imgIds # image IDs to evaluate
211 | cocoEval.evaluate()
212 | cocoEval.accumulate()
213 | cocoEval.summarize()
214 | map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
215 |         except Exception:
216 | print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
217 | 'See https://github.com/cocodataset/cocoapi/issues/356')
218 |
219 | # Return results
220 | model.float() # for training
221 | maps = np.zeros(nc) + map
222 | for i, c in enumerate(ap_class):
223 | maps[c] = ap[i]
224 | return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
225 |
226 |
227 | if __name__ == '__main__':
228 | parser = argparse.ArgumentParser(prog='test.py')
229 | parser.add_argument('--weights', type=str, default='weights/yolov5s.pt', help='model.pt path')
230 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
231 | parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
232 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
233 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
234 | parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
235 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
236 | parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
237 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
238 | parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
239 | parser.add_argument('--augment', action='store_true', help='augmented inference')
240 | parser.add_argument('--merge', action='store_true', help='use Merge NMS')
241 | parser.add_argument('--verbose', action='store_true', help='report mAP by class')
242 | opt = parser.parse_args()
243 | opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
244 | opt.data = check_file(opt.data) # check file
245 | print(opt)
246 |
247 | # task = 'val', 'test', 'study'
248 | if opt.task in ['val', 'test']: # (default) run normally
249 | test(opt.data,
250 | opt.weights,
251 | opt.batch_size,
252 | opt.img_size,
253 | opt.conf_thres,
254 | opt.iou_thres,
255 | opt.save_json,
256 | opt.single_cls,
257 | opt.augment,
258 | opt.verbose)
259 |
260 | elif opt.task == 'study': # run over a range of settings and save/plot
261 | for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
262 | f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
263 | x = list(range(352, 832, 64)) # x axis
264 | y = [] # y axis
265 | for i in x: # img-size
266 | print('\nRunning %s point %s...' % (f, i))
267 | r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
268 | y.append(r + t) # results and times
269 | np.savetxt(f, y, fmt='%10.4g') # save
270 | os.system('zip -r study.zip study_*.txt')
271 | # plot_study_txt(f, x) # plot
272 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import torch.distributed as dist
4 | import torch.nn.functional as F
5 | import torch.optim as optim
6 | import torch.optim.lr_scheduler as lr_scheduler
7 | import torch.utils.data
8 | import numpy as np
9 | from torch.utils.tensorboard import SummaryWriter
10 | import os
11 | import test # import test.py to get mAP after each epoch
12 | from models.yolo import Model
13 | from utils import google_utils
14 | from utils.datasets import *
15 | from utils.utils import *
16 |
17 | mixed_precision = True
18 | try: # Mixed precision training https://github.com/NVIDIA/apex
19 | from apex import amp
20 | except ImportError:
21 | print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
22 | mixed_precision = False # not installed
23 |
24 | wdir = 'weights' + os.sep # weights dir
25 | os.makedirs(wdir, exist_ok=True)
26 | last = wdir + 'last.pt'
27 | best = wdir + 'best.pt'
28 | results_file = 'results.txt'
29 |
30 | # Hyperparameters
31 | hyp = {'lr0': 0.01, # initial learning rate (SGD=1E-2, Adam=1E-3)
32 | 'momentum': 0.937, # SGD momentum
33 | 'weight_decay': 5e-4, # optimizer weight decay
34 | 'giou': 0.05, # giou loss gain
35 | 'cls': 0.58, # cls loss gain
36 | 'cls_pw': 1.0, # cls BCELoss positive_weight
37 | 'obj': 1.0, # obj loss gain (*=img_size/320 if img_size != 320)
38 | 'obj_pw': 1.0, # obj BCELoss positive_weight
39 | 'iou_t': 0.20, # iou training threshold
40 | 'anchor_t': 4.0, # anchor-multiple threshold
41 | 'fl_gamma': 0.0, # focal loss gamma (efficientDet default is gamma=1.5)
42 | 'hsv_h': 0.014, # image HSV-Hue augmentation (fraction)
43 | 'hsv_s': 0.68, # image HSV-Saturation augmentation (fraction)
44 | 'hsv_v': 0.36, # image HSV-Value augmentation (fraction)
45 | 'degrees': 0.0, # image rotation (+/- deg)
46 | 'translate': 0.0, # image translation (+/- fraction)
47 | 'scale': 0.5, # image scale (+/- gain)
48 | 'shear': 0.0} # image shear (+/- deg)
49 | print(hyp)
50 |
51 | # Overwrite hyp with hyp*.txt (optional)
52 | f = glob.glob('hyp*.txt')
53 | if f:
54 | print('Using %s' % f[0])
55 | for k, v in zip(hyp.keys(), np.loadtxt(f[0])):
56 | hyp[k] = v
57 |
58 | # Print focal loss if gamma > 0
59 | if hyp['fl_gamma']:
60 | print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])
61 |
62 |
63 | def train(hyp):
64 | epochs = opt.epochs # 300
65 | batch_size = opt.batch_size # 64
66 | weights = opt.weights # initial training weights
67 |
68 | # Configure
69 | init_seeds(1)
70 | with open(opt.data) as f:
71 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
72 | train_path = data_dict['train']
73 | test_path = data_dict['val']
74 | nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes
75 |
76 | # Remove previous results
77 | for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
78 | os.remove(f)
79 |
80 | # Create model
81 | model = Model(opt.cfg, nc=data_dict['nc']).to(device)
82 |
83 | # Image sizes
84 | gs = int(max(model.stride)) # grid size (max stride)
85 | imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples
86 |
87 | # Optimizer
88 | nbs = 64 # nominal batch size
89 | accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
90 | hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay
91 | pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
92 | for k, v in model.named_parameters():
93 | if v.requires_grad:
94 | if '.bias' in k:
95 | pg2.append(v) # biases
96 | elif '.weight' in k and '.bn' not in k:
97 | pg1.append(v) # apply weight decay
98 | else:
99 | pg0.append(v) # all else
100 |
101 | optimizer = optim.Adam(pg0, lr=hyp['lr0']) if opt.adam else \
102 | optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
103 | optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
104 | optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
105 | # Scheduler https://arxiv.org/pdf/1812.01187.pdf
106 | lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine
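    # cosine schedule: the multiplier falls smoothly from 1.0 at epoch 0 to 0.1 at the final epoch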
107 | scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
108 | print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
109 | del pg0, pg1, pg2
110 |
111 | # Load Model
112 | google_utils.attempt_download(weights)
113 | start_epoch, best_fitness = 0, 0.0
114 | if weights.endswith('.pt'): # pytorch format
115 | ckpt = torch.load(weights, map_location=device) # load checkpoint
116 |
117 | # load model
118 | try:
119 | ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
120 | if model.state_dict()[k].shape == v.shape} # to FP32, filter
121 | model.load_state_dict(ckpt['model'], strict=False)
122 | except KeyError as e:
123 | s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
124 | "Please delete or update %s and try again, or use --weights '' to train from scratch." \
125 | % (opt.weights, opt.cfg, opt.weights, opt.weights)
126 | raise KeyError(s) from e
127 |
128 | # load optimizer
129 | if ckpt['optimizer'] is not None:
130 | optimizer.load_state_dict(ckpt['optimizer'])
131 | best_fitness = ckpt['best_fitness']
132 |
133 | # load results
134 | if ckpt.get('training_results') is not None:
135 | with open(results_file, 'w') as file:
136 | file.write(ckpt['training_results']) # write results.txt
137 |
138 | # epochs
139 | start_epoch = ckpt['epoch'] + 1
140 | if epochs < start_epoch:
141 | print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
142 | (opt.weights, ckpt['epoch'], epochs))
143 | epochs += ckpt['epoch'] # finetune additional epochs
144 |
145 | del ckpt
146 |
147 | # Mixed precision training https://github.com/NVIDIA/apex
148 | if mixed_precision:
149 | model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
150 |
151 |
152 | scheduler.last_epoch = start_epoch - 1 # do not move
153 | # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
154 | # plot_lr_scheduler(optimizer, scheduler, epochs)
155 |
156 | # Initialize distributed training
157 | if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
158 | dist.init_process_group(backend='nccl', # distributed backend
159 | init_method='tcp://127.0.0.1:9999', # init method
160 | world_size=1, # number of nodes
161 | rank=0) # node rank
162 | model = torch.nn.parallel.DistributedDataParallel(model)
163 | # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
164 |
165 | # Trainloader
166 | dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
167 | hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
168 | mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
169 | assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, nc, opt.cfg)
170 |
171 | # Testloader
172 | testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
173 | hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]
174 |
175 | # Model parameters
176 | hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
177 | model.nc = nc # attach number of classes to model
178 | model.hyp = hyp # attach hyperparameters to model
179 | model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
180 | model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
181 | model.names = data_dict['names']
182 |
183 | # Class frequency
184 | labels = np.concatenate(dataset.labels, 0)
185 | c = torch.tensor(labels[:, 0]) # classes
186 | # cf = torch.bincount(c.long(), minlength=nc) + 1.
187 | # model._initialize_biases(cf.to(device))
188 | if tb_writer:
189 | plot_labels(labels)
190 | tb_writer.add_histogram('classes', c, 0)
191 |
192 | # Check anchors
193 | if not opt.noautoanchor:
194 | check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
195 |
196 | # Exponential moving average
197 | ema = torch_utils.ModelEMA(model)
198 |
199 | # Start training
200 | t0 = time.time()
201 | nb = len(dataloader) # number of batches
202 | n_burn = max(3 * nb, 1e3) # burn-in iterations, max(3 epochs, 1k iterations)
203 | maps = np.zeros(nc) # mAP per class
204 | results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
205 | print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
206 | print('Using %g dataloader workers' % dataloader.num_workers)
207 | print('Starting training for %g epochs...' % epochs)
208 | # torch.autograd.set_detect_anomaly(True)
209 | for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
210 | model.train()
211 |
212 | # Update image weights (optional)
213 | if dataset.image_weights:
214 | w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights
215 | image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
216 | dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx
217 |
218 | # Update mosaic border
219 | # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
220 | # dataset.mosaic_border = [b - imgsz, -b] # height, width borders
221 |
222 | mloss = torch.zeros(4, device=device) # mean losses
223 | print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
224 | pbar = tqdm(enumerate(dataloader), total=nb) # progress bar
225 | for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
226 | ni = i + nb * epoch # number integrated batches (since train start)
227 | imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0
228 |
229 | # Burn-in
230 | if ni <= n_burn:
231 | xi = [0, n_burn] # x interp
232 | # model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou)
233 | accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
234 | for j, x in enumerate(optimizer.param_groups):
235 | # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
236 | x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
237 | if 'momentum' in x:
238 | x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])
239 |
240 | # Multi-scale
241 | if opt.multi_scale:
242 |             sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs  # size
243 | sf = sz / max(imgs.shape[2:]) # scale factor
244 | if sf != 1:
245 | ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
246 | imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
247 |
248 | # Forward
249 | pred = model(imgs)
250 |
251 | # Loss
252 | loss, loss_items = compute_loss(pred, targets.to(device), model)
253 | if not torch.isfinite(loss):
254 | print('WARNING: non-finite loss, ending training ', loss_items)
255 | return results
256 |
257 | # Backward
258 | if mixed_precision:
259 | with amp.scale_loss(loss, optimizer) as scaled_loss:
260 | scaled_loss.backward()
261 | else:
262 | loss.backward()
263 |
264 | # Optimize
265 | if ni % accumulate == 0:
266 | optimizer.step()
267 | optimizer.zero_grad()
268 | ema.update(model)
269 |
270 | # Print
271 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
272 | mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB); memory_reserved() supersedes the deprecated memory_cached()
273 | s = ('%10s' * 2 + '%10.4g' * 6) % (
274 | '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
275 | pbar.set_description(s)
276 |
277 | # Plot
278 | if ni < 3:
279 | f = 'train_batch%g.jpg' % ni # filename
280 | result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
281 | if tb_writer and result is not None:
282 | tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
283 | # tb_writer.add_graph(model, imgs) # add model to tensorboard
284 |
285 | # end batch ------------------------------------------------------------------------------------------------
286 |
287 | # Scheduler
288 | scheduler.step()
289 |
290 | # mAP
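    | # Validate with the EMA weights (ema.ema), not the live training weights,
    | # after update_attr syncs non-parameter attributes from the model.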
291 | ema.update_attr(model)
292 | final_epoch = epoch + 1 == epochs
293 | if not opt.notest or final_epoch: # Calculate mAP
294 | results, maps, times = test.test(opt.data,
295 | batch_size=batch_size,
296 | imgsz=imgsz_test,
297 | save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'),
298 | model=ema.ema,
299 | single_cls=opt.single_cls,
300 | dataloader=testloader)
301 |
302 | # Write
303 | with open(results_file, 'a') as f:
304 | f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
305 | if len(opt.name) and opt.bucket:
306 | os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name))
307 |
308 | # Tensorboard
309 | if tb_writer:
310 | tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
311 | 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1',
312 | 'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
313 | for x, tag in zip(list(mloss[:-1]) + list(results), tags):
314 | tb_writer.add_scalar(tag, x, epoch)
315 |
316 | # Update best mAP
317 | fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1]
318 | if fi > best_fitness:
319 | best_fitness = fi
320 |
321 | # Save model
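    | # last.pt is rewritten every epoch; best.pt only when fitness improved this
    | # epoch. The optimizer state is kept until the final epoch so --resume works.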
322 | save = (not opt.nosave) or (final_epoch and not opt.evolve)
323 | if save:
324 | with open(results_file, 'r') as f: # create checkpoint
325 | ckpt = {'epoch': epoch,
326 | 'best_fitness': best_fitness,
327 | 'training_results': f.read(),
328 | 'model': ema.ema,
329 | 'optimizer': None if final_epoch else optimizer.state_dict()}
330 |
331 | # Save last, best and delete
332 | torch.save(ckpt, last)
333 | if (best_fitness == fi) and not final_epoch:
334 | torch.save(ckpt, best)
335 | del ckpt
336 |
337 | # end epoch ----------------------------------------------------------------------------------------------------
338 | # end training
339 |
340 | # Strip optimizers
341 | n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
342 | fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
343 | for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
344 | if os.path.exists(f1):
345 | os.rename(f1, f2) # rename
346 | ispt = f2.endswith('.pt') # is *.pt
347 | if ispt: strip_optimizer(f2)  # strip optimizer
348 | if opt.bucket and ispt: os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket))  # upload
349 |
350 | # Finish
351 | if not opt.evolve:
352 | plot_results() # save as results.png
353 | print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
354 | if device.type != 'cpu' and torch.cuda.device_count() > 1: dist.destroy_process_group()
355 | torch.cuda.empty_cache()
356 | return results
357 |
358 |
359 | if __name__ == '__main__':
360 | check_git_status()
361 | parser = argparse.ArgumentParser()
362 | parser.add_argument('--epochs', type=int, default=1000)
363 | parser.add_argument('--batch-size', type=int, default=3)
364 | parser.add_argument('--cfg', type=str, default='models/yolov5x.yaml', help='*.cfg path')
365 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
366 | parser.add_argument('--img-size', nargs='+', type=int, default=[1024, 1024], help='train,test sizes')
367 | parser.add_argument('--rect', action='store_true', help='rectangular training')
368 | parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
369 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
370 | parser.add_argument('--notest', action='store_true', help='only test final epoch')
371 | parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
372 | parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
373 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
374 | parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
375 | parser.add_argument('--weights', type=str, default='weights/best.pt', help='initial weights path')
376 | parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
377 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
378 | parser.add_argument('--adam', action='store_true', help='use adam optimizer')
379 | parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')  # %% escapes argparse's help formatting
380 | parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
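    | # Example invocation (the flags shown are simply the parser defaults above,
    | # as run by this repo's train.sh; adjust paths to your setup):
    | #   python train.py --cfg models/yolov5x.yaml --data data/coco128.yaml --img-size 1024 1024 --batch-size 3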
381 | opt = parser.parse_args()
382 | opt.weights = last if opt.resume and not opt.weights else opt.weights
383 | opt.cfg = check_file(opt.cfg) # check file
384 | opt.data = check_file(opt.data) # check file
385 | print(opt)
386 | opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
387 | device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
388 | if device.type == 'cpu':
389 | mixed_precision = False
390 |
391 | # Train
392 | if not opt.evolve:
393 | tb_writer = SummaryWriter(comment=opt.name)
394 | print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
395 | train(hyp)
396 |
397 | # Evolve hyperparameters (optional)
398 | else:
399 | tb_writer = None
400 | opt.notest, opt.nosave = True, True # only test/save final epoch
401 | if opt.bucket:
402 | os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
403 |
404 | for _ in range(10): # generations to evolve
405 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate
406 | # Select parent(s)
407 | parent = 'single' # parent selection method: 'single' or 'weighted'
408 | x = np.loadtxt('evolve.txt', ndmin=2)
409 | n = min(5, len(x)) # number of previous results to consider
410 | x = x[np.argsort(-fitness(x))][:n] # top n mutations
411 | w = fitness(x) - fitness(x).min() # weights
412 | if parent == 'single' or len(x) == 1:
413 | # x = x[random.randint(0, n - 1)] # random selection
414 | x = x[random.choices(range(n), weights=w)[0]] # weighted selection
415 | elif parent == 'weighted':
416 | x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
417 |
418 | # Mutate
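    | # Each evolve.txt row stores the 7 result metrics first and the hyp values
    | # after them, hence the x[i + 7] offset below. Mutation multiplies each hyp
    | # by gaussian noise (sigma s, applied with probability mp per gene), with
    | # the multiplier clipped to [0.3, 3.0].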
419 | mp, s = 0.9, 0.2 # mutation probability, sigma
420 | npr = np.random
421 | npr.seed(int(time.time()))
422 | g = np.array([1, 1, 1, 1, 1, 1, 1, 0, .1, 1, 0, 1, 1, 1, 1, 1, 1, 1]) # gains
423 | ng = len(g)
424 | v = np.ones(ng)
425 | while all(v == 1): # mutate until a change occurs (prevent duplicates)
426 | v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
427 | for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
428 | hyp[k] = x[i + 7] * v[i] # mutate
429 |
430 | # Clip to limits
431 | keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma']
432 | limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)]
433 | for k, v in zip(keys, limits):
434 | hyp[k] = np.clip(hyp[k], v[0], v[1])
435 |
436 | # Train mutation
437 | results = train(hyp.copy())
438 |
439 | # Write mutation results
440 | print_mutation(hyp, results, opt.bucket)
441 |
442 | # Plot results
443 | # plot_evolution_results(hyp)
444 |
--------------------------------------------------------------------------------