├── README.md
├── convertTrainLabel.py
├── detect.py
├── hubconf.py
├── process_data_yolo.py
├── task05_rnn.ipynb
├── test.py
└── train.py

/README.md:
--------------------------------------------------------------------------------
## Schedule

Opening session: Feb 16, 21:00

- Competition: Tianchi Innovation Competition warm-up round, fabric defect detection. Task: locate and label the defects in the given fabric images.

- Link: https://tianchi.aliyun.com/competition/entrance/531864/introduction?spm=5176.12281976.0.0.506441a6dTFHF3


### Task00: Get familiar with the rules (1 day)

- Form a team and update your group nickname.
- Get familiar with the check-in rules.
- Check-in deadline: Feb 18, 03:00

### Task01: Full competition walkthrough (3 days)

- Learn how to submit code with Docker and climb the leaderboard.
- Record the problems you run into, and include a screenshot of your initial score in your study notes.
- Check-in deadline: Feb 21, 03:00
- Study material:
  - [Docker environment setup guide](https://tianchi.aliyun.com/competition/entrance/231759/tab/226)
  - [Docker operations for the competition](https://github.com/datawhalechina/team-learning-cv/blob/master/DefectDetection/docker%E6%8F%90%E4%BA%A4%E6%95%99%E7%A8%8B.pdf)

### Task02: Study and improve the baseline (5 days)

- Study the baseline, propose your own improvements, submit the code, and update your score and ranking.
- Include a screenshot of your improved score and ranking in your study notes.
- Check-in deadline: Feb 26, 03:00
- Study material:
  - [Baseline walkthrough and score-boosting tips](https://github.com/datawhalechina/team-learning-cv/blob/master/DefectDetection/README.md)

### Task03: Learner sharing (2 days)

- Based on the screenshots, we invite the learners with the largest score gains to share their approach.


## File description
- code: folder holding all related code
- train_data: raw data files (guangdong1_round2_train2_20191004_Annotations, guangdong1_round2_train2_20191004_images)
- tcdata: official test data; generated automatically after the Docker submission
- data: training-data paths; set them in coco128.yaml
- models: network code
- weights: folder for trained models (best.pt, last.pt)
- convertTrainLabel.py: converts the official dataset to YOLO format; running it creates the convertor folder
- process_data_yolo.py: slides a window over the data in convertor, cutting the large images into 1024x1024 patches; creates the process_data folder
- train.py: training code; run it to train the model
- detect.py: inference code
- test.py: model evaluation code
- run.sh: script that predicts on the test set and generates the results (sh run.sh)
- train.sh: training script (sh train.sh)


## Usage
- step 1: unzip the official training data into the train_data folder
- step 2: train with sh train.sh
  - train.sh runs four steps (see the sketch at the end of this README):
    - python convertTrainLabel.py
    - python process_data_yolo.py
    - rm -rf ./convertor
    - python train.py
- step 3: generate the results with sh run.sh

## Approach
- YOLOv5 is used as the baseline.
- Data processing: the training images are split with a sliding window.


## Improvement ideas
- Data augmentation: random vertical/horizontal flips and color-space augmentation to balance the defect samples
- Adaptive anchor strategy
- Slightly reduce the weight of the box-regression loss
- Positive/negative sample balancing
- Multi-scale training
- Replace the last FPN level with dilated convolution
- FPN variants to try: NAS-FPN, AC-FPN
- Anchor matching strategy
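
For reference, the training script in step 2 amounts to the following (a sketch of the assumed train.sh; the actual script ships with the repo):

```bash
#!/bin/bash
# Convert the official annotations to YOLO format (creates ./convertor)
python convertTrainLabel.py
# Cut the large images into 1024x1024 patches (creates ./process_data)
python process_data_yolo.py
# The intermediate YOLO-format data is no longer needed
rm -rf ./convertor
# Train YOLOv5 on the patches
python train.py
```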
--------------------------------------------------------------------------------
/convertTrainLabel.py:
--------------------------------------------------------------------------------
import numpy as np
import os
import json
from tqdm.auto import tqdm
import shutil as sh
import cv2

json_path = "./train_data/guangdong1_round2_train2_20191004_Annotations/Annotations/anno_train.json"
image_path = "./train_data/guangdong1_round2_train2_20191004_images/defect/"

# Map the official (Chinese) defect names to YOLO class ids
defect_name2class = {
    "沾污": '0', "错花": '1', "水印": '2', "花毛": '3', "缝头": '4',
    "缝头印": '5', "虫粘": '6', "破洞": '7', "褶子": '8', "织疵": '9',
    "漏印": '10', "蜡斑": '11', "色差": '12', "网折": '13', "其他": '14',
}

name_list = []
image_h_list = []
image_w_list = []
c_list = []
w_list = []
h_list = []
x_center_list = []
y_center_list = []

with open(json_path, 'r') as f:
    temps = tqdm(json.loads(f.read()))
    for temp in temps:
        name = temp["name"].split('.')[0]
        path = os.path.join(image_path, name, temp["name"])
        im = cv2.imread(path)
        image_h, image_w = im.shape[0], im.shape[1]
        # bbox is (x_left, y_top, x_right, y_bottom) in pixels
        x_l, y_l, x_r, y_r = temp["bbox"]
        if temp["defect_name"] not in defect_name2class:
            raise ValueError("unknown defect name: %s" % temp["defect_name"])
        defect_name = defect_name2class[temp["defect_name"]]
        # Convert to the YOLO label format: class, x_center, y_center, w, h,
        # all normalized by the image width/height
        x_center = (x_l + x_r) / (2 * image_w)
        y_center = (y_l + y_r) / (2 * image_h)
        w = (x_r - x_l) / image_w
        h = (y_r - y_l) / image_h
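        # Worked example (illustrative numbers): for a 2000x1000 image with
        # bbox (x_l, y_l, x_r, y_r) = (100, 200, 300, 400):
        #   x_center = (100 + 300) / (2 * 2000) = 0.1
        #   y_center = (200 + 400) / (2 * 1000) = 0.3
        #   w = (300 - 100) / 2000 = 0.1
        #   h = (400 - 200) / 1000 = 0.2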
        name_list.append(temp["name"])
        c_list.append(defect_name)
        image_h_list.append(image_h)
        image_w_list.append(image_w)
        x_center_list.append(x_center)
        y_center_list.append(y_center)
        w_list.append(w)
        h_list.append(h)

index = list(set(name_list))
print(len(index))
for fold in [0]:
    # Hold out one fifth of the images as the validation split
    val_index = index[len(index) * fold // 5:len(index) * (fold + 1) // 5]
    print(len(val_index))
    for num, name in enumerate(name_list):
        row = [c_list[num], x_center_list[num], y_center_list[num], w_list[num], h_list[num]]
        if name in val_index:
            path2save = 'val/'
        else:
            path2save = 'train/'
        if not os.path.exists('convertor/fold{}/labels/'.format(fold) + path2save):
            os.makedirs('convertor/fold{}/labels/'.format(fold) + path2save)
        with open('convertor/fold{}/labels/'.format(fold) + path2save + name.split('.')[0] + ".txt", 'a+') as f:
            for data in row:
                f.write('{} '.format(data))
            f.write('\n')
        if not os.path.exists('convertor/fold{}/images/{}'.format(fold, path2save)):
            os.makedirs('convertor/fold{}/images/{}'.format(fold, path2save))
        sh.copy(os.path.join(image_path, name.split('.')[0], name),
                'convertor/fold{}/images/{}/{}'.format(fold, path2save, name))

--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
import argparse

import torch.backends.cudnn as cudnn
import json
import cv2
import os
import torch
from utils import google_utils
from utils.datasets import *
from utils.utils import *


def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
    save_dir = opt.save_dir
    # Initialize
    device = torch_utils.select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    google_utils.attempt_download(weights)
    model = torch.load(weights, map_location=device)['model'].float().eval()  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImagesTest(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    save_json = True
    result = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = torch_utils.time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    # Collect detections for the JSON submission file
                    if save_json:
                        name = os.path.split(txt_path)[-1]
                        print(name)

                        x1, y1, x2, y2 = float(xyxy[0]), float(xyxy[1]), float(xyxy[2]), float(xyxy[3])
                        bbox = [x1, y1, x2, y2]
                        img_name = name
                        conf = float(conf)

                        # Append one detection; submission categories are 1-based
                        result.append(
                            {'name': img_name + '.jpg', 'category': int(cls + 1), 'bbox': bbox,
                             'score': conf})
                        print("result: ", {'name': img_name + '.jpg', 'category': int(cls + 1), 'bbox': bbox, 'score': conf})

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    if save_json:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        with open(os.path.join(save_dir, "result.json"), 'w') as fp:
            json.dump(result, fp, indent=4, ensure_ascii=False)

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='weights/best.pt', help='model.pt path')
    parser.add_argument('--save_dir', type=str, default='./', help='result save dir')
    # parser.add_argument('--source', type=str, default='convertor/fold0/images/val', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--source', type=str, default='../../data/guangdong1_round2_train_part1_20190924/defect',
                        help='source')  # file/folder, 0 for webcam
    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
    parser.add_argument('--img-size', type=int, default=1024, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.04, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.05, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
                detect()
                create_pretrained(opt.weights, opt.weights)
        else:
            detect()
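# Typical invocation for this competition (paths are illustrative):
#   python detect.py --weights weights/best.pt --source ../tcdata/images \
#       --img-size 1024 --conf-thres 0.04 --iou-thres 0.05 --save_dir ./
# This writes the submission file result.json into --save_dir.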
--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
"""File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/

Usage:
    import torch
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
"""

dependencies = ['torch', 'yaml']

import os

import torch

from models.yolo import Model
from utils import google_utils


def create(name, pretrained, channels, classes):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes

    Returns:
        pytorch model
    """
    config = os.path.join(os.path.dirname(__file__), 'models', '%s.yaml' % name)  # model.yaml path
    model = Model(config, channels, classes)
    if pretrained:
        ckpt = '%s.pt' % name  # checkpoint filename
        google_utils.attempt_download(ckpt)  # download if not found locally
        state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict()  # to FP32
        state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape}  # filter
        model.load_state_dict(state_dict, strict=False)  # load
    return model


def yolov5s(pretrained=False, channels=3, classes=80):
    """YOLOv5-small model from https://github.com/ultralytics/yolov5

    Arguments:
        pretrained (bool): load pretrained weights into the model, default=False
        channels (int): number of input channels, default=3
        classes (int): number of model classes, default=80

    Returns:
        pytorch model
    """
    return create('yolov5s', pretrained, channels, classes)


def yolov5m(pretrained=False, channels=3, classes=80):
    """YOLOv5-medium model from https://github.com/ultralytics/yolov5

    Arguments:
        pretrained (bool): load pretrained weights into the model, default=False
        channels (int): number of input channels, default=3
        classes (int): number of model classes, default=80

    Returns:
        pytorch model
    """
    return create('yolov5m', pretrained, channels, classes)


def yolov5l(pretrained=False, channels=3, classes=80):
    """YOLOv5-large model from https://github.com/ultralytics/yolov5

    Arguments:
        pretrained (bool): load pretrained weights into the model, default=False
        channels (int): number of input channels, default=3
        classes (int): number of model classes, default=80

    Returns:
        pytorch model
    """
    return create('yolov5l', pretrained, channels, classes)


def yolov5x(pretrained=False, channels=3, classes=80):
    """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5

    Arguments:
        pretrained (bool): load pretrained weights into the model, default=False
        channels (int): number of input channels, default=3
        classes (int): number of model classes, default=80

    Returns:
        pytorch model
    """
    return create('yolov5x', pretrained, channels, classes)
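# Local usage sketch (run from the repo root so models/*.yaml resolve):
#   from hubconf import yolov5s
#   model = yolov5s(pretrained=True, channels=3, classes=80)
#   model.eval()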
--------------------------------------------------------------------------------
/process_data_yolo.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
'''
Purpose: crop each image and its label file into sliding-window patches and
generate the patch dataset.
'''

from __future__ import division
import os.path
from PIL import Image
import numpy as np
import shutil
import cv2
from tqdm import tqdm

# (the paths below point at the fold0 val split; adjust them for the train split)
ImgPath = './convertor/fold0/images/val/'  # source images
path = './convertor/fold0/labels/val/'  # source labels

ProcessedPath = './process_data/'  # output data

txtfiles = os.listdir(path)
print(txtfiles)
# patch size
patch_size = 1024
# sliding-window stride
stride = 600

txtfiles = tqdm(txtfiles)
for file in txtfiles:  # iterate over the label files
    image_pre, ext = os.path.splitext(file)
    imgfile = ImgPath + image_pre + '.jpg'
    txtfile = path + image_pre + '.txt'

    img = cv2.imread(imgfile)
    img_h, img_w = img.shape[0], img.shape[1]

    f = open(os.path.join(path, file), "r")
    lines = f.readlines()
    savepath_img = ProcessedPath + 'images' + '/val'  # where the cropped images go
    savepath_txt = ProcessedPath + 'labels' + '/val'  # where the cropped labels go
    if not os.path.exists(savepath_img):
        os.makedirs(savepath_img)
    if not os.path.exists(savepath_txt):
        os.makedirs(savepath_txt)

    bndbox = []
    boxname = []
    for line in lines:
        c, x_c, y_c, w, h, _ = line.split(' ')
        c, x_c, y_c, w, h = float(c), float(x_c), float(y_c), float(w), float(h)
        bndbox.append([x_c, y_c, w, h])
        boxname.append([c])

    # Window origins: a holds the x starts, b the y starts
    a = []
    b = []
    for a_ in range(0, img_w, stride):
        a.append(a_)
    for b_ in range(0, img_h, stride):
        b.append(b_)

    cropboxes = []
    for i in a:
        for j in b:
            cropboxes.append([i, j, i + patch_size, j + patch_size])
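    # Example: with img_w = 2446 and stride = 600 the x origins are
    # 0, 600, 1200, 1800, 2400, so neighbouring 1024x1024 windows overlap by
    # 424 px and the last window runs past the right edge (PIL pads the crop).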
    def select(m, n, w, h):
        # m: window x origin, n: window y origin, w: width, h: height
        bbox = []
        # Check every box of the image against this window
        for index in range(0, len(bndbox)):
            boxcls = boxname[index]  # class of the box
            # Convert the normalized YOLO box back to pixel corners
            x1 = float(bndbox[index][0] * img_w - bndbox[index][2] * img_w / 2)  # x min
            y1 = float(bndbox[index][1] * img_h - bndbox[index][3] * img_h / 2)  # y min
            x2 = float(bndbox[index][0] * img_w + bndbox[index][2] * img_w / 2)  # x max
            y2 = float(bndbox[index][1] * img_h + bndbox[index][3] * img_h / 2)  # y max

            # If the box lies fully inside the window, keep it, re-expressed
            # in window coordinates and re-normalized by the window size
            if x1 >= m and x2 <= m + w and y1 >= n and y2 <= n + h:
                a1 = x1 - m
                b1 = y1 - n
                a2 = x2 - m
                b2 = y2 - n
                box_w = a2 - a1
                box_h = b2 - b1
                x_c = (a1 + box_w / 2) / w
                y_c = (b1 + box_h / 2) / h
                box_w = box_w / w
                box_h = box_h / h
                bbox.append([boxcls[0], x_c, y_c, box_w, box_h])  # updated box
        return bbox  # empty list if the window contains no complete box

    img = Image.open(imgfile)
    for j in range(0, len(cropboxes)):
        # Boxes that fall inside this patch
        Bboxes = select(cropboxes[j][0], cropboxes[j][1], patch_size, patch_size)
        if len(Bboxes):
            with open(savepath_txt + '/' + image_pre + '_' + '{}'.format(j) + '.txt', 'w') as f:
                for Bbox in Bboxes:
                    for data in Bbox:
                        f.write('{} '.format(data))
                    f.write('\n')

            # Crop and save the patch
            try:
                cropedimg = img.crop(cropboxes[j])
                cropedimg.save(savepath_img + '/' + image_pre + '_' + str(j) + '.jpg')
            except:
                continue

--------------------------------------------------------------------------------
/task05_rnn.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "configured-exploration",
   "metadata": {},
   "source": [
    "Recurrent Neural Networks (RNN)\n",
    "\n",
    "1. Computational graphs\n",
    "2. RNN\n",
    "3. Long short-term memory networks\n",
    "4. Other RNNs\n",
    "5. Main applications of RNNs\n",
    "\n",
    "\n",
    "\n",
    "# Computational graphs\n",
    "\n",
    "Computational graphs are introduced to make it easier to describe networks later on. A computational graph describes the structure of a computation; its elements are nodes and edges. Nodes represent variables (scalars, vectors, tensors and so on), while edges represent operations, i.e. functions.\n",
    "\n",
    "![6.1](./PIC/6/6.1.png)\n",
    "\n",
    "The computational graph below represents a composite function.\n",
    "\n",
    "![6.2](./PIC/6/6.2.png)\n",
    "\n",
    "Differentiation on a computational graph follows the chain rule, with two cases.\n",
    "\n",
    "- Case 1\n",
    "\n",
    "![6.3](./PIC/6/6.3.png)\n",
    "\n",
    "- Case 2\n",
    "\n",
    "![6.4](./PIC/6/6.4.png)\n",
    "\n",
    "Worked examples:\n",
    "\n",
    "Example 1\n",
    "\n",
    "![6.5](./PIC/6/6.5.png)\n",
    "\n",
    "- With a = 2, b = 1 we get c = 3, d = 2, e = 6.\n",
    "\n",
    "- $\\frac{\\partial e}{\\partial a} = \\frac{\\partial e}{\\partial c}\\frac{\\partial c}{\\partial a} = d = b + 1 = 2$\n",
    "- $\\frac{\\partial e}{\\partial b} = \\frac{\\partial e}{\\partial c}\\frac{\\partial c}{\\partial b}+\\frac{\\partial e}{\\partial d}\\frac{\\partial d}{\\partial b} = d + c = b+1+a+b = 5$\n",
    "\n",
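    "We can sanity-check Example 1 with PyTorch autograd (a quick sketch):\n",
    "\n",
    "```python\n",
    "import torch\n",
    "\n",
    "a = torch.tensor(2.0, requires_grad=True)\n",
    "b = torch.tensor(1.0, requires_grad=True)\n",
    "c = a + b      # c = 3\n",
    "d = b + 1      # d = 2\n",
    "e = c * d      # e = 6\n",
    "e.backward()\n",
    "print(a.grad)  # tensor(2.) -> de/da = d = 2\n",
    "print(b.grad)  # tensor(5.) -> de/db = d + c = 5\n",
    "```\n",
    "\n",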
"循环神经网络是一种人工神经网络,它的节点间的连接形成一个遵循时间序列的有向图,它的核心思想是,样本间存在顺序关系,每个样本和它之前的样本存在关联。通过神经网络在时序上的展开,我们能够找到样本之间的序列相关性。\n", 75 | "\n", 76 | "下面给出RNN的一般结构:\n", 77 | "\n", 78 | "![6.10](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.10.png)\n", 79 | "\n", 80 | "其中各个符号的表示:$x_t,s_t,o_t$分别表示的是$t$时刻的输入、记忆和输出,$U,V,W$是RNN的连接权重,$b_s,b_o$是RNN的偏置,$\\sigma,\\varphi$是激活函数,$\\sigma$通常选tanh或sigmoid,$\\varphi$通常选用softmax。\n", 81 | "\n", 82 | "其中 softmax 函数,用于分类问题的概率计算。本质上是将一个K维的任意实数向量压缩 (映射)成另一个K维的实数向量,其中向量中的每个元素取值都介于(0,1)之间。\n", 83 | "$$\n", 84 | "\\sigma(\\vec{z})_{i}=\\frac{e^{z_{i}}}{\\sum_{j=1}^{K} e^{z_{j}}}\n", 85 | "$$\n", 86 | "\n", 87 | "### RNN案例\n", 88 | "\n", 89 | "比如词性标注,\n", 90 | "\n", 91 | "- 我/n,爱/v购物/n,\n", 92 | "- 我/n在/pre华联/n购物/v\n", 93 | "\n", 94 | "Word Embedding:自然语言处理(NLP)中的 一组语言建模和特征学习技术的统称,其中来自词汇表的单词或短语被映射到实数的向量。比如这里映射到三个向量然后输入:\n", 95 | "\n", 96 | "\"6.11\"\n", 97 | "\n", 98 | "将神经元的输出存到memory中,memory中值会作为下一时刻的输入。在最开始时刻,给定 memory初始值,然后逐次更新memory中的值。\n", 99 | "\n", 100 | "![6.12](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.12.png)\n", 101 | "\n", 102 | "![6.13](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.13.png)\n", 103 | "\n", 104 | "### RNN的一般结构\n", 105 | "\n", 106 | "- Elman Network\n", 107 | "\n", 108 | "\"6.14\"\n", 109 | "\n", 110 | "- Jordan Network\n", 111 | "\n", 112 | "\"6.15\"\n", 113 | "\n", 114 | "各种不同的RNN结构\n", 115 | "\n", 116 | "![6.16](/Users/liuyang/Desktop/中科院/datawhale/DL理论/PIC/6/6.16.png)\n", 117 | "\n", 118 | "### RNN训练算法 - BPTT\n", 119 | "\n", 120 | "我们先来回顾一下BP算法,就是定义损失函数 Loss 来表示输出 $\\hat{y}$ 和真实标签 y 的误差,通过链式法则自顶向下求得 Loss 对网络权重的偏导。沿梯度的反方向更新权重的值, 直到 Loss 收敛。而这里的 BPTT 算法就是加上了时序演化,后面的两个字母 TT 就是 Through Time。\n", 121 | "\n", 122 | "\"6.17\"\n", 123 | "\n", 124 | "我们先定义输出函数:\n", 125 | "$$\n", 126 | "\\begin{array}{l}s_{t}=\\tanh \\left(U x_{t}+W s_{t-1}\\right) \\\\ \\hat{y}_{t}=\\operatorname{softmax}\\left(V s_{t}\\right)\\end{array}\n", 127 | "$$\n", 128 | "再定义损失函数:\n", 129 | "$$\n", 130 | "\\begin{aligned} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) =-y_{t} \\log \\hat{y}_{t} \\\\ E(y, \\hat{y}) =\\sum_{t} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) \\\\ =-\\sum_{t} y_{t} \\log \\hat{y}_{t}\\end{aligned}\n", 131 | "$$\n", 132 | "\"6.18\"\n", 133 | "\n", 134 | "我们分别求损失函数 E 对 U、V、W的梯度:\n", 135 | "$$\n", 136 | "\\begin{array}{l}\\frac{\\partial E}{\\partial V}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial V} \\\\ \\frac{\\partial E}{\\partial W}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial W} \\\\ \\frac{\\partial E}{\\partial U}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial U}\\end{array}\n", 137 | "$$\n", 138 | "\n", 139 | "- 求 E 对 V 的梯度,先求 $E_3$ 对 V 的梯度\n", 140 | "\n", 141 | "$$\n", 142 | "\\begin{aligned} \\frac{\\partial E_{3}}{\\partial V} &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial V} \\\\ &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial z_{3}} \\frac{\\partial z_{3}}{\\partial V} \\end{aligned}\n", 143 | "$$\n", 144 | "\n", 145 | "其中 $z_3 = V s_3$,然后求和即可。\n", 146 | "\n", 147 | "- 求 E 对 W 的梯度,先求 $E_3$ 对 W 的梯度\n", 148 | "\n", 149 | "$$\n", 150 | "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial W}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial W} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} 
    "\n",
    "### An RNN example\n",
    "\n",
    "Part-of-speech tagging, for example:\n",
    "\n",
    "- 我/n, 爱/v 购物/n,\n",
    "- 我/n 在/pre 华联/n 购物/v\n",
    "\n",
    "Word embedding is the collective name for a set of language-modeling and feature-learning techniques in natural language processing (NLP) in which words or phrases from the vocabulary are mapped to vectors of real numbers. Here, for instance, the words are mapped to three vectors that are then fed to the network:\n",
    "\n",
    "![6.11](./PIC/6/6.11.png)\n",
    "\n",
    "The neuron outputs are stored in memory, and the memory values serve as inputs at the next time step. At the very first step the memory is given an initial value, which is then updated step by step.\n",
    "\n",
    "![6.12](./PIC/6/6.12.png)\n",
    "\n",
    "![6.13](./PIC/6/6.13.png)\n",
    "\n",
    "### Common RNN structures\n",
    "\n",
    "- Elman network\n",
    "\n",
    "![6.14](./PIC/6/6.14.png)\n",
    "\n",
    "- Jordan network\n",
    "\n",
    "![6.15](./PIC/6/6.15.png)\n",
    "\n",
    "Various RNN structures:\n",
    "\n",
    "![6.16](./PIC/6/6.16.png)\n",
    "\n",
    "### The RNN training algorithm: BPTT\n",
    "\n",
    "First recall the BP algorithm: define a loss function Loss measuring the error between the output $\\hat{y}$ and the true label $y$, use the chain rule to compute the partial derivatives of Loss with respect to the network weights from the top down, and update the weights along the negative gradient until Loss converges. BPTT is the same algorithm unrolled over time; the trailing \"TT\" stands for Through Time.\n",
    "\n",
    "![6.17](./PIC/6/6.17.png)\n",
    "\n",
    "First define the outputs:\n",
    "$$\n",
    "\\begin{array}{l}s_{t}=\\tanh \\left(U x_{t}+W s_{t-1}\\right) \\\\ \\hat{y}_{t}=\\operatorname{softmax}\\left(V s_{t}\\right)\\end{array}\n",
    "$$\n",
    "Then define the loss:\n",
    "$$\n",
    "\\begin{aligned} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) &=-y_{t} \\log \\hat{y}_{t} \\\\ E(y, \\hat{y}) &=\\sum_{t} E_{t}\\left(y_{t}, \\hat{y}_{t}\\right) \\\\ &=-\\sum_{t} y_{t} \\log \\hat{y}_{t}\\end{aligned}\n",
    "$$\n",
    "![6.18](./PIC/6/6.18.png)\n",
    "\n",
    "We compute the gradients of the loss E with respect to U, V and W separately:\n",
    "$$\n",
    "\\begin{array}{l}\\frac{\\partial E}{\\partial V}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial V} \\\\ \\frac{\\partial E}{\\partial W}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial W} \\\\ \\frac{\\partial E}{\\partial U}=\\sum_{t} \\frac{\\partial E_{t}}{\\partial U}\\end{array}\n",
    "$$\n",
    "\n",
    "- Gradient with respect to V, starting from $E_3$:\n",
    "\n",
    "$$\n",
    "\\begin{aligned} \\frac{\\partial E_{3}}{\\partial V} &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial V} \\\\ &=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial z_{3}} \\frac{\\partial z_{3}}{\\partial V} \\end{aligned}\n",
    "$$\n",
    "\n",
    "where $z_3 = V s_3$; then sum over all time steps.\n",
    "\n",
    "- Gradient with respect to W, starting from $E_3$:\n",
    "\n",
    "$$\n",
    "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial W}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial W} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} \\frac{\\partial s_{k}}{\\partial W} \\\\ \\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}}\\left(\\prod_{j=k+1}^{3} \\frac{\\partial s_{j}}{\\partial s_{j-1}}\\right) \\frac{\\partial s_{k}}{\\partial W}\\end{array}\n",
    "$$\n",
    "\n",
    "Here $s_3$ depends on $s_2$, which in turn depends on $s_1$ and W, and the dependency chain reaches all the way back to t = 0. So when differentiating with respect to W, $s_2$ cannot be treated as a constant!\n",
    "\n",
    "- Gradient with respect to U, starting from $E_3$:\n",
    "\n",
    "$$\n",
    "\\begin{array}{c}\\frac{\\partial E_{3}}{\\partial U}=\\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial U} \\\\ s_{3}=\\tanh \\left(U x_{3}+W s_{2}\\right) \\\\ \\frac{\\partial E_{3}}{\\partial U}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}} \\frac{\\partial s_{3}}{\\partial s_{k}} \\frac{\\partial s_{k}}{\\partial U}\\end{array}\n",
    "$$\n",
    "\n",
    "# Long short-term memory networks\n",
    "\n",
    "RNNs have a serious problem: vanishing gradients, which originally made long-range dependencies hard to learn. The vanishing gradient has two causes: the BPTT algorithm and the tanh activation.\n",
    "$$\n",
    "\\frac{\\partial E_{3}}{\\partial W}=\\sum_{k=0}^{3} \\frac{\\partial E_{3}}{\\partial \\hat{y}_{3}} \\frac{\\partial \\hat{y}_{3}}{\\partial s_{3}}\\left(\\prod_{j=k+1}^{3} \\frac{\\partial s_{j}}{\\partial s_{j-1}}\\right) \\frac{\\partial s_{k}}{\\partial W}\n",
    "$$\n",
    "There are two remedies: the ReLU activation and gated RNNs (LSTM).\n",
    "\n",
    "### LSTM\n",
    "\n",
    "LSTM, the long short-term memory network, was proposed by Sepp Hochreiter and Jürgen Schmidhuber in 1997. It is an artificial recurrent neural network (RNN) architecture used in deep learning. An LSTM unit consists of an input gate, an output gate and a forget gate; the three gates control the flow of information into and out of the unit.\n",
    "\n",
    "![6.20](./PIC/6/6.20.png)\n",
    "\n",
    "- LSTM passes information between hidden units via a cell state that runs through the hidden layer and is touched only by a few linear operations.\n",
    "- LSTM introduces a \"gate\" mechanism that adds or removes cell-state information; this is what enables long-range memory.\n",
    "- A gate consists of a sigmoid activation layer and an element-wise product; the sigmoid output controls what fraction of the information passes through.\n",
    "\n",
    "**Forget gate**: controls how much of the cell state is forgotten. It outputs the forget weights for the current state, depending on $h_{t-1}$ and $x_t$:\n",
    "$$\n",
    "f_{t}=\\sigma\\left(W_{f} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{f}\\right)\n",
    "$$\n",
    "![6.21](./PIC/6/6.21.png)\n",
    "\n",
    "**Input gate**: controls how much of the input is accepted into the cell state. It outputs the acceptance weights for the current input, depending on $h_{t-1}$ and $x_t$:\n",
    "$$\n",
    "\\begin{array}{c}i_{t}=\\sigma\\left(W_{i} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{i}\\right) \\\\ \\tilde{C}_{t}=\\tanh \\left(W_{C} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{C}\\right)\\end{array}\n",
    "$$\n",
    "![6.22](./PIC/6/6.22.png)\n",
    "\n",
    "**Output gate**: controls how much of the cell state is released as output. It outputs the approval weights for the current output, depending on $h_{t-1}$ and $x_t$:\n",
    "$$\n",
    "o_{t}=\\sigma\\left(W_{o} \\cdot\\left[h_{t-1}, x_{t}\\right]+b_{o}\\right)\n",
    "$$\n",
    "![6.23](./PIC/6/6.23.png)\n",
    "\n",
    "**State update**: the gate mechanism adds or removes cell-state information, implementing long-range memory:\n",
    "$$\n",
    "\\begin{array}{c}C_{t}=f_{t} * C_{t-1}+i_{t} * \\tilde{C}_{t} \\\\ h_{t}=o_{t} * \\tanh \\left(C_{t}\\right)\\end{array}\n",
    "$$\n",
    "![6.24](./PIC/6/6.24.png)\n",
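    "\n",
    "The four gate equations map one-to-one onto code. A single LSTM step written out with explicit gates (a sketch; in practice nn.LSTM fuses all of this):\n",
    "\n",
    "```python\n",
    "import torch\n",
    "\n",
    "def lstm_step(x_t, h_prev, C_prev, Wf, bf, Wi, bi, WC, bC, Wo, bo):\n",
    "    z = torch.cat([h_prev, x_t])        # [h_{t-1}, x_t]\n",
    "    f_t = torch.sigmoid(Wf @ z + bf)    # forget gate\n",
    "    i_t = torch.sigmoid(Wi @ z + bi)    # input gate\n",
    "    C_tilde = torch.tanh(WC @ z + bC)   # candidate cell state\n",
    "    C_t = f_t * C_prev + i_t * C_tilde  # state update\n",
    "    o_t = torch.sigmoid(Wo @ z + bo)    # output gate\n",
    "    h_t = o_t * torch.tanh(C_t)\n",
    "    return h_t, C_t\n",
    "```\n",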
    "\n",
    "Below is a standard RNN example:\n",
    "\n",
    "```python\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "# Build an RNN: input size 5, hidden size 10, 2 layers\n",
    "rnn_seq = nn.RNN(5, 10, 2)\n",
    "# Build an input sequence: length 6, batch size 3, feature size 5\n",
    "x = torch.randn(6, 3, 5)\n",
    "# out, ht = rnn_seq(x, h0)  # h0 may be given or left out\n",
    "out, ht = rnn_seq(x)\n",
    "# Q: what are the sizes of out and ht?  out: 6x3x10, ht: 2x3x10\n",
    "\n",
    "# LSTM: input size 50, hidden size 100, two layers\n",
    "lstm_seq = nn.LSTM(50, 100, num_layers=2)\n",
    "# Input sequence: seq_len = 10, batch = 3, input size = 50\n",
    "lstm_input = torch.randn(10, 3, 50)\n",
    "out, (h, c) = lstm_seq(lstm_input)  # uses the default all-zero hidden state\n",
    "```\n",
    "\n",
    "# Other classic recurrent networks\n",
    "\n",
    "### Gated Recurrent Unit (GRU)\n",
    "\n",
    "The Gated Recurrent Unit (GRU), proposed in 2014, can be viewed as a variant of the LSTM. It merges the cell state with the hidden state and computes the new information at the current step differently from the LSTM. The GRU contains only a reset gate and an update gate; on music and speech-signal modeling it performs comparably to the LSTM while having fewer parameters, with only two gates.\n",
    "\n",
    "![6.19](./PIC/6/6.19.png)\n",
    "\n",
    "### Peephole LSTM\n",
    "\n",
    "The gate layers also take the cell state as input, in addition to the hidden-state information.\n",
    "\n",
    "![6.25](./PIC/6/6.25.png)\n",
    "\n",
    "### Bi-directional RNN\n",
    "\n",
    "A bi-directional RNN assumes that the output at time t depends not only on the earlier elements of the sequence but also on the later ones, as in a cloze test. It consists of two RNNs stacked on top of each other, and the output is determined by the hidden states of both.\n",
    "\n",
    "![6.26](./PIC/6/6.26.png)\n",
    "\n",
    "![6.27](./PIC/6/6.27.png)\n",
    "\n",
    "### Continuous-time RNN (CTRNN)\n",
    "\n",
    "A CTRNN uses a system of ordinary differential equations to model the effect of incoming spike trains on a neuron. CTRNNs have been applied in evolutionary robotics to problems of vision, cooperation and minimal cognitive behaviour.\n",
    "\n",
    "![6.28](./PIC/6/6.28.png)\n",
    "\n",
    "# Main applications of RNNs\n",
    "\n",
    "### Language models\n",
    "\n",
    "Predict the next word or character from the preceding and current words.\n",
    "\n",
    "![6.29](./PIC/6/6.29.png)\n",
    "\n",
    "Question answering:\n",
    "\n",
    "![6.30](./PIC/6/6.30.png)\n",
    "\n",
    "### Automatic composition\n",
    "\n",
    "![6.31](./PIC/6/6.31.png)\n",
    "\n",
    "Reference: Hang Chu, Raquel Urtasun, Sanja Fidler. Song From PI: A Musically Plausible Network for Pop Music Generation. CoRR abs/1611.03477 (2016).\n",
    "\n",
    "Music AI Lab: **https://musicai.citi.sinica.edu.tw/**\n",
    "\n",
    "![6.32](./PIC/6/6.32.png)\n",
    "\n",
    "### Machine translation\n",
    "\n",
    "Automatically translate one language into another.\n",
    "\n",
    "![6.35](./PIC/6/6.35.png)\n",
    "\n",
    "### Automatic writing\n",
    "\n",
    "Write from existing material; today this mainly means news writing and poetry generation. It is built on RNN/LSTM-based text generation, requires training on large amounts of similar text, and is combined with template techniques.\n",
    "\n",
    "Current products include Tencent's Dreamwriter robot reporter, Toutiao's xiaomingbot, Yicai's DT稿王 (backed by Alibaba) and Baidu's Writing-bots...\n",
    "\n",
    "### Image captioning\n",
    "\n",
    "Generate a textual description of an image.\n",
    "\n",
    "![6.33](./PIC/6/6.33.png)\n",
    "\n",
    "![6.34](./PIC/6/6.34.png)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "settled-engine",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import argparse
import json

from utils import google_utils
from utils.datasets import *
from utils.utils import *


def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         merge=False):
    # Initialize/load model and set device
    if model is None:
        training = False
        merge = opt.merge  # use Merge NMS
        device = torch_utils.select_device(opt.device, batch_size=batch_size)

        # Remove previous
        for f in glob.glob('test_batch*.jpg'):
            os.remove(f)
        # Load model
        google_utils.attempt_download(weights)
        model = torch.load(weights, map_location=device)['model'].float().fuse().to(device)  # load to FP32
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    else:  # called by train.py
        training = True
        device = next(model.parameters()).device  # get model device

    # Half
    half = device.type != 'cpu' and torch.cuda.device_count() == 1  # half precision only supported on single-GPU
    if half:
        model.half()  # to FP16

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if dataloader is None:  # not training
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, int(max(model.stride)), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = torch_utils.time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += torch_utils.time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # GIoU, obj, cls

            # Run NMS
            t = torch_utils.time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge)
            t1 += torch_utils.time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #     [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[5])],
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # prediction indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # target indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > iouv[0]).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if batch_i < 1:
            f = 'test_batch%g_gt.jpg' % batch_i  # filename
            plot_images(img, targets, paths, f, names)  # ground truth
            f = 'test_batch%g_pred.jpg' % batch_i
            plot_images(img, output_to_target(output, width, height), paths, f, names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)
    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Save JSON
    if save_json and map50 and len(jdict):
        imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataloader.dataset.img_files]
        f = 'detections_val2017_%s_results.json' % \
            (weights.split(os.sep)[-1].replace('.pt', '') if weights else '')  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except:
            print('WARNING: pycocotools must be installed with numpy==1.17 to run correctly. '
                  'See https://github.com/cocodataset/cocoapi/issues/356')

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--weights', type=str, default='weights/yolov5s.pt', help='model.pt path')
    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path')
    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
    parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
    parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--merge', action='store_true', help='use Merge NMS')
    parser.add_argument('--verbose', action='store_true', help='report mAP by class')
    opt = parser.parse_args()
    opt.save_json = opt.save_json or opt.data.endswith('coco.yaml')
    opt.data = check_file(opt.data)  # check file
    print(opt)

    # task = 'val', 'test', 'study'
    if opt.task in ['val', 'test']:  # (default) run normally
        test(opt.data,
             opt.weights,
             opt.batch_size,
             opt.img_size,
             opt.conf_thres,
             opt.iou_thres,
             opt.save_json,
             opt.single_cls,
             opt.augment,
             opt.verbose)

    elif opt.task == 'study':  # run over a range of settings and save/plot
        for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']:
            f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem)  # filename to save to
            x = list(range(352, 832, 64))  # x axis
            y = []  # y axis
            for i in x:  # img-size
                print('\nRunning %s point %s...' % (f, i))
                r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json)
                y.append(r + t)  # results and times
            np.savetxt(f, y, fmt='%10.4g')  # save
        os.system('zip -r study.zip study_*.txt')
        # plot_study_txt(f, x)  # plot
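# Typical local evaluation run (illustrative):
#   python test.py --data data/coco128.yaml --weights weights/best.pt \
#       --img-size 1024 --task val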
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import argparse

import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import os
import test  # import test.py to get mAP after each epoch
from models.yolo import Model
from utils import google_utils
from utils.datasets import *
from utils.utils import *

mixed_precision = True
try:  # Mixed precision training https://github.com/NVIDIA/apex
    from apex import amp
except:
    print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
    mixed_precision = False  # not installed

wdir = 'weights' + os.sep  # weights dir
os.makedirs(wdir, exist_ok=True)
last = wdir + 'last.pt'
best = wdir + 'best.pt'
results_file = 'results.txt'

# Hyperparameters
hyp = {'lr0': 0.01,  # initial learning rate (SGD=1E-2, Adam=1E-3)
       'momentum': 0.937,  # SGD momentum
       'weight_decay': 5e-4,  # optimizer weight decay
       'giou': 0.05,  # giou loss gain
       'cls': 0.58,  # cls loss gain
       'cls_pw': 1.0,  # cls BCELoss positive_weight
       'obj': 1.0,  # obj loss gain (*=img_size/320 if img_size != 320)
       'obj_pw': 1.0,  # obj BCELoss positive_weight
       'iou_t': 0.20,  # iou training threshold
       'anchor_t': 4.0,  # anchor-multiple threshold
       'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
       'hsv_h': 0.014,  # image HSV-Hue augmentation (fraction)
       'hsv_s': 0.68,  # image HSV-Saturation augmentation (fraction)
       'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
       'degrees': 0.0,  # image rotation (+/- deg)
       'translate': 0.0,  # image translation (+/- fraction)
       'scale': 0.5,  # image scale (+/- gain)
       'shear': 0.0}  # image shear (+/- deg)
print(hyp)

# Overwrite hyp with hyp*.txt (optional)
f = glob.glob('hyp*.txt')
if f:
    print('Using %s' % f[0])
    for k, v in zip(hyp.keys(), np.loadtxt(f[0])):
        hyp[k] = v

# Print focal loss if gamma > 0
if hyp['fl_gamma']:
    print('Using FocalLoss(gamma=%g)' % hyp['fl_gamma'])
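# Note: hyp*.txt is read positionally with np.loadtxt, one value per key in the
# dict order above (lr0 first). For example, a hyp_finetune.txt containing
#   0.005
#   0.937
# would set lr0=0.005 and momentum=0.937 and leave the remaining keys at their
# defaults (zip stops at the shorter sequence).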

def train(hyp):
    epochs = opt.epochs  # 300
    batch_size = opt.batch_size  # 64
    weights = opt.weights  # initial training weights

    # Configure
    init_seeds(1)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Create model
    model = Model(opt.cfg, nc=data_dict['nc']).to(device)

    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_parameters():
        if v.requires_grad:
            if '.bias' in k:
                pg2.append(v)  # biases
            elif '.weight' in k and '.bn' not in k:
                pg1.append(v)  # apply weight decay
            else:
                pg0.append(v)  # all else

    optimizer = optim.Adam(pg0, lr=hyp['lr0']) if opt.adam else \
        optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2
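    # The cosine schedule decays the LR multiplier from 1.0 down to 0.1:
    #   lf(0) = 1.0, lf(epochs / 2) = 0.55, lf(epochs) = 0.1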

    # Load Model
    google_utils.attempt_download(weights)
    start_epoch, best_fitness = 0, 0.0
    if weights.endswith('.pt'):  # pytorch format
        ckpt = torch.load(weights, map_location=device)  # load checkpoint

        # load model
        try:
            ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
                             if model.state_dict()[k].shape == v.shape}  # to FP32, filter
            model.load_state_dict(ckpt['model'], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \
                "Please delete or update %s and try again, or use --weights '' to train from scratch." \
                % (opt.weights, opt.cfg, opt.weights, opt.weights)
            raise KeyError(s) from e

        # load optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # load results
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # epochs
        start_epoch = ckpt['epoch'] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    scheduler.last_epoch = start_epoch - 1  # do not move
    # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(backend='nccl',  # distributed backend
                                init_method='tcp://127.0.0.1:9999',  # init method
                                world_size=1,  # number of nodes
                                rank=0)  # node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
        # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Correct your labels or your model.' % (mlc, nc, opt.cfg)

    # Testloader
    testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,
                                   hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0]

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model.names = data_dict['names']

    # Class frequency
    labels = np.concatenate(dataset.labels, 0)
    c = torch.tensor(labels[:, 0])  # classes
    # cf = torch.bincount(c.long(), minlength=nc) + 1.
    # model._initialize_biases(cf.to(device))
    if tb_writer:
        plot_labels(labels)
        tb_writer.add_histogram('classes', c, 0)

    # Check anchors
    if not opt.noautoanchor:
        check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Exponential moving average
    ema = torch_utils.ModelEMA(model)

    # Start training
    t0 = time.time()
    nb = len(dataloader)  # number of batches
    n_burn = max(3 * nb, 1e3)  # burn-in iterations, max(3 epochs, 1k iterations)
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
    print('Using %g dataloader workers' % dataloader.num_workers)
    print('Starting training for %g epochs...' % epochs)
    # torch.autograd.set_detect_anomaly(True)
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
            dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0

            # Burn-in
            if ni <= n_burn:
                xi = [0, n_burn]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            pred = model(imgs)

            # Loss
            loss, loss_items = compute_loss(pred, targets.to(device), model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Backward
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.4g' * 6) % (
                '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
            pbar.set_description(s)

            # Plot
            if ni < 3:
                f = 'train_batch%g.jpg' % ni  # filename
                result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer and result is not None:
                    tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    # tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------ 286 | 287 | # Scheduler 288 | scheduler.step() 289 | 290 | # mAP 291 | ema.update_attr(model) 292 | final_epoch = epoch + 1 == epochs 293 | if not opt.notest or final_epoch: # Calculate mAP 294 | results, maps, times = test.test(opt.data, 295 | batch_size=batch_size, 296 | imgsz=imgsz_test, 297 | save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'), 298 | model=ema.ema, 299 | single_cls=opt.single_cls, 300 | dataloader=testloader) 301 | 302 | # Write 303 | with open(results_file, 'a') as f: 304 | f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) 305 | if len(opt.name) and opt.bucket: 306 | os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt.bucket, opt.name)) 307 | 308 | # Tensorboard 309 | if tb_writer: 310 | tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 311 | 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1', 312 | 'val/giou_loss', 'val/obj_loss', 'val/cls_loss'] 313 | for x, tag in zip(list(mloss[:-1]) + list(results), tags): 314 | tb_writer.add_scalar(tag, x, epoch) 315 | 316 | # Update best mAP 317 | fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] 318 | if fi > best_fitness: 319 | best_fitness = fi 320 | 321 | # Save model 322 | save = (not opt.nosave) or (final_epoch and not opt.evolve) 323 | if save: 324 | with open(results_file, 'r') as f: # create checkpoint 325 | ckpt = {'epoch': epoch, 326 | 'best_fitness': best_fitness, 327 | 'training_results': f.read(), 328 | 'model': ema.ema, 329 | 'optimizer': None if final_epoch else optimizer.state_dict()} 330 | 331 | # Save last, best and delete 332 | torch.save(ckpt, last) 333 | if (best_fitness == fi) and not final_epoch: 334 | torch.save(ckpt, best) 335 | del ckpt 336 | 337 | # end epoch ---------------------------------------------------------------------------------------------------- 338 | # end training 339 | 340 | # Strip optimizers 341 | n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name 342 | fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n 343 | for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]): 344 | if os.path.exists(f1): 345 | os.rename(f1, f2) # rename 346 | ispt = f2.endswith('.pt') # is *.pt 347 | strip_optimizer(f2) if ispt else None # strip optimizer 348 | os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload 349 | 350 | # Finish 351 | if not opt.evolve: 352 | plot_results() # save as results.png 353 | print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) 354 | dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None 355 | torch.cuda.empty_cache() 356 | return results 357 | 358 | 359 | if __name__ == '__main__': 360 | check_git_status() 361 | parser = argparse.ArgumentParser() 362 | parser.add_argument('--epochs', type=int, default=1000) 363 | parser.add_argument('--batch-size', type=int, default=3) 364 | parser.add_argument('--cfg', type=str, default='models/yolov5x.yaml', help='*.cfg path') 365 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path') 366 | parser.add_argument('--img-size', nargs='+', type=int, default=[1024, 1024], help='train,test sizes') 367 | 
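# Checkpointing above: fitness() reduces (P, R, mAP, F1) to a single weighted
# scalar, last.pt is written every epoch, and best.pt is refreshed whenever
# an epoch sets a new best fitness. The CLI defaults here are tuned for this
# competition rather than for upstream YOLOv5: the yolov5x config, 1024 px
# train/test sizes matching the 1024x1024 crops from process_data_yolo.py,
# a batch size of 3, and weights/best.pt as the starting checkpoint.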
parser.add_argument('--rect', action='store_true', help='rectangular training')
368 |     parser.add_argument('--resume', action='store_true', help='resume training from last.pt')
369 |     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
370 |     parser.add_argument('--notest', action='store_true', help='only test final epoch')
371 |     parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
372 |     parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
373 |     parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
374 |     parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
375 |     parser.add_argument('--weights', type=str, default='weights/best.pt', help='initial weights path')
376 |     parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied')
377 |     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
378 |     parser.add_argument('--adam', action='store_true', help='use adam optimizer')
379 |     parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')  # %% escapes the literal % for argparse help
380 |     parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
381 |     opt = parser.parse_args()
382 |     opt.weights = last if opt.resume and not opt.weights else opt.weights
383 |     opt.cfg = check_file(opt.cfg)  # check file
384 |     opt.data = check_file(opt.data)  # check file
385 |     print(opt)
386 |     opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
387 |     device = torch_utils.select_device(opt.device, apex=mixed_precision, batch_size=opt.batch_size)
388 |     if device.type == 'cpu':
389 |         mixed_precision = False
390 | 
391 |     # Train
392 |     if not opt.evolve:
393 |         tb_writer = SummaryWriter(comment=opt.name)
394 |         print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
395 |         train(hyp)
396 | 
397 |     # Evolve hyperparameters (optional)
398 |     else:
399 |         tb_writer = None
400 |         opt.notest, opt.nosave = True, True  # only test/save final epoch
401 |         if opt.bucket:
402 |             os.system('gsutil cp gs://%s/evolve.txt .'
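# Each of the 10 generations below: read evolve.txt (one row per finished
# run: 7 result columns followed by the hyperparameter values), pick a parent
# by fitness-weighted random choice (or a fitness-weighted average of the top
# rows), multiply each hyperparameter by a random gain clipped to [0.3, 3.0],
# clip selected keys to hard limits, retrain, and append the outcome via
# print_mutation. A runnable toy version of the mutation step is sketched
# after this file.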
% opt.bucket) # download evolve.txt if exists 403 | 404 | for _ in range(10): # generations to evolve 405 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate 406 | # Select parent(s) 407 | parent = 'single' # parent selection method: 'single' or 'weighted' 408 | x = np.loadtxt('evolve.txt', ndmin=2) 409 | n = min(5, len(x)) # number of previous results to consider 410 | x = x[np.argsort(-fitness(x))][:n] # top n mutations 411 | w = fitness(x) - fitness(x).min() # weights 412 | if parent == 'single' or len(x) == 1: 413 | # x = x[random.randint(0, n - 1)] # random selection 414 | x = x[random.choices(range(n), weights=w)[0]] # weighted selection 415 | elif parent == 'weighted': 416 | x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination 417 | 418 | # Mutate 419 | mp, s = 0.9, 0.2 # mutation probability, sigma 420 | npr = np.random 421 | npr.seed(int(time.time())) 422 | g = np.array([1, 1, 1, 1, 1, 1, 1, 0, .1, 1, 0, 1, 1, 1, 1, 1, 1, 1]) # gains 423 | ng = len(g) 424 | v = np.ones(ng) 425 | while all(v == 1): # mutate until a change occurs (prevent duplicates) 426 | v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) 427 | for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) 428 | hyp[k] = x[i + 7] * v[i] # mutate 429 | 430 | # Clip to limits 431 | keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma'] 432 | limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)] 433 | for k, v in zip(keys, limits): 434 | hyp[k] = np.clip(hyp[k], v[0], v[1]) 435 | 436 | # Train mutation 437 | results = train(hyp.copy()) 438 | 439 | # Write mutation results 440 | print_mutation(hyp, results, opt.bucket) 441 | 442 | # Plot results 443 | # plot_evolution_results(hyp) 444 | --------------------------------------------------------------------------------
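The mutation step in the evolve branch above is terse, so here is a minimal, self-contained sketch of just that step. It uses a hypothetical two-key `hyp` dict and a made-up parent row; variable names mirror train.py, but the snippet is illustrative only and is not part of this repo:

```python
import numpy as np

# Toy stand-ins for the training code's state: a two-key hyperparameter dict
# and a parent row as it would appear in evolve.txt (first 7 columns hold
# results, the rest hold the hyperparameter values).
hyp = {'lr0': 0.01, 'momentum': 0.937}
parent = np.array([0, 0, 0, 0, 0, 0, 0,   # 7 result columns (unused here)
                   0.01, 0.937])          # parent hyperparameter values

mp, s = 0.9, 0.2          # mutation probability and sigma, as in train.py
g = np.ones(len(hyp))     # per-key gains: 1 = mutable, 0 = frozen
npr = np.random

v = np.ones(len(hyp))
while all(v == 1):        # resample until at least one key actually changes
    v = (g * (npr.random(len(hyp)) < mp) * npr.randn(len(hyp))
         * npr.random() * s + 1).clip(0.3, 3.0)

for i, k in enumerate(hyp):   # offset 7 skips the result columns
    hyp[k] = parent[i + 7] * v[i]

# Clip to hard limits, mirroring the keys/limits tables in train.py
limits = {'lr0': (1e-5, 1e-2), 'momentum': (0.60, 0.98)}
for k, (lo, hi) in limits.items():
    hyp[k] = float(np.clip(hyp[k], lo, hi))

print(hyp)
```

The `while all(v == 1)` guard simply resamples until at least one gain moves off 1.0, so a generation never exactly repeats its parent; the per-key gain vector `g` (all ones in this toy) is how train.py freezes individual hyperparameters by zeroing their entry.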