├── .gitattributes ├── .gitignore ├── CVC-YOLOv3 ├── README.md ├── __init__.py ├── dataset │ └── README.md ├── detect.py ├── generate_kmeans_dataset_csvs.py ├── logs │ └── README.md ├── model_cfg │ ├── yolo_baseline.cfg │ └── yolo_baseline_tiny.cfg ├── models.py ├── outputs │ ├── README.md │ └── visualization │ │ └── README.md ├── requirements.txt ├── setup.py ├── train.py ├── train_hyper.py ├── utils │ ├── __init__.py │ ├── datasets.py │ ├── nms.py │ ├── parse_config.py │ └── utils.py ├── validate.py ├── yolo2onnx.py ├── yolo_tutorial.ipynb └── yolo_tutorial_util.py ├── Driverless_CV_Paper.pdf ├── LICENSE ├── README.md └── RektNet ├── README.md ├── __init__.py ├── cross_ratio_loss.py ├── dataset.py ├── dataset └── README.md ├── detect.py ├── keypoint_net.py ├── keypoint_tutorial_util.py ├── keypoints_tutorial.ipynb ├── logs └── README.md ├── outputs ├── README.md └── visualization │ └── README.md ├── pt_to_onnx.py ├── requirements.txt ├── resnet.py ├── train_eval.py ├── train_eval_hyper.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pt filter=lfs diff=lfs merge=lfs -text 2 | *.weights filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .pth 2 | *.mp4 3 | *.jpg 4 | *.png 5 | *.swp 6 | **/utils/gs/ 7 | **/.DS_Store 8 | 9 | **/.idea 10 | **/*.egg-info/ 11 | **/__pycache__ 12 | **/build 13 | *.py[cod] 14 | **/bin/* 15 | *.log 16 | *.error 17 | *.a 18 | *.so 19 | *.so.2 20 | .ipynb_checkpoints 21 | .python-version 22 | */gs/* 23 | -------------------------------------------------------------------------------- /CVC-YOLOv3/README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | The repo is originally forked from https://github.com/ultralytics/yolov3 and contains inference and training code for YOLOv3 in PyTorch. 
4 |
5 | ## Requirements:
6 |
7 | * CUDA>=10.1
8 | * python==3.6
9 | * numpy==1.16.4
10 | * matplotlib==3.1.0
11 | * torchvision==0.3.0
12 | * opencv_python==4.1.0.25
13 | * torch==1.1.0
14 | * requests==2.20.0
15 | * pandas==0.24.2
16 | * imgaug==0.3.0
17 | * onnx==1.6.0
18 | * optuna==0.19.0
19 | * Pillow==6.2.1
20 | * protobuf==3.11.0
21 | * pymysql==0.9.3
22 | * retrying==1.3.3
23 | * tensorboardX==1.9
24 | * tqdm==4.39.0
25 |
26 | ## Usage
27 | ### 1. Download our dataset
28 |
29 | ##### Download through GCP Toolkit
30 | ###### 1.1.1 Image dataset:
31 | ```
32 | gsutil cp -p gs://mit-driverless-open-source/YOLO_Dataset.zip ./dataset/
33 | ```
34 | then unzip
35 | ```
36 | unzip dataset/YOLO_Dataset.zip -d ./dataset/
37 | ```
38 | ###### 1.1.2 Label csv files:
39 | ```
40 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/all.csv ./dataset/
41 | ```
42 | ```
43 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/train.csv ./dataset/
44 | ```
45 | ```
46 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/validate.csv ./dataset/
47 | ```
48 | ###### 1.1.3 Initial weights files:
49 | YOLOv3 initial weights:
50 | ```
51 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/sample-yolov3.weights ./dataset/
52 | ```
53 |
54 | YOLOv3-tiny initial weights:
55 | ```
56 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/sample-yolov3-tiny.weights ./dataset/
57 | ```
58 |
59 | ##### Download manually (Optional)
60 | You can download the image dataset and label csv files from the links below and unzip them into `./dataset/YOLO_Dataset/`
61 |
62 | [Image dataset](https://storage.cloud.google.com/mit-driverless-open-source/YOLO_Dataset.zip?authuser=1)
63 |
64 | [All label csv](https://storage.cloud.google.com/mit-driverless-open-source/yolov3-training/all.csv?authuser=1)
65 |
66 | [Train label csv](https://storage.cloud.google.com/mit-driverless-open-source/yolov3-training/train.csv?authuser=1)
67 |
68 | [Validate label csv](https://storage.cloud.google.com/mit-driverless-open-source/yolov3-training/validate.csv?authuser=1)
69 |
70 | [Initial YOLOv3 weights file](https://storage.cloud.google.com/mit-driverless-open-source/yolov3-training/sample-yolov3.weights?authuser=1)
71 |
72 | [Initial YOLOv3-tiny weights file](https://storage.cloud.google.com/mit-driverless-open-source/yolov3-training/sample-yolov3-tiny.weights?authuser=1)
73 |
74 | #### 1.2 Environment Setup (Optional)
75 |
76 | ```
77 | sudo python3 setup.py build develop
78 | ```
79 |
80 | ### 2. Training
81 |
82 | ```
83 | python3 train.py --model_cfg=model_cfg/yolo_baseline.cfg --weights_path=dataset/sample-yolov3.weights
84 | ```
85 |
86 | Once you've finished training, you can access the weights file in `./outputs/`
87 |
88 | (Optional: we also provide a tiny YOLO cfg, for which no evaluation metrics are available)
89 |
90 | ### 3. Inference
91 |
92 | #### To download our pretrained YOLO weights for *Formula Student Standard*, click ***[here](https://storage.googleapis.com/mit-driverless-open-source/pretrained_yolo.weights)***
93 |
94 | ```
95 | python3 detect.py --model_cfg=<path to cfg file> --target_path=<path to image/video> --weights_path=<path to .weights file>
96 | ```
97 |
98 | Once you've finished inference, you can access the results in `./outputs/visualization/`
99 |
100 | #### Run Bayesian hyperparameter search
101 |
102 | Before running the Bayesian hyperparameter search, make sure you know which specific hyperparameters you wish to tune, and a reasonable operating range or set of options for each of them.
103 |
104 | Go into the `objective()` function of `train_hyper.py` and edit your custom search.
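The exact contents of `objective()` depend on your checkout of `train_hyper.py`, but a custom search usually amounts to sampling each hyperparameter from the Optuna `trial` object and returning the validation metric to optimize. A minimal, hypothetical sketch (the `run_training` helper and the parameter names below are illustrative, not the script's actual API):
```
# Hypothetical sketch only -- adapt to the real objective() in train_hyper.py.
def objective(trial):
    # Sample the hyperparameters you want to search over, with their ranges/options.
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)            # log-scale range
    momentum = trial.suggest_uniform("momentum", 0.85, 0.99)   # linear range
    optimizer_pick = trial.suggest_categorical("optimizer", ["Adam", "SGD"])

    # Run a training session with the sampled values (run_training stands in for
    # however objective() invokes the trainer) and return the metric Optuna should
    # minimize, e.g. the best validation loss reached during this trial.
    val_loss = run_training(lr=lr, momentum=momentum, optimizer_pick=optimizer_pick)
    return val_loss
```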
105 |
106 | Then launch your Bayesian hyperparameter search:
107 | ```
108 | python3 train_hyper.py --model_cfg=<path to cfg file> --study_name=<name of the Optuna study>
109 | ```
110 |
111 | #### Convert .weights to .onnx manually
112 |
113 | Though our training script will do automatic .weights -> .onnx conversion, you can always do it manually:
114 | ```
115 | python3 yolo2onnx.py --cfg_name=<path to cfg file> --weights_name=<path to .weights file>
116 | ```
117 |
118 | #### Split your own csv file
119 |
120 | ```
121 | python3 generate_kmeans_dataset_csvs.py --input_csvs=<path to csv file to split> --dataset_path=<path to image dataset>
122 | ```
123 |
124 |
-------------------------------------------------------------------------------- /CVC-YOLOv3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-core/MIT-Driverless-CV-TrainingInfra/bb8d6e22d8211aad1cd56f698723db8812bd6623/CVC-YOLOv3/__init__.py
-------------------------------------------------------------------------------- /CVC-YOLOv3/dataset/README.md: --------------------------------------------------------------------------------
1 | This is the folder that stores the dataset csv files.
2 |
3 | To download our open-sourced dataset labels from the MIT Driverless GCP bucket:
4 | ```
5 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/all.csv ./
6 | ```
7 | ```
8 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/train.csv ./
9 | ```
10 | ```
11 | gsutil cp -p gs://mit-driverless-open-source/yolov3-training/validate.csv ./
12 | ```
-------------------------------------------------------------------------------- /CVC-YOLOv3/detect.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import argparse
4 | import os
5 | from os.path import isfile, join
6 | import random
7 | import tempfile
8 | import time
9 | import copy
10 | import multiprocessing
11 | import subprocess
12 | import shutil
13 | import cv2
14 |
15 | import torch
16 | import torch.nn as nn
17 | from torch.utils.data import DataLoader
18 | from tensorboardX import SummaryWriter
19 |
20 | from PIL import Image, ImageDraw
21 |
22 | import torchvision
23 | from models import Darknet
24 | from utils.datasets import ImageLabelDataset
25 | from utils.nms import nms
26 | from utils.utils import xywh2xyxy, calculate_padding
27 |
28 | import warnings
29 | from tqdm import tqdm
30 |
31 | warnings.filterwarnings("ignore")
32 |
33 | detection_tmp_path = "/tmp/detect/"
34 |
35 |
36 | def main(target_path,output_path,weights_path,model_cfg,conf_thres,nms_thres,xy_loss,wh_loss,no_object_loss,object_loss,vanilla_anchor):
37 |
38 |     cuda = torch.cuda.is_available()
39 |     device = torch.device('cuda:0' if cuda else 'cpu')
40 |     random.seed(0)
41 |     torch.manual_seed(0)
42 |     if cuda:
43 |         torch.cuda.manual_seed(0)
44 |         torch.cuda.manual_seed_all(0)
45 |         torch.backends.cudnn.benchmark = True
46 |         torch.cuda.empty_cache()
47 |     model = Darknet(config_path=model_cfg,xy_loss=xy_loss,wh_loss=wh_loss,no_object_loss=no_object_loss,object_loss=object_loss,vanilla_anchor=vanilla_anchor)
48 |
49 |     # Load weights
50 |     model.load_weights(weights_path, model.get_start_weight_dim())
51 |     model.to(device, non_blocking=True)
52 |
53 |     detect(target_path,
54 |         output_path,
55 |         model,
56 |         device=device,
57 |         conf_thres=conf_thres,
58 |         nms_thres=nms_thres)
59 |
60 | def single_img_detect(target_path,output_path,mode,model,device,conf_thres,nms_thres):
61 |
62 |     img = 
Image.open(target_path).convert('RGB') 63 | w, h = img.size 64 | new_width, new_height = model.img_size() 65 | pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width) 66 | img = torchvision.transforms.functional.pad(img, padding=(pad_w, pad_h, pad_w, pad_h), fill=(127, 127, 127), padding_mode="constant") 67 | img = torchvision.transforms.functional.resize(img, (new_height, new_width)) 68 | 69 | bw = model.get_bw() 70 | if bw: 71 | img = torchvision.transforms.functional.to_grayscale(img, num_output_channels=1) 72 | 73 | img = torchvision.transforms.functional.to_tensor(img) 74 | img = img.unsqueeze(0) 75 | 76 | with torch.no_grad(): 77 | model.eval() 78 | img = img.to(device, non_blocking=True) 79 | # output,first_layer,second_layer,third_layer = model(img) 80 | output = model(img) 81 | 82 | 83 | for detections in output: 84 | detections = detections[detections[:, 4] > conf_thres] 85 | box_corner = torch.zeros((detections.shape[0], 4), device=detections.device) 86 | xy = detections[:, 0:2] 87 | wh = detections[:, 2:4] / 2 88 | box_corner[:, 0:2] = xy - wh 89 | box_corner[:, 2:4] = xy + wh 90 | probabilities = detections[:, 4] 91 | nms_indices = nms(box_corner, probabilities, nms_thres) 92 | main_box_corner = box_corner[nms_indices] 93 | if nms_indices.shape[0] == 0: 94 | continue 95 | img_with_boxes = Image.open(target_path) 96 | draw = ImageDraw.Draw(img_with_boxes) 97 | w, h = img_with_boxes.size 98 | 99 | for i in range(len(main_box_corner)): 100 | x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w 101 | y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h 102 | x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w 103 | y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h 104 | draw.rectangle((x0, y0, x1, y1), outline="red") 105 | 106 | if mode == 'image': 107 | img_with_boxes.save(os.path.join(output_path,target_path.split('/')[-1])) 108 | return os.path.join(output_path,target_path.split('/')[-1]) 109 | else: 110 | img_with_boxes.save(target_path) 111 | return target_path 112 | 113 | def detect(target_path, 114 | output_path, 115 | model, 116 | device, 117 | conf_thres, 118 | nms_thres): 119 | 120 | target_filepath = target_path 121 | 122 | img_formats = ['.jpg', '.jpeg', '.png', '.tif'] 123 | vid_formats = ['.mov', '.avi', '.mp4'] 124 | 125 | mode = None 126 | 127 | if os.path.splitext(target_filepath)[-1].lower() in img_formats: 128 | mode = 'image' 129 | 130 | elif os.path.splitext(target_filepath)[-1].lower() in vid_formats: 131 | mode = 'video' 132 | 133 | print("Detection Mode is: " + mode) 134 | 135 | raw_file_name = target_filepath.split('/')[-1].split('.')[0].split('_')[-4:] 136 | raw_file_name = '_'.join(raw_file_name) 137 | 138 | if mode == 'image': 139 | detection_path = single_img_detect(target_path=target_filepath,output_path=output_path,mode=mode,model=model,device=device,conf_thres=conf_thres,nms_thres=nms_thres) 140 | 141 | print(f'Please check output image at {detection_path}') 142 | 143 | elif mode == 'video': 144 | if os.path.exists(detection_tmp_path): 145 | shutil.rmtree(detection_tmp_path) # delete output folder 146 | os.makedirs(detection_tmp_path) # make new output folder 147 | 148 | vidcap = cv2.VideoCapture(target_filepath) 149 | success,image = vidcap.read() 150 | count = 0 151 | 152 | 153 | 154 | while success: 155 | cv2.imwrite(detection_tmp_path + "/frame%d.jpg" % count, image) # save frame as JPEG file 156 | success,image = vidcap.read() 157 | count += 1 158 | 159 | # Find OpenCV version 160 | (major_ver, 
minor_ver, subminor_ver) = (cv2.__version__).split('.') 161 | 162 | if int(major_ver) < 3 : 163 | fps = vidcap.get(cv2.cv.CV_CAP_PROP_FPS) 164 | print ("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps)) 165 | else : 166 | fps = vidcap.get(cv2.CAP_PROP_FPS) 167 | print ("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps)) 168 | vidcap.release(); 169 | 170 | frame_array = [] 171 | files = [f for f in os.listdir(detection_tmp_path) if isfile(join(detection_tmp_path, f))] 172 | 173 | #for sorting the file names properly 174 | files.sort(key = lambda x: int(x[5:-4])) 175 | for i in tqdm(files,desc='Doing Single Image Detection'): 176 | filename=detection_tmp_path + i 177 | 178 | detection_path = single_img_detect(target_path=filename,output_path=output_path,mode=mode,model=model,device=device,conf_thres=conf_thres,nms_thres=nms_thres) 179 | #reading each files 180 | img = cv2.imread(detection_path) 181 | height, width, layers = img.shape 182 | size = (width,height) 183 | frame_array.append(img) 184 | 185 | local_output_uri = output_path + raw_file_name + ".mp4" 186 | 187 | video_output = cv2.VideoWriter(local_output_uri,cv2.VideoWriter_fourcc(*'DIVX'), fps, size) 188 | 189 | for frame in tqdm(frame_array,desc='Creating Video'): 190 | # writing to a image array 191 | video_output.write(frame) 192 | video_output.release() 193 | print(f'please check output video at {local_output_uri}') 194 | shutil.rmtree(detection_tmp_path) 195 | print("Please go to the link below to check the detection output file: ") 196 | print(output_path) 197 | 198 | if __name__ == '__main__': 199 | parser = argparse.ArgumentParser() 200 | def add_bool_arg(name, default, help): 201 | arg_group = parser.add_mutually_exclusive_group(required=False) 202 | arg_group.add_argument('--' + name, dest=name, action='store_true', help=help) 203 | arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 204 | parser.set_defaults(**{name:default}) 205 | parser.add_argument('--model_cfg', type=str, default='model_cfg/yolo_baseline.cfg') 206 | parser.add_argument('--target_path', type=str, help='path to target image/video') 207 | parser.add_argument('--output_path', type=str, default="outputs/visualization/") 208 | parser.add_argument('--weights_path', type=str, help='path to weights file') 209 | parser.add_argument('--conf_thres', type=float, default=0.8, help='object confidence threshold') 210 | parser.add_argument('--nms_thres', type=float, default=0.25, help='IoU threshold for non-maximum suppression') 211 | 212 | add_bool_arg('vanilla_anchor', default=False, help="whether to use vanilla anchor boxes for training") 213 | ##### Loss Constants ##### 214 | parser.add_argument('--xy_loss', type=float, default=2, help='confidence loss for x and y') 215 | parser.add_argument('--wh_loss', type=float, default=1.6, help='confidence loss for width and height') 216 | parser.add_argument('--no_object_loss', type=float, default=25, help='confidence loss for background') 217 | parser.add_argument('--object_loss', type=float, default=0.1, help='confidence loss for foreground') 218 | 219 | opt = parser.parse_args() 220 | 221 | main(target_path=opt.target_path, 222 | output_path=opt.output_path, 223 | weights_path=opt.weights_path, 224 | model_cfg=opt.model_cfg, 225 | conf_thres=opt.conf_thres, 226 | nms_thres=opt.nms_thres, 227 | xy_loss=opt.xy_loss, 228 | wh_loss=opt.wh_loss, 229 | no_object_loss=opt.no_object_loss, 230 | object_loss=opt.object_loss, 231 | 
vanilla_anchor=opt.vanilla_anchor) 232 | -------------------------------------------------------------------------------- /CVC-YOLOv3/generate_kmeans_dataset_csvs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import csv 4 | import tempfile 5 | import argparse 6 | import cv2 7 | import matplotlib 8 | matplotlib.use('Agg') 9 | import matplotlib.pyplot as plt 10 | import pandas as pd 11 | import numpy as np 12 | from tqdm import tqdm 13 | 14 | visualization_tmp_path = "outputs/visualization/" 15 | 16 | def assignment(boxes, centroids): 17 | for i in centroids: 18 | boxes['distance_from_{}'.format(i)] = (np.sqrt((boxes['h'] - centroids[i][0])**2 + (boxes['w'] - centroids[i][1])**2)) 19 | centroid_distance_cols = ['distance_from_{}'.format(i) for i in centroids.keys()] 20 | boxes['closest'] = boxes.loc[:, centroid_distance_cols].idxmin(axis=1) 21 | boxes['closest'] = boxes['closest'].map(lambda x: int(x.lstrip('distance_from_'))) 22 | return boxes 23 | 24 | def update(boxes, centroids): 25 | for i in centroids: 26 | centroids[i][0] = np.mean(boxes[boxes['closest'] == i]['h']) 27 | centroids[i][1] = np.mean(boxes[boxes['closest'] == i]['w']) 28 | return centroids 29 | 30 | def main(csv_uri,dataset_path,output_path,num_clst,max_cone,min_cone,if_plot,split_up): 31 | box_dict = {} #dictionary with key=tuple of image size, value=list of bounding boxes in image of that size 32 | img_w = 0 33 | img_h = 0 34 | updated_rows = [] 35 | final_rows = [] 36 | in_csv_tempfile = csv_uri 37 | length = 0 38 | 39 | ##### getting csv length for progress bar ##### 40 | with open(in_csv_tempfile) as lines: 41 | next(lines) #skip first line 42 | lines = [line for line in lines] 43 | length = len(lines) 44 | ############################# 45 | 46 | 47 | with open(in_csv_tempfile) as f: 48 | next(f) #skip first line 49 | csv_reader = csv.reader(f) 50 | 51 | print("getting images' width and height") 52 | for i, row in enumerate(tqdm(csv_reader,total=length,desc='Reading Images')): 53 | if i < 1: 54 | continue 55 | ##### getting image width and height ##### 56 | img_path = os.path.join(dataset_path,row[0]) 57 | if not os.path.isfile(img_path): 58 | raise Exception("could not find image: {image_uri}".format(image_uri=os.path.join(dataset_path,row[0]))) 59 | img = cv2.imread(img_path) 60 | img_h, img_w, _ = img.shape 61 | ############################# 62 | 63 | ##### writing updated rows ##### 64 | begin_part = row[:2] 65 | end_part = row[2:] 66 | begin_part.append(img_w) 67 | begin_part.append(img_h) 68 | rows = begin_part + end_part 69 | updated_rows.append(rows) 70 | ############################# 71 | 72 | ##### preparing box dictionary for k-means ##### 73 | h = int(row[2]) 74 | w = int(row[3]) 75 | box_dict[(img_h,img_w)] = box_dict.get((img_h, img_w), []) + [(h,w)] 76 | ############################# 77 | 78 | ##### plot original ##### 79 | if if_plot: 80 | colors = matplotlib.cm.rainbow(np.linspace(0, 1, len(box_dict))) 81 | i=0 82 | fig = plt.figure() 83 | labels = [] 84 | for key in box_dict: 85 | labels.append(key) 86 | h = [points[0] for points in box_dict[key]] 87 | w = [points[1] for points in box_dict[key]] 88 | plt.scatter(w, h, color=colors[i]) 89 | i+=1 90 | fig.suptitle('Original Sizes', fontsize=20) 91 | x1,x2,y1,y2 = plt.axis() 92 | plt.axis((0,450,0,450)) 93 | plt.xlabel('Width', fontsize=18) 94 | plt.ylabel('Height', fontsize=16) 95 | plt.legend(labels) 96 | fig.savefig(os.path.join(visualization_tmp_path,'original_boxes.png')) 97 | 
98 | ############################# 99 | 100 | ##### calculating scale ##### 101 | max_sizes = {} 102 | min_sizes = {} 103 | for h,w in box_dict: 104 | boxes = sorted(box_dict[(h,w)], key=lambda x: x[0]) 105 | max_sizes[(h,w)] = boxes[int(.95*len(boxes))-1] 106 | min_sizes[(h,w)] = boxes[int(0.05*(len(boxes)))] 107 | 108 | scaled_heights = [] 109 | scaled_widths = [] 110 | 111 | i=0 112 | scaled_plot = {} 113 | scale_dict = {} 114 | for h,w in box_dict: 115 | plot_heights = [] 116 | plot_widths = [] 117 | max_h, max_w = max_sizes[(h,w)] 118 | min_h, min_w = min_sizes[(h,w)] 119 | 120 | h_ratio = (max_cone-min_cone)/(max_h-min_h) 121 | print("{height}x{width} images are scaled by {scale}".format(height=h, width=w, scale=h_ratio)) 122 | scale_dict[(h,w)] = scale_dict.get((h, w), 0) + h_ratio 123 | 124 | for box_h, box_w in box_dict[(h,w)]: 125 | scaled_heights.append((box_h-min_h)*h_ratio + min_cone) 126 | scaled_widths.append((box_w-min_w)*h_ratio + min_cone) 127 | if if_plot: 128 | plot_heights.append((box_h-min_h)*h_ratio + min_cone) 129 | plot_widths.append((box_w-min_w)*h_ratio + min_cone) 130 | if if_plot: 131 | scaled_plot[i] = [plot_widths, plot_heights] 132 | i += 1 133 | 134 | scaled_boxes = pd.DataFrame({'h': scaled_heights, 'w': scaled_widths}) 135 | ############################# 136 | 137 | ##### calculating k-means ##### 138 | 139 | centroids = {} 140 | for i in range(num_clst): #start with random boxes as centroids 141 | rand_index = np.random.randint(0, scaled_boxes.shape[0]) 142 | centroids[i] = [scaled_boxes['h'][rand_index], scaled_boxes['w'][rand_index]] 143 | scaled_boxes = assignment(scaled_boxes, centroids) 144 | while True: 145 | closest_centroids = scaled_boxes['closest'].copy(deep=True) 146 | centroids = update(scaled_boxes, centroids) 147 | scaled_boxes = assignment(scaled_boxes, centroids) 148 | if closest_centroids.equals(scaled_boxes['closest']): 149 | break 150 | ############################# 151 | 152 | ##### plot afterwards ##### 153 | if if_plot: 154 | colors = matplotlib.cm.rainbow(np.linspace(0, 1, len(scaled_plot))) 155 | #plot scaled height 156 | figure = plt.figure() 157 | for i in scaled_plot: 158 | plt.scatter(scaled_plot[i][0], scaled_plot[i][1], color=colors[i]) 159 | figure.suptitle('Scaled Sizes', fontsize=20) 160 | x1,x2,y1,y2 = plt.axis() 161 | plt.axis((0,450,0,450)) 162 | plt.legend(labels) 163 | plt.xlabel('Width', fontsize=18) 164 | plt.ylabel('Height', fontsize=16) 165 | figure.savefig('scaled_boxes.png') 166 | ############################# 167 | 168 | #plot centroids on top 169 | h = [] 170 | w = [] 171 | for key in centroids: 172 | h.append(centroids[key][0]) 173 | w.append(centroids[key][1]) 174 | plt.scatter(w,h, color='k') 175 | figure.suptitle('Centroids and Scaled Boxes', fontsize=20) 176 | figure.savefig(os.path.join(visualization_tmp_path,'centroids_scaled.png')) 177 | ############################# 178 | 179 | #plot centroids separately 180 | figure2=plt.figure() 181 | h = [] 182 | w = [] 183 | for key in centroids: 184 | h.append(centroids[key][0]) 185 | w.append(centroids[key][1]) 186 | plt.scatter(w,h) 187 | figure2.suptitle('Centroids', fontsize=20) 188 | figure2.savefig(os.path.join(visualization_tmp_path,'centroids.png')) 189 | ############################# 190 | ############################# 191 | 192 | ##### uploading anchor boxes file ##### 193 | text_file = open('anchors.txt','w') 194 | print('Anchors = ', centroids) 195 | for key in centroids: 196 | text_file .write('%0.2f,%0.2f \n'%(centroids[key][0], centroids[key][1])) 
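# (Note: each line written above stores one k-means centroid as 'height,width'; the same
# anchors, sorted by area, are also embedded later in the first row of the generated train.csv.)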
197 | text_file.close() 198 | ############################# 199 | 200 | scale = None 201 | flag_row = None 202 | with open(in_csv_tempfile) as f: 203 | next(f) #skip first line 204 | csv_reader = csv.reader(f) 205 | 206 | print("writing updated rows into csv file") 207 | for i, row in enumerate(tqdm(updated_rows,desc='Writing Files')): 208 | 209 | ##### writing updated rows ##### 210 | begin_part = row[:4] 211 | end_part = row[4:] 212 | img_w = row[2] 213 | img_h = row[3] 214 | begin_part.append(scale_dict[(img_h,img_w)]) 215 | flag_row = begin_part + end_part 216 | final_rows.append(flag_row) 217 | ############################# 218 | 219 | new_train_uri = os.path.join(output_path, "train.csv") 220 | train_rows = [] 221 | new_test_uri = os.path.join(output_path, "test.csv") 222 | test_rows = [] 223 | new_validate_uri = os.path.join(output_path, "validate.csv") 224 | validate_rows = [] 225 | new_train_validate_uri = os.path.join(output_path, "train-validate.csv") 226 | train_validate_rows = [] 227 | all_uri = os.path.join(output_path, "all.csv") 228 | all_rows = [] 229 | empty_imgs = [] 230 | compensate_rows = [] 231 | 232 | print("spliting up datasets") 233 | for i, row in enumerate(tqdm(final_rows,desc='Spliting Datasets')): 234 | all_rows.append(row) 235 | remainder = i % 100 236 | if remainder < int(split_up[0]): 237 | train_rows.append(row) 238 | train_validate_rows.append(row) 239 | continue 240 | if remainder < int(split_up[0]) + int(split_up[1]): 241 | validate_rows.append(row) 242 | train_validate_rows.append(row) 243 | continue 244 | test_rows.append(row) 245 | 246 | ##############for 0 label images trading############## 247 | 248 | ###getting all 0 labeled images in validation set 249 | for i,row in enumerate(validate_rows): 250 | if "" == "".join(row[5:]): 251 | empty_imgs.append(row) 252 | ############################# 253 | 254 | 255 | ###remove all those 0 labeled images in validation set 256 | for i,row in enumerate(empty_imgs): 257 | validate_rows.remove(row) 258 | ############################# 259 | 260 | 261 | ###get compensation from training set 262 | counter = 0 263 | for i,row in enumerate(train_rows): 264 | if not "" == "".join(row[5:]): 265 | compensate_rows.append(row) 266 | counter +=1 267 | if counter == len(empty_imgs): 268 | break 269 | ############################# 270 | 271 | ###remove compensation from training set 272 | for i,row in enumerate(compensate_rows): 273 | train_rows.remove(row) 274 | ############################# 275 | 276 | ###adding 0 labeled images back to training set 277 | for i,row in enumerate(empty_imgs): 278 | train_rows.append(row) 279 | ############################# 280 | 281 | ###add compensation back to validation set 282 | for i,row in enumerate(compensate_rows): 283 | validate_rows.append(row) 284 | ############################# 285 | 286 | 287 | 288 | ###################################################### 289 | 290 | print(str(len(empty_imgs))+" '0 label images' got traded from validation set to training set.") 291 | 292 | ##### getting anchor values in order ##### 293 | anchors = [] 294 | anchors_prime = "" 295 | for key in centroids: 296 | anchors.append([centroids[key][0], centroids[key][1]]) 297 | anchors.sort(key=lambda x: x[0]*x[1]) 298 | for anchor in anchors: 299 | anchors_prime += str(anchor)[1:-1] 300 | anchors_prime += "|" 301 | anchors_prime = anchors_prime[:-1] 302 | first_row = anchors_prime 303 | notes = "please see k-means anchor boxes in train.csv" 304 | ############################# 305 | 306 | second_row = 
['Name', 'URL', 'Width', 'Height', 'Scale','X0, Y0, H0, W0', 'X1, Y1, H1, W1', 'etc', '\n']
307 |
308 |     for (list_rows, list_uri) in ((train_rows, new_train_uri), (test_rows, new_test_uri),
309 |                                   (validate_rows, new_validate_uri), (train_validate_rows, new_train_validate_uri),
310 |                                   (all_rows, all_uri)):
311 |         with tempfile.NamedTemporaryFile() as out_csv_tempfile:
312 |             with open(out_csv_tempfile.name, 'w+') as out_csv_file:
313 |                 csv_writer = csv.writer(out_csv_file)
314 |                 if list_uri != new_train_uri:
315 |                     csv_writer.writerow([notes])
316 |                 else:
317 |                     csv_writer.writerow([first_row])
318 |                 csv_writer.writerow(second_row)
319 |                 for row in list_rows:
320 |                     csv_writer.writerow(row)
321 |             print(f"Saving {list_uri} ...")
322 |             os.rename(out_csv_tempfile.name, list_uri)
323 |
324 | if __name__ == "__main__":
325 |     parser = argparse.ArgumentParser()
326 |     def add_bool_arg(name, default, help):
327 |         arg_group = parser.add_mutually_exclusive_group(required=False)
328 |         arg_group.add_argument('--' + name, dest=name, action='store_true', help=help)
329 |         arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help))
330 |         parser.set_defaults(**{name:default})
331 |
332 |     parser.add_argument("--input_csvs", help="csv file to split", default = 'dataset/all.csv')
333 |     parser.add_argument('--dataset_path', type=str, help='path to image dataset',default="dataset/YOLO_Dataset/")
334 |     parser.add_argument('--output_path', type=str, help='path to output csv files',default="dataset/")
335 |     parser.add_argument('--num_clst', type=int, default=9, help='number of anchor boxes to generate')
336 |     parser.add_argument('--max_cone_height', default = 83, type = int, help='height of maximum sized cone to scale to\n')
337 |     parser.add_argument('--min_cone_height', default = 10, type = int, help='height of minimum sized cone to scale to\n')
338 |     parser.add_argument("--split_up", type=str, default = '75-15-0', help="train/validate/test split")
339 |
340 |     add_bool_arg('if_plot', default=True, help='whether to get anchor boxes plotted, plots saved as original_boxes.png, scaled_boxes.png, centroids.png in output uri')
341 |
342 |     opt = parser.parse_args()
343 |
344 |     split_up = [int(x) for x in opt.split_up.split('-')]
345 |
346 |     main(csv_uri=opt.input_csvs,
347 |         dataset_path=opt.dataset_path,
348 |         output_path=opt.output_path,
349 |         num_clst=opt.num_clst,
350 |         max_cone=opt.max_cone_height,
351 |         min_cone=opt.min_cone_height,
352 |         if_plot=opt.if_plot,
353 |         split_up=split_up)
354 |
355 |
-------------------------------------------------------------------------------- /CVC-YOLOv3/logs/README.md: --------------------------------------------------------------------------------
1 | Use this folder for training logs
2 |
-------------------------------------------------------------------------------- /CVC-YOLOv3/model_cfg/yolo_baseline.cfg: --------------------------------------------------------------------------------
1 | [net]
2 | width=800
3 | height=800
4 | onnx_height=320
5 | classes=80
6 | channels=3
7 | yolo_masks=6,7,8|3,4,5|0,1,2
8 | yolo_scales=32,16,8
9 | validate_uri=dataset/validate.csv
10 | train_uri=dataset/train.csv
11 | weights_uri=sample-yolov3.weights
12 | start_weights_dim=255,255,255
13 | num_train_images=-1
14 | num_validate_images=-1
15 | leaky_slope=0.1
16 | conv_activation=leaky
17 | build_targets_ignore_thresh=0.5
18 | conf_thresh=0.8
19 | nms_thresh=0.25
20 | iou_thresh=0.5
21 |
22 | [convolutional]
23 | filters=32
24 | size=3
25 | stride=1
26 |
27
| # Downsample 28 | 29 | [convolutional] 30 | filters=64 31 | size=3 32 | stride=2 33 | 34 | [convolutional] 35 | filters=32 36 | size=1 37 | stride=1 38 | 39 | [convolutional] 40 | filters=64 41 | size=3 42 | stride=1 43 | 44 | [shortcut] 45 | from=-3 46 | activation=linear 47 | 48 | # Downsample 49 | 50 | [convolutional] 51 | filters=128 52 | size=3 53 | stride=2 54 | 55 | [convolutional] 56 | filters=64 57 | size=1 58 | stride=1 59 | 60 | [convolutional] 61 | filters=128 62 | size=3 63 | stride=1 64 | 65 | [shortcut] 66 | from=-3 67 | activation=linear 68 | 69 | [convolutional] 70 | filters=64 71 | size=1 72 | stride=1 73 | 74 | [convolutional] 75 | filters=128 76 | size=3 77 | stride=1 78 | 79 | [shortcut] 80 | from=-3 81 | activation=linear 82 | 83 | # Downsample 84 | 85 | [convolutional] 86 | filters=256 87 | size=3 88 | stride=2 89 | 90 | [convolutional] 91 | filters=128 92 | size=1 93 | stride=1 94 | 95 | [convolutional] 96 | filters=256 97 | size=3 98 | stride=1 99 | 100 | [shortcut] 101 | from=-3 102 | activation=linear 103 | 104 | [convolutional] 105 | filters=128 106 | size=1 107 | stride=1 108 | 109 | [convolutional] 110 | filters=256 111 | size=3 112 | stride=1 113 | 114 | [shortcut] 115 | from=-3 116 | activation=linear 117 | 118 | [convolutional] 119 | filters=128 120 | size=1 121 | stride=1 122 | 123 | [convolutional] 124 | filters=256 125 | size=3 126 | stride=1 127 | 128 | [shortcut] 129 | from=-3 130 | activation=linear 131 | 132 | [convolutional] 133 | filters=128 134 | size=1 135 | stride=1 136 | 137 | [convolutional] 138 | filters=256 139 | size=3 140 | stride=1 141 | 142 | [shortcut] 143 | from=-3 144 | activation=linear 145 | 146 | [convolutional] 147 | filters=128 148 | size=1 149 | stride=1 150 | 151 | [convolutional] 152 | filters=256 153 | size=3 154 | stride=1 155 | 156 | [shortcut] 157 | from=-3 158 | activation=linear 159 | 160 | [convolutional] 161 | filters=128 162 | size=1 163 | stride=1 164 | 165 | [convolutional] 166 | filters=256 167 | size=3 168 | stride=1 169 | 170 | [shortcut] 171 | from=-3 172 | activation=linear 173 | 174 | [convolutional] 175 | filters=128 176 | size=1 177 | stride=1 178 | 179 | [convolutional] 180 | filters=256 181 | size=3 182 | stride=1 183 | 184 | [shortcut] 185 | from=-3 186 | activation=linear 187 | 188 | [convolutional] 189 | filters=128 190 | size=1 191 | stride=1 192 | 193 | [convolutional] 194 | filters=256 195 | size=3 196 | stride=1 197 | 198 | [shortcut] 199 | from=-3 200 | activation=linear 201 | 202 | # Downsample 203 | 204 | [convolutional] 205 | filters=512 206 | size=3 207 | stride=2 208 | 209 | [convolutional] 210 | filters=256 211 | size=1 212 | stride=1 213 | 214 | [convolutional] 215 | filters=512 216 | size=3 217 | stride=1 218 | 219 | [shortcut] 220 | from=-3 221 | activation=linear 222 | 223 | [convolutional] 224 | filters=256 225 | size=1 226 | stride=1 227 | 228 | [convolutional] 229 | filters=512 230 | size=3 231 | stride=1 232 | 233 | [shortcut] 234 | from=-3 235 | activation=linear 236 | 237 | [convolutional] 238 | filters=256 239 | size=1 240 | stride=1 241 | 242 | [convolutional] 243 | filters=512 244 | size=3 245 | stride=1 246 | 247 | [shortcut] 248 | from=-3 249 | activation=linear 250 | 251 | [convolutional] 252 | filters=256 253 | size=1 254 | stride=1 255 | 256 | [convolutional] 257 | filters=512 258 | size=3 259 | stride=1 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | filters=256 267 | size=1 268 | stride=1 269 | 270 | [convolutional] 271 | 
filters=512 272 | size=3 273 | stride=1 274 | 275 | [shortcut] 276 | from=-3 277 | activation=linear 278 | 279 | [convolutional] 280 | filters=256 281 | size=1 282 | stride=1 283 | 284 | [convolutional] 285 | filters=512 286 | size=3 287 | stride=1 288 | 289 | [shortcut] 290 | from=-3 291 | activation=linear 292 | 293 | [convolutional] 294 | filters=256 295 | size=1 296 | stride=1 297 | 298 | [convolutional] 299 | filters=512 300 | size=3 301 | stride=1 302 | 303 | [shortcut] 304 | from=-3 305 | activation=linear 306 | 307 | [convolutional] 308 | filters=256 309 | size=1 310 | stride=1 311 | 312 | [convolutional] 313 | filters=512 314 | size=3 315 | stride=1 316 | 317 | [shortcut] 318 | from=-3 319 | activation=linear 320 | 321 | # Downsample 322 | 323 | [convolutional] 324 | filters=1024 325 | size=3 326 | stride=2 327 | 328 | [convolutional] 329 | filters=512 330 | size=1 331 | stride=1 332 | 333 | [convolutional] 334 | filters=1024 335 | size=3 336 | stride=1 337 | 338 | [shortcut] 339 | from=-3 340 | activation=linear 341 | 342 | [convolutional] 343 | filters=512 344 | size=1 345 | stride=1 346 | 347 | [convolutional] 348 | filters=1024 349 | size=3 350 | stride=1 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | [convolutional] 357 | filters=512 358 | size=1 359 | stride=1 360 | 361 | [convolutional] 362 | filters=1024 363 | size=3 364 | stride=1 365 | 366 | [shortcut] 367 | from=-3 368 | activation=linear 369 | 370 | [convolutional] 371 | filters=512 372 | size=1 373 | stride=1 374 | 375 | [convolutional] 376 | filters=1024 377 | size=3 378 | stride=1 379 | 380 | [shortcut] 381 | from=-3 382 | activation=linear 383 | 384 | ###################### 385 | 386 | [convolutional] 387 | filters=512 388 | size=1 389 | stride=1 390 | 391 | [convolutional] 392 | size=3 393 | stride=1 394 | filters=1024 395 | 396 | [convolutional] 397 | filters=512 398 | size=1 399 | stride=1 400 | 401 | [convolutional] 402 | size=3 403 | stride=1 404 | filters=1024 405 | 406 | [convolutional] 407 | filters=512 408 | size=1 409 | stride=1 410 | 411 | [convolutional] 412 | size=3 413 | stride=1 414 | filters=1024 415 | 416 | [convolutional] 417 | size=1 418 | stride=1 419 | filters=preyolo 420 | activation=linear 421 | 422 | [yolo] 423 | note=Notice filters above 424 | 425 | [route] 426 | layers = -4 427 | 428 | [convolutional] 429 | filters=256 430 | size=1 431 | stride=1 432 | 433 | [upsample] 434 | stride=2 435 | 436 | [route] 437 | layers = -1, 61 438 | 439 | [convolutional] 440 | filters=256 441 | size=1 442 | stride=1 443 | 444 | [convolutional] 445 | size=3 446 | stride=1 447 | filters=512 448 | 449 | [convolutional] 450 | filters=256 451 | size=1 452 | stride=1 453 | 454 | [convolutional] 455 | size=3 456 | stride=1 457 | filters=512 458 | 459 | [convolutional] 460 | filters=256 461 | size=1 462 | stride=1 463 | 464 | [convolutional] 465 | size=3 466 | stride=1 467 | filters=512 468 | 469 | [convolutional] 470 | size=1 471 | stride=1 472 | filters=preyolo 473 | activation=linear 474 | 475 | [yolo] 476 | note=Notice filters above 477 | 478 | [route] 479 | layers = -4 480 | 481 | [convolutional] 482 | filters=128 483 | size=1 484 | stride=1 485 | 486 | [upsample] 487 | stride=2 488 | 489 | [route] 490 | layers = -1, 36 491 | 492 | [convolutional] 493 | filters=128 494 | size=1 495 | stride=1 496 | 497 | [convolutional] 498 | size=3 499 | stride=1 500 | filters=256 501 | 502 | [convolutional] 503 | filters=128 504 | size=1 505 | stride=1 506 | 507 | [convolutional] 508 | size=3 509 | 
stride=1 510 | filters=256 511 | 512 | [convolutional] 513 | filters=128 514 | size=1 515 | stride=1 516 | 517 | [convolutional] 518 | size=3 519 | stride=1 520 | filters=256 521 | 522 | [convolutional] 523 | size=1 524 | stride=1 525 | filters=preyolo 526 | activation=linear 527 | 528 | [yolo] 529 | note=Notice filters above 530 | 531 | -------------------------------------------------------------------------------- /CVC-YOLOv3/model_cfg/yolo_baseline_tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | width=800 3 | height=800 4 | onnx_height=320 5 | classes=80 6 | channels=3 7 | yolo_masks=3,4,5|0,1,2 8 | yolo_scales=32,16 9 | validate_uri=dataset/validate.csv 10 | train_uri=dataset/train.csv 11 | weights_uri=sample-yolov3-tiny.weights 12 | start_weights_dim=255,255 13 | num_train_images=-1 14 | num_validate_images=-1 15 | leaky_slope=0.1 16 | conv_activation=leaky 17 | build_targets_ignore_thresh=0.5 18 | conf_thresh=0.8 19 | nms_thresh=0.25 20 | iou_thresh=0.5 21 | 22 | [convolutional] 23 | filters=16 24 | size=3 25 | stride=1 26 | 27 | [maxpool] 28 | size=2 29 | stride=2 30 | 31 | [convolutional] 32 | filters=32 33 | size=3 34 | stride=1 35 | 36 | [maxpool] 37 | size=2 38 | stride=2 39 | 40 | [convolutional] 41 | filters=64 42 | size=3 43 | stride=1 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | filters=128 51 | size=3 52 | stride=1 53 | 54 | [maxpool] 55 | size=2 56 | stride=2 57 | 58 | [convolutional] 59 | filters=256 60 | size=3 61 | stride=1 62 | 63 | [maxpool] 64 | size=2 65 | stride=2 66 | 67 | [convolutional] 68 | filters=512 69 | size=3 70 | stride=1 71 | 72 | [maxpool] 73 | size=2 74 | stride=1 75 | 76 | [convolutional] 77 | filters=1024 78 | size=3 79 | stride=1 80 | 81 | ########### 82 | 83 | [convolutional] 84 | filters=256 85 | size=1 86 | stride=1 87 | 88 | [convolutional] 89 | filters=512 90 | size=3 91 | stride=1 92 | 93 | [convolutional] 94 | size=1 95 | stride=1 96 | filters=preyolo 97 | activation=linear 98 | 99 | [yolo] 100 | note=Notice filters above 101 | 102 | [route] 103 | layers = -4 104 | 105 | [convolutional] 106 | filters=128 107 | size=1 108 | stride=1 109 | 110 | [upsample] 111 | stride=2 112 | 113 | [route] 114 | layers = -1, 8 115 | 116 | [convolutional] 117 | filters=256 118 | size=3 119 | stride=1 120 | 121 | [convolutional] 122 | size=1 123 | stride=1 124 | filters=preyolo 125 | activation=linear 126 | 127 | [yolo] 128 | note=Notice filters above 129 | -------------------------------------------------------------------------------- /CVC-YOLOv3/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | import os 7 | from datetime import datetime 8 | import csv 9 | 10 | from utils.parse_config import parse_model_config 11 | from utils.utils import build_targets 12 | 13 | vanilla_anchor_list = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]] 14 | 15 | def create_modules(module_defs,xy_loss,wh_loss,no_object_loss,object_loss,vanilla_anchor): 16 | """ 17 | Constructs module list of layer blocks from module configuration in module_defs 18 | """ 19 | hyperparams = module_defs.pop(0) 20 | output_filters = [int(hyperparams["channels"])] 21 | img_width = int(hyperparams["width"]) 22 | img_height = int(hyperparams["height"]) 23 | onnx_height = int(hyperparams["onnx_height"]) 24 | num_classes 
= int(hyperparams["classes"]) 25 | leaky_slope = float(hyperparams["leaky_slope"]) 26 | conv_activation = hyperparams["conv_activation"] 27 | yolo_masks = [[int(y) for y in x.split(',')] for x in hyperparams["yolo_masks"].split('|')] 28 | ##### reading anchors from train.csv ##### 29 | csv_uri = hyperparams["train_uri"] 30 | training_csv_tempfile = csv_uri 31 | with open(training_csv_tempfile) as f: 32 | csv_reader = csv.reader(f) 33 | row = next(csv_reader) 34 | row = str(row)[2:-2] 35 | anchor_list = [[float(y) for y in x.split(',')] for x in row.split("'")[0].split('|')] 36 | ############################# 37 | 38 | ##### using vanilla anchor boxes if its switch is on ##### 39 | if vanilla_anchor: 40 | anchor_list = vanilla_anchor_list 41 | ############################# 42 | build_targets_ignore_thresh=float(hyperparams["build_targets_ignore_thresh"]) 43 | module_list = nn.ModuleList() 44 | 45 | yolo_count = 0 46 | act_flag = 1 #all pre yolo layers need linear activations 47 | for i, module_def in enumerate(module_defs): 48 | modules = nn.Sequential() 49 | if module_def["type"] == "convolutional": 50 | bn = 1 51 | if module_def["filters"] == 'preyolo': 52 | filters = (num_classes + 5) * len(yolo_masks[yolo_count]) 53 | act_flag = 0 54 | bn = 0 55 | else: 56 | filters = int(module_def["filters"]) 57 | kernel_size = int(module_def["size"]) 58 | pad = int((kernel_size - 1) // 2) 59 | modules.add_module("conv_%d" % i, nn.Conv2d( 60 | in_channels=output_filters[-1], 61 | out_channels=filters, 62 | kernel_size=kernel_size, 63 | stride=int(module_def["stride"]), 64 | padding=pad, 65 | bias=not bn)) 66 | if bn: 67 | modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters)) 68 | if conv_activation == "leaky" and act_flag == 1: 69 | modules.add_module("leaky_%d" % i, nn.LeakyReLU(leaky_slope)) 70 | if conv_activation == "ReLU" and act_flag == 1: 71 | modules.add_module("ReLU_%d" % i, nn.ReLU()) 72 | act_flag = 1 73 | 74 | elif module_def["type"] == "maxpool": 75 | kernel_size = int(module_def["size"]) 76 | stride = int(module_def["stride"]) 77 | if kernel_size == 2 and stride == 1: 78 | padding = nn.ZeroPad2d((0, 1, 0, 1)) 79 | modules.add_module("_debug_padding_%d" % i, padding) 80 | maxpool = nn.MaxPool2d( 81 | kernel_size=int(module_def["size"]), 82 | stride=int(module_def["stride"]), 83 | padding=int((kernel_size - 1) // 2)) 84 | modules.add_module("maxpool_%d" % i, maxpool) 85 | 86 | elif module_def["type"] == "upsample": 87 | upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest") 88 | modules.add_module("upsample_%d" % i, upsample) 89 | 90 | elif module_def["type"] == "route": 91 | layers = [int(x) for x in module_def["layers"].split(",")] 92 | filters = 0 93 | for layer_i in layers: 94 | if layer_i > 0: 95 | layer_i += 1 96 | filters += output_filters[layer_i] 97 | modules.add_module("route_%d" % i, EmptyLayer()) 98 | 99 | elif module_def["type"] == "shortcut": 100 | filters = output_filters[int(module_def["from"])] 101 | modules.add_module("shortcut_%d" % i, EmptyLayer()) 102 | 103 | elif module_def["type"] == "yolo": 104 | anchors = ([anchor_list[i] for i in yolo_masks[yolo_count]]) 105 | yolo_layer = YOLOLayer(anchors, num_classes, img_height, img_width, build_targets_ignore_thresh,conv_activation,xy_loss,wh_loss,object_loss,no_object_loss) 106 | modules.add_module("yolo_%d" % i, yolo_layer) 107 | yolo_count += 1 108 | module_list.append(modules) 109 | output_filters.append(filters) 110 | return hyperparams, module_list 111 | 112 | class 
EmptyLayer(nn.Module): 113 | """Placeholder for 'route' and 'shortcut' layers""" 114 | 115 | def __init__(self): 116 | super(EmptyLayer, self).__init__() 117 | 118 | class YOLOLayer(nn.Module): 119 | """Detection layer""" 120 | 121 | def __init__(self, anchors, num_classes, img_height, img_width, build_targets_ignore_thresh, conv_activation, xy_loss, wh_loss, object_loss, no_object_loss): 122 | super(YOLOLayer, self).__init__() 123 | self.anchors = anchors 124 | self.num_anchors = len(anchors) 125 | self.num_classes = num_classes 126 | self.bbox_attrs = 5 + num_classes 127 | self.image_height = img_height 128 | self.image_width = img_width 129 | self.ignore_thres = build_targets_ignore_thresh 130 | self.xy_loss = xy_loss 131 | self.wh_loss = wh_loss 132 | self.no_object_loss = no_object_loss 133 | self.object_loss = object_loss 134 | self.conv_activation = conv_activation 135 | 136 | self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss 137 | self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss 138 | self.ce_loss = nn.CrossEntropyLoss() # Class loss 139 | 140 | def forward(self, sample, targets=None): 141 | nA = self.num_anchors 142 | nB = sample.size(0) 143 | nGh = sample.size(2) 144 | nGw = sample.size(3) 145 | stride = self.image_height / nGh 146 | 147 | prediction = sample.view(nB, nA, self.bbox_attrs, nGh, nGw).permute(0, 1, 3, 4, 2).contiguous() 148 | 149 | # Get outputs 150 | x = torch.sigmoid(prediction[..., 0]) # Center x 151 | y = torch.sigmoid(prediction[..., 1]) # Center y 152 | w = prediction[..., 2] # Width 153 | h = prediction[..., 3] # Height 154 | pred_conf = torch.sigmoid(prediction[..., 4]) # Conf 155 | pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 156 | 157 | # Calculate offsets for each grid 158 | grid_x = torch.arange(nGw, dtype=torch.float, device=x.device).repeat(nGh, 1).view([1, 1, nGh, nGw]) 159 | grid_y = torch.arange(nGh, dtype=torch.float, device=x.device).repeat(nGw, 1).t().view([1, 1, nGh, nGw]).contiguous() 160 | scaled_anchors = torch.tensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors], dtype=torch.float, device=x.device) 161 | anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1)) 162 | anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1)) 163 | 164 | # Add offset and scale with anchors 165 | pred_boxes = torch.zeros(prediction[..., :4].shape, dtype=torch.float, device=x.device) 166 | pred_boxes[..., 0] = x.data + grid_x 167 | pred_boxes[..., 1] = y.data + grid_y 168 | pred_boxes[..., 2] = torch.exp(w.data) * anchor_w 169 | pred_boxes[..., 3] = torch.exp(h.data) * anchor_h 170 | 171 | # Training 172 | if targets is not None: 173 | self.mse_loss = self.mse_loss.to(sample.device, non_blocking=True) 174 | self.bce_loss = self.bce_loss.to(sample.device, non_blocking=True) 175 | self.ce_loss = self.ce_loss.to(x.device, non_blocking=True) 176 | mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets( 177 | target=targets, 178 | anchors=scaled_anchors, 179 | num_anchors=nA, 180 | num_classes=self.num_classes, 181 | grid_size_h=nGh, 182 | grid_size_w=nGw, 183 | ignore_thres=self.ignore_thres, 184 | ) 185 | 186 | # Handle target variables 187 | tx.requires_grad_(False) 188 | ty.requires_grad_(False) 189 | tw.requires_grad_(False) 190 | th.requires_grad_(False) 191 | tconf.requires_grad_(False) 192 | tcls.requires_grad_(False) 193 | 194 | # Get conf mask where gt and where there is no gt 195 | conf_mask_true = mask 196 | conf_mask_false = conf_mask - mask 197 | 198 | # Mask outputs to ignore non-existing objects 
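# (mask marks the anchor/grid cells responsible for a ground-truth box, and conf_mask_false = conf_mask - mask
# marks background cells; per build_targets in utils/utils.py, cells whose IoU with a target exceeds
# build_targets_ignore_thresh fall in neither set, so the losses below are computed only over the selected entries.)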
199 | loss_x = self.xy_loss * self.mse_loss(x[mask], tx[mask]) 200 | loss_y = self.xy_loss * self.mse_loss(y[mask], ty[mask]) 201 | loss_w = self.wh_loss * self.mse_loss(w[mask], tw[mask]) 202 | loss_h = self.wh_loss * self.mse_loss(h[mask], th[mask]) 203 | #We are only doing single class detection, so we set loss_cls always to be 0. You can always make it to another value if you wish to do multi-class training 204 | loss_cls_constant = 0 205 | loss_cls = loss_cls_constant * (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1)) 206 | 207 | loss_noobj = self.no_object_loss * self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) 208 | loss_obj = self.object_loss * self.bce_loss(pred_conf[conf_mask_true], tconf[conf_mask_true]) 209 | loss = loss_x + loss_y + loss_w + loss_h + loss_noobj + loss_obj + loss_cls 210 | 211 | return loss, torch.tensor((loss_x, loss_y, loss_w, loss_h, loss_obj, loss_noobj), device=targets.device) 212 | 213 | else: 214 | # If not in training phase return predictions 215 | output = torch.cat(( 216 | pred_boxes.view(nB, -1, 4) * stride, 217 | pred_conf.view(nB, -1, 1), 218 | pred_cls.view(nB, -1, self.num_classes)), 219 | -1) 220 | return output 221 | 222 | class Darknet(nn.Module): 223 | """YOLOv3 object detection model""" 224 | 225 | def __init__(self, config_path, xy_loss, wh_loss, no_object_loss, object_loss,vanilla_anchor): 226 | super(Darknet, self).__init__() 227 | self.module_defs = parse_model_config(config_path) 228 | 229 | self.hyperparams, self.module_list = create_modules(module_defs=self.module_defs,xy_loss=xy_loss,wh_loss=wh_loss,no_object_loss=no_object_loss,object_loss=object_loss,vanilla_anchor=vanilla_anchor) 230 | self.img_width = int(self.hyperparams["width"]) 231 | self.img_height = int(self.hyperparams["height"]) 232 | # in order to help train.py defines the onnx filename since it is not defined by yolo2onnx.py 233 | self.onnx_height = int(self.hyperparams["onnx_height"]) 234 | self.onnx_name = config_path.split('/')[-1].split('.')[0] + '_' + str(self.img_width) + str(self.onnx_height) + '.onnx' 235 | self.num_classes = int(self.hyperparams["classes"]) 236 | if int(self.hyperparams["channels"]) == 1: 237 | self.bw = True 238 | elif int(self.hyperparams["channels"]) == 3: 239 | self.bw = False 240 | else: 241 | print('Channels in cfg file is not set properly, making it colour') 242 | self.bw = False 243 | current_month = datetime.now().strftime('%B').lower() 244 | current_year = str(datetime.now().year) 245 | 246 | self.validate_uri = self.hyperparams["validate_uri"] 247 | self.train_uri = self.hyperparams["train_uri"] 248 | self.num_train_images = int(self.hyperparams["num_train_images"]) 249 | self.num_validate_images = int(self.hyperparams["num_validate_images"]) 250 | self.conf_thresh = float(self.hyperparams["conf_thresh"]) 251 | self.nms_thresh = float(self.hyperparams["nms_thresh"]) 252 | self.iou_thresh = float(self.hyperparams["iou_thresh"]) 253 | self.start_weights_dim = [int(x) for x in self.hyperparams["start_weights_dim"].split(',')] 254 | self.conv_activation = self.hyperparams["conv_activation"] 255 | 256 | ##### loss constants ##### 257 | self.xy_loss=xy_loss 258 | self.wh_loss=wh_loss 259 | self.no_object_loss=no_object_loss 260 | self.object_loss=object_loss 261 | ##### reading anchors from train.csv ##### 262 | csv_uri = self.hyperparams["train_uri"] 263 | training_csv_tempfile = csv_uri 264 | with open(training_csv_tempfile) as f: 265 | csv_reader = csv.reader(f) 266 | row = next(csv_reader) 267 | row 
= str(row)[2:-2] 268 | anchor_list = [[float(y) for y in x.split(',')] for x in row.split("'")[0].split('|')] 269 | ############################# 270 | 271 | ##### using vanilla anchor boxes until skanda dataloader is done ##### 272 | if vanilla_anchor: 273 | anchor_list = vanilla_anchor_list 274 | ############################# 275 | self.anchors = anchor_list 276 | self.seen = 0 277 | self.header_info = torch.tensor([0, 0, 0, self.seen, 0]) 278 | 279 | def get_start_weight_dim(self): 280 | return self.start_weights_dim 281 | 282 | def get_onnx_name(self): 283 | return self.onnx_name 284 | 285 | def get_bw(self): 286 | return self.bw 287 | 288 | def get_loss_constant(self): 289 | return [self.xy_loss,self.wh_loss,self.no_object_loss,self.object_loss] 290 | 291 | def get_conv_activation(self): 292 | return self.conv_activation 293 | 294 | def get_num_classes(self): 295 | return self.num_classes 296 | 297 | def get_anchors(self): 298 | return self.anchors 299 | 300 | def get_threshs(self): 301 | return self.conf_thresh, self.nms_thresh, self.iou_thresh 302 | 303 | def img_size(self): 304 | return self.img_width, self.img_height 305 | 306 | def get_links(self): 307 | return self.validate_uri, self.train_uri 308 | 309 | def num_images(self): 310 | return self.num_validate_images, self.num_train_images 311 | 312 | def forward(self, x, targets=None): 313 | is_training = targets is not None 314 | output = [] 315 | 316 | if is_training: 317 | total_losses = torch.zeros(6, device=targets.device) 318 | layer_outputs = [] 319 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 320 | if module_def["type"] in ["convolutional", "upsample", "maxpool"]: 321 | x = module(x) 322 | elif module_def["type"] == "route": 323 | layer_i = [int(x) for x in module_def["layers"].split(",")] 324 | x = torch.cat([layer_outputs[i] for i in layer_i], 1) 325 | elif module_def["type"] == "shortcut": 326 | layer_i = int(module_def["from"]) 327 | x = layer_outputs[-1] + layer_outputs[layer_i] 328 | elif module_def["type"] == "yolo": 329 | # Train phase: get loss 330 | if is_training: 331 | x, losses = module[0](x, targets) 332 | total_losses += losses 333 | # Test phase: Get detections 334 | else: 335 | x = module(x) 336 | output.append(x) 337 | layer_outputs.append(x) 338 | return (sum(output), *total_losses) if is_training else torch.cat(output, 1) 339 | def load_weights(self, weights_path, start_weight_dim): 340 | # Open the weights file 341 | fp = open(weights_path, "rb") 342 | header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values 343 | # Needed to write header when saving weights 344 | self.header_info = header 345 | 346 | self.seen = header[3] 347 | weights = np.fromfile(fp, dtype=np.float32) # The rest are weights 348 | fp.close() 349 | 350 | ptr = 0 351 | yolo_count = 0 352 | for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): 353 | if module_def["type"] == "convolutional": 354 | conv_layer = module[0] 355 | if module_def["filters"] != 'preyolo': 356 | # Load BN bias, weights, running mean and running variance 357 | bn_layer = module[1] 358 | num_b = bn_layer.bias.numel() # Number of biases 359 | # Bias 360 | bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias) 361 | bn_layer.bias.data.copy_(bn_b) 362 | ptr += num_b 363 | # Weight 364 | bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight) 365 | bn_layer.weight.data.copy_(bn_w) 366 | ptr += num_b 367 | # Running Mean 368 | bn_rm = 
torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean) 369 | bn_layer.running_mean.data.copy_(bn_rm) 370 | ptr += num_b 371 | # Running Var 372 | bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var) 373 | bn_layer.running_var.data.copy_(bn_rv) 374 | ptr += num_b 375 | # Load conv. weights 376 | num_w = conv_layer.weight.numel() 377 | conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight) 378 | conv_layer.weight.data.copy_(conv_w) 379 | ptr += num_w 380 | elif module_def["filters"] == 'preyolo': 381 | orig_dim = start_weight_dim[yolo_count] 382 | yolo_count += 1 383 | num_b = conv_layer.bias.numel() 384 | conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias) 385 | conv_layer.bias.data.copy_(conv_b) 386 | ptr += orig_dim 387 | # Load conv. weights 388 | num_w = conv_layer.weight.numel() 389 | dummyDims = [orig_dim] + list(conv_layer.weight.size()[1:]) 390 | dummy = torch.zeros(tuple(dummyDims)) 391 | conv_w = torch.from_numpy(weights[ptr : ptr + int(num_w * orig_dim / num_b)]).view_as(dummy) 392 | conv_w = conv_w[0:num_b][:][:][:] 393 | conv_layer.weight.data.copy_(conv_w) 394 | ptr += int(num_w * orig_dim / num_b) 395 | else: 396 | print(module) 397 | raise Exception('The above layer has its BN or preyolo defined wrong') 398 | 399 | def save_weights(self, path, cutoff=-1): 400 | 401 | fp = open(path, "wb") 402 | self.header_info[3] = self.seen 403 | self.header_info.tofile(fp) 404 | 405 | # Iterate through layers 406 | for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 407 | if module_def["type"] == "convolutional": 408 | conv_layer = module[0] 409 | # If batch norm, load bn first 410 | if module_def["filters"] != 'preyolo': 411 | bn_layer = module[1] 412 | bn_layer.bias.data.cpu().numpy().tofile(fp) 413 | bn_layer.weight.data.cpu().numpy().tofile(fp) 414 | bn_layer.running_mean.data.cpu().numpy().tofile(fp) 415 | bn_layer.running_var.data.cpu().numpy().tofile(fp) 416 | # Load conv bias 417 | else: 418 | conv_layer.bias.data.cpu().numpy().tofile(fp) 419 | # Load conv weights 420 | conv_layer.weight.data.cpu().numpy().tofile(fp) 421 | 422 | fp.close() 423 | -------------------------------------------------------------------------------- /CVC-YOLOv3/outputs/README.md: -------------------------------------------------------------------------------- 1 | This is the folder that we output weights file by default -------------------------------------------------------------------------------- /CVC-YOLOv3/outputs/visualization/README.md: -------------------------------------------------------------------------------- 1 | This is the folder that we store all visualization by default -------------------------------------------------------------------------------- /CVC-YOLOv3/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.16.4 2 | matplotlib==3.1.0 3 | torchvision==0.3.0 4 | opencv_python==4.1.0.25 5 | torch==1.1.0 6 | requests==2.20.0 7 | pandas==0.24.2 8 | imgaug==0.3.0 9 | onnx==1.6.0 10 | optuna==0.19.0 11 | Pillow==6.2.1 12 | protobuf==3.11.0 13 | pymysql==0.9.3 14 | retrying==1.3.3 15 | tensorboardX==1.9 16 | tqdm==4.39.0 17 | -------------------------------------------------------------------------------- /CVC-YOLOv3/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import setup 9 | from torch.utils.cpp_extension import CUDAExtension 10 | 11 | # also requires torch and torchvision as well but assuming they are already installed 12 | 13 | requirements = ["tqdm", "requests", "imgaug", "numpy", "requests", "pillow", "tensorboardX", 14 | "google-cloud-storage", "retrying", "optuna", "pymysql"] 15 | 16 | def get_extensions(): 17 | this_dir = os.path.dirname(os.path.abspath(__file__)) 18 | extensions_dir = os.path.join(this_dir, "csrc") 19 | 20 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 21 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 22 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 23 | 24 | sources = main_file + source_cpu 25 | 26 | extra_compile_args = {"cxx": []} 27 | define_macros = [] 28 | 29 | extension = CUDAExtension 30 | sources += source_cuda 31 | define_macros += [("WITH_CUDA", None)] 32 | extra_compile_args["nvcc"] = [ 33 | "-DCUDA_HAS_FP16=1", 34 | "-D__CUDA_NO_HALF_OPERATORS__", 35 | "-D__CUDA_NO_HALF_CONVERSIONS__", 36 | "-D__CUDA_NO_HALF2_OPERATORS__", 37 | ] 38 | 39 | sources = [os.path.join(extensions_dir, s) for s in sources] 40 | 41 | include_dirs = [extensions_dir] 42 | 43 | ext_modules = [ 44 | extension( 45 | "_C", 46 | sources, 47 | include_dirs=include_dirs, 48 | define_macros=define_macros, 49 | extra_compile_args=extra_compile_args, 50 | ) 51 | ] 52 | 53 | return ext_modules 54 | 55 | 56 | setup( 57 | name="mit-dut-yolov3", 58 | version="0.1", 59 | author="MIT Driverless", 60 | url="https://github.com/DUT-Racing/DUT18D_PerceptionCV/vectorized_yolov3", 61 | description="", 62 | # packages=find_packages(exclude=("configs", "tests",)), 63 | install_requires=requirements, 64 | ext_modules=get_extensions(), 65 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 66 | ) 67 | -------------------------------------------------------------------------------- /CVC-YOLOv3/train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import os 5 | import random 6 | import tempfile 7 | import time 8 | import multiprocessing 9 | import subprocess 10 | import math 11 | import shutil 12 | import math 13 | 14 | from datetime import datetime 15 | import torch 16 | import torch.nn as nn 17 | from torch.utils.data import DataLoader 18 | 19 | from models import Darknet 20 | from utils.datasets import ImageLabelDataset 21 | from utils.utils import model_info, print_args, Logger, visualize_and_save_to_local,xywh2xyxy 22 | import validate 23 | import warnings 24 | import sys 25 | 26 | ##### section for all random seeds ##### 27 | torch.manual_seed(17) 28 | torch.backends.cudnn.deterministic = True 29 | torch.backends.cudnn.benchmark = False 30 | ######################################## 31 | 32 | warnings.filterwarnings("ignore") 33 | os.environ['CUDA_LAUNCH_BLOCKING'] = "1" 34 | 35 | cuda = torch.cuda.is_available() 36 | device = torch.device('cuda:0' if cuda else 'cpu') 37 | num_cpu = multiprocessing.cpu_count() if cuda else 0 38 | if cuda: 39 | torch.cuda.synchronize() 40 | random.seed(0) 41 | torch.manual_seed(0) 42 | 43 | if cuda: 44 | torch.cuda.manual_seed(0) 45 | torch.cuda.manual_seed_all(0) 46 | torch.backends.cudnn.benchmark = True 47 | torch.cuda.empty_cache() 48 | 49 | def run_epoch(label_prefix, data_loader, num_steps, optimizer, model, epoch, 50 | num_epochs, step): 51 | 
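# run_epoch drives a single pass over `data_loader`: it moves each batch to the
# target device, evaluates the model to obtain the seven YOLO loss terms
# ("Total", L-x, L-y, L-w, L-h, L-noobj, L-obj), calls backward() only when
# label_prefix == "train", steps the optimizer when one is provided, and returns
# the accumulated per-epoch losses, wall-clock time, and target count.
# Example (mirrors the validation call in main() further down):
#   run_epoch(label_prefix="validate", data_loader=validate_data_loader,
#             num_steps=num_steps, optimizer=None, model=model,
#             epoch=epoch, num_epochs=num_epochs, step=step)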
print(f"Model in {label_prefix} mode") 52 | epoch_losses = [0.0] * 7 53 | epoch_time_total = 0.0 54 | epoch_num_targets = 1e-12 55 | t1 = time.time() 56 | loss_labels = ["Total", "L-x", "L-y", "L-w", "L-h", "L-noobj", "L-obj"] 57 | for i, (img_uri, imgs, targets) in enumerate(data_loader): 58 | if step[0] >= num_steps: 59 | break 60 | imgs = imgs.to(device, non_blocking=True) 61 | targets = targets.to(device, non_blocking=True) 62 | targets.requires_grad_(False) 63 | step_num_targets = ((targets[:, :, 1:5] > 0).sum(dim=2) > 1).sum().item() + 1e-12 64 | epoch_num_targets += step_num_targets 65 | # Compute loss, compute gradient, update parameters 66 | if optimizer is not None: 67 | optimizer.zero_grad() 68 | losses = model(imgs, targets) 69 | if label_prefix == "train": 70 | losses[0].sum().backward() 71 | if optimizer is not None: 72 | optimizer.step() 73 | 74 | for j, (label, loss) in enumerate(zip(loss_labels, losses)): 75 | batch_loss = loss.sum().to('cpu').item() 76 | epoch_losses[j] += batch_loss 77 | finished_time = time.time() 78 | step_time_total = finished_time - t1 79 | epoch_time_total += step_time_total 80 | 81 | statement = label_prefix + ' Epoch: ' + str(epoch) + ', Batch: ' + str(i + 1) + '/' + str(len(data_loader)) 82 | count = 0 83 | for (loss_label, loss) in zip(loss_labels, losses): 84 | if count == 0: 85 | statement += ', Total: ' + '{0:10.6f}'.format(loss.item() / step_num_targets) 86 | tot_loss = loss.item() 87 | count += 1 88 | else: 89 | statement += ', ' + loss_label + ': {0:5.2f}'.format(loss.item() / tot_loss * 100) + '%' 90 | print(statement) 91 | if label_prefix == "train": 92 | step[0] += 1 93 | return epoch_losses, epoch_time_total, epoch_num_targets 94 | 95 | def main(*, evaluate, batch_size, optimizer_pick, model_cfg, weights_path, output_path, dataset_path, num_epochs, num_steps, checkpoint_interval, 96 | augment_affine, augment_hsv, lr_flip, ud_flip, momentum, gamma, lr, weight_decay, vis_batch, data_aug, blur, salt, noise, contrast, sharpen, ts, debug_mode, upload_dataset,xy_loss,wh_loss,no_object_loss,object_loss,vanilla_anchor,val_tolerance,min_epochs): 97 | input_arguments = list(locals().items()) 98 | 99 | print("Initializing model") 100 | model = Darknet(config_path=model_cfg,xy_loss=xy_loss,wh_loss=wh_loss,no_object_loss=no_object_loss,object_loss=object_loss,vanilla_anchor=vanilla_anchor) 101 | img_width, img_height = model.img_size() 102 | bw = model.get_bw() 103 | validate_uri, train_uri = model.get_links() 104 | 105 | if output_path == "automatic": 106 | current_month = datetime.now().strftime('%B').lower() 107 | current_year = str(datetime.now().year) 108 | if not os.path.exists(os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + model_cfg.split('.')[0].split('/')[-1])): 109 | os.makedirs(os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + model_cfg.split('.')[0].split('/')[-1])) 110 | output_uri = os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + model_cfg.split('.')[0].split('/')[-1]) 111 | else: 112 | output_uri = output_path 113 | 114 | num_validate_images, num_train_images = model.num_images() 115 | conf_thresh, nms_thresh, iou_thresh = model.get_threshs() 116 | num_classes = model.get_num_classes() 117 | loss_constant = model.get_loss_constant() 118 | conv_activation = model.get_conv_activation() 119 | anchors = model.get_anchors() 120 | onnx_name = model.get_onnx_name() 121 | 122 | with tempfile.TemporaryDirectory() as tensorboard_data_dir: 123 | 
print("Initializing data loaders") 124 | train_data_loader = torch.utils.data.DataLoader( 125 | ImageLabelDataset(train_uri, dataset_path=dataset_path, width=img_width, height=img_height, augment_hsv=augment_hsv, 126 | augment_affine=augment_affine, num_images=num_train_images, 127 | bw=bw, n_cpu=num_cpu, lr_flip=lr_flip, ud_flip=ud_flip,vis_batch=vis_batch,data_aug=data_aug,blur=blur,salt=salt,noise=noise,contrast=contrast,sharpen=sharpen,ts=ts,debug_mode=debug_mode, upload_dataset=upload_dataset), 128 | batch_size=(1 if debug_mode else batch_size), 129 | shuffle=(False if debug_mode else True), 130 | num_workers=(0 if vis_batch else num_cpu), 131 | pin_memory=cuda) 132 | print("Num train images: ", len(train_data_loader.dataset)) 133 | 134 | validate_data_loader = torch.utils.data.DataLoader( 135 | ImageLabelDataset(validate_uri, dataset_path=dataset_path, width=img_width, height=img_height, augment_hsv=False, 136 | augment_affine=False, num_images=num_validate_images, 137 | bw=bw, n_cpu=num_cpu, lr_flip=False, ud_flip=False,vis_batch=vis_batch,data_aug=False,blur=False,salt=False,noise=False,contrast=False,sharpen=False,ts=ts,debug_mode=debug_mode, upload_dataset=upload_dataset), 138 | batch_size=(1 if debug_mode else batch_size), 139 | shuffle=False, 140 | num_workers=(0 if vis_batch else num_cpu), 141 | pin_memory=cuda) 142 | print("Num validate images: ", len(validate_data_loader.dataset)) 143 | 144 | ##### additional configuration ##### 145 | print("Training batch size: " + str(batch_size)) 146 | 147 | print("Checkpoint interval: " + str(checkpoint_interval)) 148 | 149 | print("Loss constants: " + str(loss_constant)) 150 | 151 | print("Anchor boxes: " + str(anchors)) 152 | 153 | print("Training image width: " + str(img_width)) 154 | 155 | print("Training image height: " + str(img_height)) 156 | 157 | print("Confidence Threshold: " + str(conf_thresh)) 158 | 159 | print("Number of training classes: " + str(num_classes)) 160 | 161 | print("Conv activation type: " + str(conv_activation)) 162 | 163 | print("Starting learning rate: " + str(lr)) 164 | 165 | if ts: 166 | print("Tile and scale mode [on]") 167 | else: 168 | print("Tile and scale mode [off]") 169 | 170 | if data_aug: 171 | print("Data augmentation mode [on]") 172 | else: 173 | print("Data augmentation mode [off]") 174 | 175 | #################################### 176 | 177 | start_epoch = 0 178 | 179 | weights_path = weights_path 180 | if optimizer_pick == "Adam": 181 | print("Using Adam Optimizer") 182 | optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), 183 | lr=lr, weight_decay=weight_decay) 184 | elif optimizer_pick == "SGD": 185 | print("Using SGD Optimizer") 186 | optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), 187 | lr=lr, momentum=momentum, weight_decay=weight_decay) 188 | else: 189 | raise Exception(f"Invalid optimizer name: {optimizer_pick}") 190 | print("Loading weights") 191 | model.load_weights(weights_path, model.get_start_weight_dim()) 192 | 193 | if torch.cuda.device_count() > 1: 194 | print('Using ', torch.cuda.device_count(), ' GPUs') 195 | model = nn.DataParallel(model) 196 | model = model.to(device, non_blocking=True) 197 | 198 | # Set scheduler 199 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=gamma) 200 | 201 | val_loss = 999 # using a high number for validation loss 202 | val_loss_counter = 0 203 | step = [0] # wrapping in an array so it is mutable 204 | epoch = start_epoch 205 | while epoch < num_epochs and 
step[0] < num_steps and not evaluate: 206 | epoch += 1 207 | scheduler.step() 208 | model.train() 209 | run_epoch(label_prefix="train", data_loader=train_data_loader, epoch=epoch, 210 | step=step, model=model, num_epochs=num_epochs, num_steps=num_steps, 211 | optimizer=optimizer) 212 | print('Completed epoch: ', epoch) 213 | # Update best loss 214 | if epoch % checkpoint_interval == 0 or epoch == num_epochs or step[0] >= num_steps: 215 | # First, save the weights 216 | save_weights_uri = os.path.join(output_uri, "{epoch}.weights".format(epoch=epoch)) 217 | model.save_weights(save_weights_uri) 218 | 219 | with torch.no_grad(): 220 | print("Calculating loss on validate data") 221 | epoch_losses, epoch_time_total, epoch_num_targets = run_epoch( 222 | label_prefix="validate", data_loader=validate_data_loader, epoch=epoch, 223 | model=model, num_epochs=num_epochs, num_steps=num_steps, optimizer=None, 224 | step=step) 225 | avg_epoch_loss = epoch_losses[0] / epoch_num_targets 226 | print('Average Validation Loss: {0:10.6f}'.format(avg_epoch_loss)) 227 | 228 | if avg_epoch_loss > val_loss and epoch > min_epochs: 229 | val_loss_counter += 1 230 | print(f"Validation loss did not decrease for {val_loss_counter}" 231 | f" consecutive check(s)") 232 | else: 233 | print("Validation loss decreased. Yay!!") 234 | val_loss_counter = 0 235 | val_loss = avg_epoch_loss 236 | ##### updating best result for optuna study ##### 237 | result = open("logs/result.txt", "w" ) 238 | result.write(str(avg_epoch_loss)) 239 | result.close() 240 | ########################################### 241 | validate.validate(dataloader=validate_data_loader, model=model, device=device, step=step[0], bbox_all=False,debug_mode=debug_mode) 242 | if val_loss_counter == val_tolerance: 243 | print("Validation loss stopped decreasing over the last " + str(val_tolerance) + " checkpoints, creating onnx file") 244 | with tempfile.NamedTemporaryFile() as tmpfile: 245 | model.save_weights(tmpfile.name) 246 | weights_name = tmpfile.name 247 | cfg_name = os.path.join(tempfile.gettempdir(), model_cfg.split('/')[-1].split('.')[0] + '.tmp') 248 | onnx_gen = subprocess.call(['python3', 'yolo2onnx.py', '--cfg_name', cfg_name, '--weights_name', weights_name]) 249 | save_weights_uri = os.path.join(output_uri, onnx_name) 250 | os.rename(weights_name, save_weights_uri) 251 | try: 252 | os.remove(onnx_name) 253 | except: 254 | pass 255 | os.remove(cfg_name) 256 | break 257 | if evaluate: 258 | validation = validate.validate(dataloader=validate_data_loader, model=model, device=device, step=-1, bbox_all=False, tensorboard_writer=None,debug_mode=debug_mode) 259 | return val_loss 260 | 261 | 262 | if __name__ == '__main__': 263 | parser = argparse.ArgumentParser() 264 | def add_bool_arg(name, default, help): 265 | arg_group = parser.add_mutually_exclusive_group(required=False) 266 | arg_group.add_argument('--' + name, dest=name, action='store_true', help=help) 267 | arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 268 | parser.set_defaults(**{name:default}) 269 | 270 | parser.add_argument('--batch_size', type=int, default=7, help='size of each image batch') 271 | parser.add_argument('--optimizer_pick', type=str, default="Adam", help='choose optimizer between Adam and SGD') 272 | parser.add_argument('--model_cfg', type=str, help='cfg file path',required=True) 273 | parser.add_argument('--weights_path', type=str, help='initial weights path',default="sample-yolov3.weights") 274 | parser.add_argument('--output_path', 
type=str, help='output weights path, by default we will create a folder based on current system time and name of your cfg file',default="automatic") 275 | parser.add_argument('--dataset_path', type=str, help='path to image dataset',default="dataset/YOLO_Dataset/") 276 | parser.add_argument('--num_epochs', type=int, default=2048, help='maximum number of epochs') 277 | parser.add_argument('--num_steps', type=int, default=8388608, help="maximum number of steps") 278 | parser.add_argument('--val_tolerance', type=int, default=3, help="tolerance for validation loss decreasing") 279 | parser.add_argument('--min_epochs', type=int, default=3, help="minimum training epochs") 280 | parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights") 281 | # Default output location of visualization is "Buckets/mit-dut-driverless-internal/dumping-ground/visualization/" 282 | parser.add_argument("--vis_batch", type=int, default=0, help="number of batches you wish to load and visualize before quitting training") 283 | 284 | ##### tile and scale ##### 285 | add_bool_arg('ts', default=True, help="whether to initially scale the entire image by a constant factor determined by the appropriate cone pixel size, then chop and tile (instead of pad and resize)") 286 | ########################## 287 | 288 | add_bool_arg('augment_affine', default=False, help='whether to augment images') 289 | add_bool_arg('augment_hsv', default=False, help="whether to augment hsv") 290 | add_bool_arg('augment_lr_flip', default=False, help="whether to flip left/right") 291 | add_bool_arg('augment_ud_flip', default=False, help="whether to flip up/down") 292 | add_bool_arg('augment_blur', default=False, help="whether to add blur") 293 | add_bool_arg('augment_salt', default=False, help="whether to add salt/pepper") 294 | add_bool_arg('augment_noise', default=False, help="whether to add noise") 295 | add_bool_arg('augment_contrast', default=False, help="whether to add contrast") 296 | add_bool_arg('augment_sharpen', default=False, help="whether to add sharpen") 297 | add_bool_arg('evaluate', default =False, help="If we want to get the mAP values rather than train") 298 | ########################## 299 | 300 | add_bool_arg('vanilla_anchor', default=False, help="whether to use vanilla anchor boxes for training") 301 | add_bool_arg('debug_mode', default=False, help="whether to visualize the validate prediction during mAP calculation, need to make CUDA=False at first. If true then batch size will also automatically set to 1 and training shuffle will be False. 
") 302 | add_bool_arg('data_aug', default=False, help="whether to do all stable data augmentation") 303 | add_bool_arg('upload_dataset', default=False, help="whether to uploading all tiles to GCP, have to enable --ts first") 304 | 305 | 306 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum for the optimizer') 307 | parser.add_argument('--gamma', type=float, default=0.95, help='gamma for the scheduler') 308 | parser.add_argument('--lr', type=float, default=0.001, help='starting learning rate') 309 | parser.add_argument('--weight_decay', type=float, default=0.0, help='weight decay') 310 | 311 | ##### Loss Constants ##### 312 | parser.add_argument('--xy_loss', type=float, default=2, help='confidence loss for x and y') 313 | parser.add_argument('--wh_loss', type=float, default=1.6, help='confidence loss for width and height') 314 | parser.add_argument('--no_object_loss', type=float, default=25, help='confidence loss for non-objectness') 315 | parser.add_argument('--object_loss', type=float, default=0.1, help='confidence loss for objectness') 316 | 317 | opt = parser.parse_args() 318 | 319 | save_file_name = 'logs/' + opt.model_cfg.split('/')[-1].split('.')[0] 320 | sys.stdout = Logger(save_file_name + '.log') 321 | sys.stderr = Logger(save_file_name + '.error') 322 | 323 | temp_dir = tempfile.gettempdir() 324 | temp_path = os.path.join(temp_dir, save_file_name.split('/')[-1] + '.tmp') 325 | shutil.copy2(opt.model_cfg, temp_path) 326 | label = subprocess.call(["git", "describe", "--always"]) 327 | result = main(evaluate=opt.evaluate, 328 | batch_size=opt.batch_size, 329 | optimizer_pick=opt.optimizer_pick, 330 | model_cfg=opt.model_cfg, 331 | weights_path=opt.weights_path, 332 | output_path=opt.output_path, 333 | dataset_path=opt.dataset_path, 334 | num_epochs=opt.num_epochs, 335 | num_steps=(opt.num_steps if opt.vis_batch is 0 else opt.vis_batch), 336 | checkpoint_interval=opt.checkpoint_interval, 337 | augment_affine=opt.augment_affine, 338 | augment_hsv=opt.augment_hsv, 339 | lr_flip=opt.augment_lr_flip, 340 | ud_flip=opt.augment_ud_flip, 341 | momentum=opt.momentum, 342 | gamma=opt.gamma, 343 | lr=opt.lr, 344 | weight_decay=opt.weight_decay, 345 | vis_batch=opt.vis_batch, 346 | data_aug=opt.data_aug, 347 | blur=opt.augment_blur, 348 | salt=opt.augment_salt, 349 | noise=opt.augment_noise, 350 | contrast=opt.augment_contrast, 351 | sharpen=opt.augment_sharpen, 352 | ts=opt.ts, 353 | debug_mode=opt.debug_mode, 354 | upload_dataset=opt.upload_dataset, 355 | xy_loss=opt.xy_loss, 356 | wh_loss=opt.wh_loss, 357 | no_object_loss=opt.no_object_loss, 358 | object_loss=opt.object_loss, 359 | vanilla_anchor=opt.vanilla_anchor, 360 | val_tolerance=opt.val_tolerance, 361 | min_epochs=opt.min_epochs 362 | ) 363 | -------------------------------------------------------------------------------- /CVC-YOLOv3/train_hyper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import subprocess 5 | 6 | import optuna 7 | import pymysql 8 | import numpy as np 9 | import torch 10 | 11 | ##### section for all random seeds ##### 12 | # cv2.setRNGSeed(17) 13 | # torch.manual_seed(17) 14 | # np.random.seed(17) 15 | # torch.backends.cudnn.deterministic = True 16 | # torch.backends.cudnn.benchmark = False 17 | ######################################## 18 | 19 | pymysql.converters.encoders[np.float64] = pymysql.converters.escape_float 20 | pymysql.converters.conversions = pymysql.converters.encoders.copy() 21 | 
pymysql.converters.conversions.update(pymysql.converters.decoders) 22 | 23 | if __name__ == "__main__": 24 | parser = argparse.ArgumentParser() 25 | def add_bool_arg(name, default, help): 26 | arg_group = parser.add_mutually_exclusive_group(required=False) 27 | arg_group.add_argument('--' + name, dest=name, action='store_true', help=help) 28 | arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 29 | parser.set_defaults(**{name:default}) 30 | parser.add_argument('--model_cfg', type=str, help='cfg file path',required=True) 31 | parser.add_argument("--checkpoint_interval", type=int, default=1, help="interval between saving model weights") 32 | parser.add_argument('--num_epochs', type=int, default=20, help='maximum number of epochs') 33 | parser.add_argument('--num_trials', type=int, default=100, help="number of optuna trials to run") 34 | parser.add_argument('--val_tolerance', type=int, default=1, help="tolerance for validation loss decreasing") 35 | parser.add_argument('--study_name', type=str, default='optuna_test_6', help="cometml / optuna study name") 36 | 37 | 38 | ##### tile and scale ##### 39 | add_bool_arg('ts_study', default=False, help="whether to initialize study of whether using tiling dataloader") 40 | ########################## 41 | add_bool_arg('optimizer_study', default=False, help="whether to have optuna study between Adam and SGD optimizer") 42 | add_bool_arg('loss_study', default=False, help="whether to have optuna study on loss constants") 43 | 44 | add_bool_arg('auto_sd', default=False, help='whether to enable automatical instance shutdown after training. default to True') 45 | 46 | opt = parser.parse_args() 47 | 48 | def objective(trial): 49 | ###################################### 50 | if opt.loss_study: 51 | xy_loss = trial.suggest_uniform('xy_loss', 1.6, 2.4) 52 | wh_loss = trial.suggest_uniform('wh_loss', 1.28, 1.92) 53 | no_object_loss = trial.suggest_uniform('no_object_loss', 20.0, 30.0) 54 | object_loss = trial.suggest_uniform('object_loss', 0.08, 0.12) 55 | 56 | else: 57 | xy_loss = 2 58 | wh_loss = 1.6 59 | no_object_loss = 25 60 | object_loss = 0.1 61 | ###################################### 62 | 63 | if opt.ts_study: 64 | tile = trial.suggest_categorical('tile', [False, True]) 65 | else: 66 | tile = True #Default to use tiling dataloader 67 | ###################################### 68 | 69 | if opt.optimizer_study: 70 | optimizer_pick = trial.suggest_categorical('optimizer_pick', ["Adam", "SGD"]) 71 | else: 72 | optimizer_pick = "Adam" #Default to Adam optimizer 73 | ###################################### 74 | 75 | # build the argstring 76 | args = { 77 | "model_cfg": opt.model_cfg, 78 | "ts": tile, 79 | "xy_loss": xy_loss, 80 | "wh_loss": wh_loss, 81 | "no_object_loss": no_object_loss, 82 | "object_loss": object_loss, 83 | "num_epochs": opt.num_epochs, 84 | "checkpoint_interval": opt.checkpoint_interval, 85 | "optimizer_pick": optimizer_pick, 86 | "val_tolerance": opt.val_tolerance, 87 | "auto_sd": opt.auto_sd 88 | } 89 | arglist = ["python3", "train.py"] 90 | for arg, value in args.items(): 91 | if value is None: 92 | continue 93 | if value is False: 94 | arglist.append(f"--no_{arg}") 95 | continue 96 | if value is True: 97 | arglist.append(f"--{arg}") 98 | continue 99 | arglist.append(f"--{arg}={value}") 100 | 101 | statement = " ".join(arglist) 102 | print(f"statement for this study is: ") 103 | print(statement) 104 | 105 | # calling through subprocess to ensure that all cuda memory is fully released between 
experiments 106 | subprocess.check_call(arglist) 107 | 108 | result_file = open("logs/result.txt","r+") 109 | score = float(result_file.read()) 110 | print(f"score for this study is {score}") 111 | return score # want to return a value to minimize 112 | 113 | try: 114 | # study = optuna.create_study(study_name=opt.study_name, storage="mysql+pymysql://root:root@35.224.251.208/optuna") 115 | study = optuna.create_study(study_name=opt.study_name) 116 | print("Created optuna study") 117 | except ValueError as e: 118 | if "Please use a different name" in str(e): 119 | # study = optuna.Study(study_name=opt.study_name, storage="mysql+pymysql://root:root@35.224.251.208/optuna") 120 | study = optuna.Study(study_name=opt.study_name) 121 | print("Joined existing optuna study") 122 | else: 123 | raise 124 | except: 125 | raise 126 | study.optimize(objective, n_trials=opt.num_trials) 127 | -------------------------------------------------------------------------------- /CVC-YOLOv3/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-core/MIT-Driverless-CV-TrainingInfra/bb8d6e22d8211aad1cd56f698723db8812bd6623/CVC-YOLOv3/utils/__init__.py -------------------------------------------------------------------------------- /CVC-YOLOv3/utils/datasets.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import csv 3 | import copy 4 | import json 5 | import math 6 | import operator 7 | import os 8 | import sys 9 | import random 10 | import warnings 11 | import numpy as np 12 | 13 | import torchvision 14 | import torch 15 | import PIL 16 | from PIL import Image, ImageDraw 17 | import torch.utils.data 18 | import torch.nn.functional as F 19 | import imgaug.augmenters as iaa 20 | 21 | from utils.utils import xyxy2xywh, xywh2xyxy, calculate_padding, visualize_and_save_to_local, scale_image, add_class_dimension_to_labels, xyhw2xyxy_corner, scale_labels, add_padding_on_each_side, get_patch_spacings, get_patch, pre_tile_padding, filter_and_offset_labels, upload_label_and_image_to_gcloud 22 | 23 | ##### section for all random seeds ##### 24 | torch.manual_seed(17) 25 | np.random.seed(17) 26 | torch.backends.cudnn.deterministic = True 27 | torch.backends.cudnn.benchmark = False 28 | ######################################## 29 | 30 | random.seed(a=17, version=2) 31 | torchvision.set_image_backend('accimage') 32 | visualization_tmp_path = "/outputs/visualization/" 33 | 34 | class ImageLabelDataset(torch.utils.data.Dataset, object): 35 | def __init__(self, path, dataset_path, width, height, augment_affine, num_images, augment_hsv, lr_flip, ud_flip, bw, n_cpu, vis_batch, data_aug, blur, salt, noise, contrast, sharpen, ts,debug_mode, upload_dataset): 36 | self.img_files = [] 37 | self.labels = [] 38 | if ts: 39 | self.scales = [] 40 | self.num_targets_per_image = None 41 | 42 | list_path = path 43 | 44 | self.ts = ts 45 | self.debug_mode = debug_mode 46 | 47 | with open(list_path) as csv_file: 48 | csv_reader = csv.reader(csv_file) 49 | for i, row in enumerate(csv_reader): 50 | if i < 2: 51 | continue 52 | img_boxes = [] 53 | for img_box_str in row[5:]: 54 | if not img_box_str == "": 55 | img_boxes.append(json.loads(img_box_str)) 56 | 57 | img_boxes = torch.tensor(img_boxes, dtype=torch.float) 58 | if (img_boxes < 0).sum() > 0: 59 | warnings.warn("Image {image} at line {line} has negative bounding box coordinates; skipping".format(image=os.path.join(dataset_path,row[0]), 
line=i+1)) 60 | continue 61 | 62 | img_width, img_height = int(row[2]), int(row[3]) 63 | 64 | scale = float(row[4]) 65 | 66 | new_height = int(img_height * scale) 67 | new_width = int(img_width * scale) 68 | 69 | vert_pad, horiz_pad = pre_tile_padding(new_width,new_height,width,height) 70 | 71 | if self.ts: 72 | _,_,n_patches,_,_ = get_patch_spacings(new_width+horiz_pad*2, new_height+vert_pad*2, width, height) 73 | self.img_files.extend([os.path.join(dataset_path,row[0])]*n_patches) 74 | self.labels.extend([img_boxes]*n_patches) 75 | else: 76 | self.img_files.append(os.path.join(dataset_path,row[0])) 77 | self.labels.append(img_boxes) 78 | if ts: 79 | self.scales.extend([float(row[4])]*n_patches) 80 | 81 | if num_images >= 0: 82 | sample_indices = random.sample(range(len(self.img_files)), k=num_images) 83 | if len(sample_indices) > 1: 84 | self.img_files = operator.itemgetter(*sample_indices)(self.img_files) 85 | self.labels = operator.itemgetter(*sample_indices)(self.labels) 86 | if ts: 87 | self.scales = operator.itemgetter(*sample_indices)(self.scales) 88 | 89 | if n_cpu > 0: 90 | executor = concurrent.futures.ProcessPoolExecutor(n_cpu) 91 | futures = [] 92 | 93 | for (img_file, label) in zip(self.img_files, self.labels): 94 | if self.num_targets_per_image is None or len(label) > self.num_targets_per_image: 95 | self.num_targets_per_image = len(label) 96 | 97 | if n_cpu > 0: 98 | concurrent.futures.wait(futures) 99 | 100 | self.height = height 101 | self.width = width 102 | self.augment_affine = augment_affine 103 | self.lr_flip = lr_flip 104 | self.ud_flip = ud_flip 105 | self.augment_hsv = augment_hsv 106 | self.data_aug = data_aug 107 | if self.augment_hsv or self.data_aug: 108 | self.jitter = torchvision.transforms.ColorJitter(saturation=0.25, contrast=0.25, brightness=0.25, hue=0.04) 109 | self.bw = bw # black and white 110 | self.vis_batch = vis_batch 111 | self.vis_counter = 0 112 | self.blur = blur 113 | self.salt = salt 114 | self.noise = noise 115 | self.contrast = contrast 116 | self.sharpen = sharpen 117 | self.upload_dataset = upload_dataset 118 | 119 | 120 | def __len__(self): 121 | return len(self.img_files) 122 | 123 | 124 | def __getitem__(self, index): 125 | img_uri = self.img_files[index] 126 | img_labels = self.labels[index] 127 | # don't download, since it was already downloaded in the init 128 | img_path = img_uri 129 | img_name = ("_".join(map(str, img_path.split("_")[-5:]))) 130 | orig_img = PIL.Image.open(img_path).convert('RGB') 131 | if orig_img is None: 132 | raise Exception("Empty image: {img_path}".format(img_path=img_path)) 133 | 134 | if self.vis_batch and len(img_labels) > 0: 135 | vis_orig_img = copy.deepcopy(orig_img) 136 | labels = add_class_dimension_to_labels(img_labels) 137 | labels = xyhw2xyxy_corner(labels, skip_class_dimension=True) 138 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + ".jpg") 139 | visualize_and_save_to_local(vis_orig_img, labels, tmp_path, box_color="green") 140 | print(f'new image uploaded to {tmp_path}') 141 | 142 | # First, handle image re-shaping 143 | if self.ts: 144 | scale = self.scales[index] 145 | scaled_img = scale_image(orig_img, scale) 146 | scaled_img_width, scaled_img_height = scaled_img.size 147 | patch_width, patch_height = self.width, self.height 148 | 149 | vert_pad, horiz_pad = pre_tile_padding(scaled_img_width,scaled_img_height,patch_width,patch_height) 150 | padded_img = torchvision.transforms.functional.pad(scaled_img, padding=(horiz_pad, vert_pad, horiz_pad, vert_pad), fill=(127, 127, 
127), padding_mode="constant") 151 | padded_img_width, padded_img_height = padded_img.size 152 | 153 | _,_,n_patches,_,_ = get_patch_spacings(padded_img_width, padded_img_height, 154 | patch_width, patch_height) 155 | 156 | patch_index = random.randint(0,n_patches-1) 157 | if self.debug_mode: 158 | patch_index = 0 159 | img, boundary = get_patch(padded_img, patch_width, patch_height, patch_index) 160 | else: 161 | orig_img_width, orig_img_height = orig_img.size 162 | vert_pad, horiz_pad, ratio = calculate_padding(orig_img_height, orig_img_width, self.height, self.width) 163 | img = torchvision.transforms.functional.pad(orig_img, padding=(horiz_pad, vert_pad, horiz_pad, vert_pad), fill=(127, 127, 127), padding_mode="constant") 164 | img = torchvision.transforms.functional.resize(img, (self.height, self.width)) 165 | 166 | # If no labels, no need to do augmentation (this should change in the future) 167 | # so immediately return with the padded image and empty labels 168 | if len(img_labels) == 0: 169 | labels = torch.zeros((len(img_labels), 5)) 170 | img = torchvision.transforms.functional.to_tensor(img) 171 | labels = F.pad(labels, 172 | pad=[0, 0, 0, self.num_targets_per_image - len(labels)], 173 | mode="constant") 174 | return img_uri, img, labels 175 | 176 | # Next, handle label re-shaping 177 | labels = add_class_dimension_to_labels(img_labels) 178 | labels = xyhw2xyxy_corner(labels) 179 | if self.ts: 180 | labels = scale_labels(labels, self.scales[index]) 181 | labels = add_padding_on_each_side(labels, horiz_pad, vert_pad) 182 | if self.vis_batch: 183 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + "_scaled.jpg") 184 | visualize_and_save_to_local(padded_img, labels, tmp_path, box_color="red") 185 | 186 | labels_temp = filter_and_offset_labels(labels, boundary) 187 | 188 | if self.vis_batch: 189 | pre_vis_labels = copy.deepcopy(labels) 190 | for i in range(n_patches): 191 | vis_patch_img, boundary = get_patch(padded_img, patch_width, patch_height, i) 192 | 193 | labels = filter_and_offset_labels(pre_vis_labels, boundary) 194 | 195 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + \ 196 | "_patch_{}.jpg".format(i)) 197 | visualize_and_save_to_local(vis_patch_img, labels, tmp_path, box_color="blue") 198 | if self.upload_dataset: 199 | pre_vis_labels = copy.deepcopy(labels) 200 | for i in range(n_patches): 201 | vis_patch_img, boundary = get_patch(padded_img, patch_width, patch_height, i) 202 | 203 | labels = filter_and_offset_labels(pre_vis_labels, boundary) 204 | 205 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + \ 206 | "_patch_{}.jpg".format(i)) 207 | upload_label_and_image_to_gcloud(vis_patch_img, labels, tmp_path) 208 | 209 | else: 210 | labels = filter_and_offset_labels(labels, boundary) 211 | else: 212 | labels = add_padding_on_each_side(labels, horiz_pad, vert_pad) 213 | labels = scale_labels(labels, ratio) 214 | labels_temp = labels 215 | 216 | if self.vis_batch: 217 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + "_pad_resized.jpg") 218 | visualize_and_save_to_local(img, labels, tmp_path, box_color="blue") 219 | 220 | labels = labels_temp 221 | if self.vis_batch and self.data_aug: 222 | vis_aug_img = copy.deepcopy(img) 223 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + "_before_aug.jpg") 224 | visualize_and_save_to_local(vis_aug_img, labels, tmp_path, box_color="red") 225 | if self.augment_hsv or self.data_aug: 226 | if random.random() > 0.5: 227 | img = self.jitter(img) 228 | # no 
transformation on labels 229 | 230 | # Augment image and labels 231 | img_width, img_height = img.size 232 | if self.augment_affine or self.data_aug: 233 | if random.random() > 0: 234 | angle = random.uniform(-10, 10) 235 | translate = (random.uniform(-40, 40), random.uniform(-40, 40)) ## WORKS 236 | scale = random.uniform(0.9, 1.1) 237 | shear = random.uniform(-3, 3) 238 | img = torchvision.transforms.functional.affine(img, angle, translate, scale, shear, 2, fillcolor=(127, 127, 127)) 239 | labels = affine_labels(img_height, img_width, labels, -angle, translate, scale, (-shear, 0)) 240 | 241 | if self.bw: 242 | img = torchvision.transforms.functional.to_grayscale(img, num_output_channels=1) 243 | 244 | # random left-right flip 245 | if self.lr_flip: 246 | if random.random() > 0.5: 247 | img = torchvision.transforms.functional.hflip(img) 248 | # Is this correct? 249 | # Not immediately obvious, when composed with the angle shift above 250 | labels[:, 1] = img_width - labels[:, 1] 251 | labels[:, 3] = img_width - labels[:, 3] 252 | 253 | # GaussianBlur, needs further development 254 | if self.blur: 255 | if random.random() > 0.2: 256 | arr = np.asarray(img) 257 | angle = random.uniform(40, -40) 258 | sigma = random.uniform(0,3.00) 259 | seq = iaa.Sequential([ 260 | iaa.GaussianBlur(sigma=sigma) 261 | ]) 262 | images_aug = seq.augment_images(arr) 263 | img = PIL.Image.fromarray(np.uint8(images_aug),'RGB') 264 | 265 | #AdditiveGaussianNoise 266 | if self.noise: 267 | if random.random() > 0.3: 268 | arr = np.asarray(img) 269 | scale = random.uniform(0,0.03*255) 270 | seq = iaa.Sequential([ 271 | iaa.AdditiveGaussianNoise(loc=0, scale=scale, per_channel=0.5) 272 | ]) 273 | images_aug = seq.augment_images(arr) 274 | img = PIL.Image.fromarray(np.uint8(images_aug),'RGB') 275 | 276 | #SigmoidContrast, need further development 277 | if self.contrast: 278 | if random.random() > 0.5: 279 | arr = np.asarray(img) 280 | cutoff = random.uniform(0.45,0.75) 281 | gain = random.randint(5,10) 282 | seq = iaa.Sequential([ 283 | iaa.SigmoidContrast(gain=gain,cutoff=cutoff) 284 | ]) 285 | images_aug = seq.augment_images(arr) 286 | img = PIL.Image.fromarray(np.uint8(images_aug),'RGB') 287 | 288 | #Sharpen, need further development 289 | if self.sharpen: 290 | if random.random() > 0.3: 291 | arr = np.asarray(img) 292 | alpha = random.uniform(0,0.5) 293 | seq = iaa.Sharpen(alpha=alpha) 294 | images_aug = seq.augment_images(arr) 295 | img = PIL.Image.fromarray(np.uint8(images_aug),'RGB') 296 | 297 | if self.vis_batch and self.data_aug: 298 | vis_post_aug_img = copy.deepcopy(img) 299 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + "_post_augmentation.jpg") 300 | visualize_and_save_to_local(vis_post_aug_img, labels, tmp_path, box_color="green") 301 | 302 | if self.vis_batch: 303 | self.vis_counter += 1 304 | if self.vis_counter > (self.vis_batch -1): 305 | sys.exit('Finished visualizing enough images. 
Exiting!') 306 | 307 | labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) 308 | labels[:, (1, 3)] /= self.width 309 | labels[:, (2, 4)] /= self.height 310 | 311 | img = torchvision.transforms.functional.to_tensor(img) 312 | labels = F.pad(labels, pad=[0, 0, 0, self.num_targets_per_image - len(labels)], mode="constant") 313 | if (labels < 0).sum() > 0: 314 | raise Exception(f"labels for image {img_uri} have negative values") 315 | return img_uri, img, labels 316 | 317 | def affine_labels(h, w, targets, angle=0, translate=(0, 0), scale=1.0, shear=(0, 0)): 318 | # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 319 | # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 320 | 321 | height = max(w, h) 322 | 323 | # Rotation and Scale 324 | alpha = scale * math.cos(math.radians(angle)) 325 | beta = scale * math.sin(math.radians(angle)) 326 | R = torch.tensor(( 327 | (alpha, beta, (1-alpha)*(w/2.0)-beta*(h/2.0)), 328 | (-beta, alpha, (beta*w/2.0)+(1-alpha)*(h/2.0)), 329 | (0, 0, 1) 330 | ), dtype=torch.float) 331 | # angle += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations 332 | 333 | # Translation 334 | T = torch.eye(3) 335 | T[0, 2] = translate[0] # x translation (pixels) 336 | T[1, 2] = translate[1] # y translation (pixels) 337 | 338 | # Shear (about the center) 339 | S = torch.eye(3) 340 | S[0, 1] = math.tan(math.radians(shear[0])) # x shear 341 | S[0, 2] = -math.tan(math.radians(shear[0])) * h/2.0 342 | S[1, 0] = math.tan(math.radians(shear[1])) # y shear 343 | S[1, 2] = -math.tan(math.radians(shear[1])) * w/2.0 344 | M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!! 345 | 346 | # Return warped points also 347 | n = targets.shape[0] 348 | points = targets[:, 1:5] 349 | area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1]) 350 | 351 | # warp points 352 | xy = torch.ones((n * 4, 3)) 353 | xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 354 | xy = (xy @ M.transpose(0, 1)) 355 | xy = xy[:, :2] / xy[:, 2].unsqueeze(1).expand(-1, 2) 356 | xy = xy[:, :2].reshape(n, 8) 357 | 358 | # create new boxes 359 | x = xy[:, [0, 2, 4, 6]] 360 | y = xy[:, [1, 3, 5, 7]] 361 | xy = torch.cat((x.min(1)[0], y.min(1)[0], x.max(1)[0], y.max(1)[0])).reshape(4, n).transpose(0, 1) 362 | 363 | # apply angle-based reduction 364 | radians = angle * math.pi / 180 365 | reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 366 | x = (xy[:, 2] + xy[:, 0]) / 2 367 | y = (xy[:, 3] + xy[:, 1]) / 2 368 | w = (xy[:, 2] - xy[:, 0]) * reduction 369 | h = (xy[:, 3] - xy[:, 1]) * reduction 370 | xy = torch.cat((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).transpose(0, 1) 371 | 372 | #print("diagnosing affine targets") 373 | # reject warped points outside of image 374 | torch.clamp(xy, 0, height, out=xy) 375 | w = xy[:, 2] - xy[:, 0] 376 | h = xy[:, 3] - xy[:, 1] 377 | area = w * h 378 | ar = torch.max(w / (h + 1e-16), h / (w + 1e-16)) 379 | i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) 380 | 381 | targets[i, 1:5] = xy[i] 382 | return targets 383 | -------------------------------------------------------------------------------- /CVC-YOLOv3/utils/nms.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torch 3 | 4 | def nms(boxes, scores, overlap=0.5, top_k=200): 5 | """Apply non-maximum suppression at test time to avoid detecting 
too many 6 | overlapping bounding boxes for a given object. 7 | Args: 8 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 9 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 10 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 11 | top_k: (int) The Maximum number of box preds to consider. 12 | Return: 13 | The indices of the kept boxes with respect to num_priors. 14 | """ 15 | 16 | keep = scores.new(scores.size(0)).zero_().long() 17 | if boxes.numel() == 0: 18 | return keep 19 | x1 = boxes[:, 0] 20 | y1 = boxes[:, 1] 21 | x2 = boxes[:, 2] 22 | y2 = boxes[:, 3] 23 | area = torch.mul(x2 - x1, y2 - y1) 24 | v, idx = scores.sort(0) # sort in ascending order 25 | # I = I[v >= 0.01] 26 | idx = idx[-top_k:] # indices of the top-k largest vals 27 | w = boxes.new() 28 | h = boxes.new() 29 | 30 | count = 0 31 | while idx.numel() > 0: 32 | i = idx[-1] # index of current largest val 33 | keep[count] = i 34 | count += 1 35 | if idx.size(0) == 1: 36 | break 37 | idx = idx[:-1] # remove kept element from view 38 | # load bboxes of next highest vals 39 | xx1 = torch.index_select(x1, 0, idx) 40 | yy1 = torch.index_select(y1, 0, idx) 41 | xx2 = torch.index_select(x2, 0, idx) 42 | yy2 = torch.index_select(y2, 0, idx) 43 | # store element-wise max with next highest score 44 | xx1 = torch.clamp(xx1, min=x1[i]) 45 | yy1 = torch.clamp(yy1, min=y1[i]) 46 | xx2 = torch.clamp(xx2, max=x2[i]) 47 | yy2 = torch.clamp(yy2, max=y2[i]) 48 | w.resize_as_(xx2) 49 | h.resize_as_(yy2) 50 | w = xx2 - xx1 51 | h = yy2 - yy1 52 | # check sizes of xx1 and xx2.. after each iteration 53 | w = torch.clamp(w, min=0.0) 54 | h = torch.clamp(h, min=0.0) 55 | inter = w*h 56 | # IoU = i / (area(a) + area(b) - i) 57 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 58 | union = (rem_areas - inter) + area[i] 59 | IoU = inter.float()/union.float() # store result in iou 60 | # keep only elements with an IoU <= overlap 61 | idx = idx[IoU.le(overlap)] 62 | return keep[:count] -------------------------------------------------------------------------------- /CVC-YOLOv3/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_config(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | return module_defs 19 | -------------------------------------------------------------------------------- /CVC-YOLOv3/validate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import random 5 | import time 6 | import os 7 | 8 | from PIL import Image 9 | import torch 10 | import torchvision 11 | 12 | from models import Darknet 13 | from utils.datasets import ImageLabelDataset 14 | from utils.nms import nms 15 | from utils.utils import average_precision, bbox_iou, xywh2xyxy, 
calculate_padding, draw_labels_on_image, visualize_and_save_to_local,xywh2xyxy,add_class_dimension_to_labels 16 | from tqdm import tqdm 17 | 18 | ################################################ 19 | from torchvision import transforms 20 | import copy 21 | ################################################ 22 | 23 | ################################################ 24 | gcloud_vis_path = "gs://mit-dut-driverless-internal/dumping-ground/visualization/" 25 | visualization_tmp_path = "/outputs/visualization/" 26 | ################################################ 27 | 28 | def main(*, batch_size, model_cfg, weights_path, bbox_all, step, n_cpu): 29 | cuda = torch.cuda.is_available() 30 | device = torch.device('cuda:0' if cuda else 'cpu') 31 | random.seed(0) 32 | torch.manual_seed(0) 33 | if cuda: 34 | torch.cuda.manual_seed(0) 35 | torch.cuda.manual_seed_all(0) 36 | torch.backends.cudnn.benchmark = True 37 | torch.cuda.empty_cache() 38 | 39 | # Initiate model 40 | model = Darknet(model_cfg) 41 | validate_uri, _, weights_uri = model.get_links() 42 | _, _, _, _, bw = model.get_dataAug() 43 | num_images, _ = model.num_images() 44 | 45 | # Load weights 46 | model.load_weights(weights_path, model.get_start_weight_dim()) 47 | model.to(device, non_blocking=True) 48 | 49 | # Get dataloader 50 | dataloader = torch.utils.data.DataLoader( 51 | ImageLabelDataset(validate_uri, height=img_height, width=img_width, augment_hsv=False, 52 | augment_affine=False, num_images=num_images, 53 | bw=bw, n_cpu=n_cpu, lr_flip=False, ud_flip=False), 54 | batch_size=batch_size, 55 | shuffle=False, 56 | num_workers=n_cpu, 57 | pin_memory=True) 58 | return validate(dataloader, model, device, step, bbox_all) 59 | 60 | # only works on a single class 61 | def validate(*, dataloader, model, device, step=-1, bbox_all=False,debug_mode): 62 | # result = open("logs/result.txt", "w" ) 63 | 64 | with torch.no_grad(): 65 | t_start = time.time() 66 | conf_thres, nms_thres, iou_thres = model.get_threshs() 67 | width, height = model.img_size() 68 | model.eval() 69 | print("Calculating mAP - Model in evaluation mode") 70 | n_images = len(dataloader.dataset) 71 | mAPs = [] 72 | mR = [] 73 | mP = [] 74 | for batch_i, (img_uris, imgs, targets) in enumerate(tqdm(dataloader,desc='Computing mAP')): 75 | imgs = imgs.to(device, non_blocking=True) 76 | targets = targets.to(device, non_blocking=True) 77 | # output,_,_,_ = model(imgs) 78 | output = model(imgs) 79 | 80 | for sample_i, (labels, detections) in enumerate(zip(targets, output)): 81 | detections = detections[detections[:, 4] > conf_thres] 82 | if detections.size()[0] == 0: 83 | predictions = torch.tensor([]) 84 | else: 85 | predictions = torch.argmax(detections[:, 5:], dim=1) 86 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 87 | box_corner = torch.zeros((detections.shape[0], 4), device=detections.device) 88 | xy = detections[:, 0:2] 89 | wh = detections[:, 2:4] / 2 90 | box_corner[:, 0:2] = xy - wh 91 | box_corner[:, 2:4] = xy + wh 92 | probabilities = detections[:, 4] 93 | nms_indices = nms(box_corner, probabilities, nms_thres) 94 | box_corner = box_corner[nms_indices] 95 | probabilities = probabilities[nms_indices] 96 | predictions = predictions[nms_indices] 97 | 98 | if nms_indices.shape[0] == 0: # there should always be at least one label 99 | continue 100 | # Get detections sorted by decreasing confidence scores 101 | _, inds = torch.sort(-probabilities) 102 | box_corner = box_corner[inds] 103 | 104 | probabilities = probabilities[inds] 105 | predictions = 
predictions[inds] 106 | labels = labels[(labels[:, 1:5] <= 0).sum(dim=1) == 0] # remove the 0-padding added by the dataloader 107 | # Extract target boxes as (x1, y1, x2, y2) 108 | target_boxes = xywh2xyxy(labels[:, 1:5]) 109 | target_boxes[:, (0,2)] *= width 110 | target_boxes[:, (1,3)] *= height 111 | detected = torch.zeros(target_boxes.shape[0], device=target_boxes.device, dtype=torch.uint8) 112 | correct = torch.zeros(nms_indices.shape[0], device=box_corner.device, dtype=torch.uint8) 113 | # 0th dim is the detection 114 | # (repeat in the 1st dim) 115 | # 2nd dim is the coord 116 | ious = bbox_iou(box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0], -1), 117 | target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1, -1)) 118 | # ious is 2d -- 0th dim is the detected box, 1st dim is the target box, value is iou 119 | 120 | ####################################################### 121 | ##### skip images without label ##### 122 | if [] in ious.data.tolist(): 123 | continue 124 | ####################################################### 125 | 126 | best_is = torch.argmax(ious, dim=1) 127 | 128 | # TODO fix for multi-class. Need to use predictions somehow? 129 | for i, iou in enumerate(ious): 130 | best_i = best_is[i] 131 | if ious[i, best_i] > iou_thres and detected[best_i] == 0: 132 | correct[i] = 1 133 | detected[best_i] = 1 134 | 135 | # Compute Average Precision (AP) per class 136 | ap, r, p = average_precision(tp=correct, conf=probabilities, n_gt=labels.shape[0]) 137 | 138 | # Compute mean AP across all classes in this image, and append to image list 139 | mAPs.append(ap) 140 | mR.append(r) 141 | mP.append(p) 142 | if bbox_all or sample_i < 2: # log the first two images in every batch 143 | img_filepath = img_uris[sample_i] 144 | if img_filepath is None: 145 | print("NULL image filepath for image uri: {uri}".format(uri=img_uris[sample_i])) 146 | orig_img = Image.open(img_filepath) 147 | # draw = ImageDraw.Draw(img_with_boxes) 148 | w, h = orig_img.size 149 | pad_h, pad_w, scale_factor = calculate_padding(h, w, height, width) 150 | 151 | ################################## 152 | detect_box = copy.deepcopy(box_corner) 153 | ################################## 154 | 155 | box_corner /= scale_factor 156 | box_corner[:, (0, 2)] -= pad_w 157 | box_corner[:, (1, 3)] -= pad_h 158 | 159 | ####################################################################################### 160 | if debug_mode: 161 | pil_img = transforms.ToPILImage()(imgs.squeeze()) 162 | ##### getting the image's name ##### 163 | img_path = img_uris[0] 164 | img_name = ("_".join(map(str, img_path.split("_")[-5:]))) 165 | tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + "_predicted_vis.jpg") 166 | vis_label = add_class_dimension_to_labels(detect_box) 167 | visualize_and_save_to_local(pil_img, vis_label, tmp_path,box_color="red") 168 | print("Prediction visualization uploaded") 169 | ####################################################################################### 170 | 171 | mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item() 172 | mean_R = torch.tensor(mR, dtype=torch.float).mean().item() 173 | mean_P = torch.tensor(mP, dtype=torch.float).mean().item() 174 | # Means of all images 175 | mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item() 176 | mean_R = torch.tensor(mR, dtype=torch.float).mean().item() 177 | mean_P = torch.tensor(mP, dtype=torch.float).mean().item() 178 | dt = time.time() - t_start 179 | print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format(mean_mAP, 
mean_R, mean_P)) 180 | # result.write(str(1-mean_mAP)) 181 | # result.close() 182 | return mean_mAP, mean_R, mean_P, dt/(n_images + 1e-12) 183 | 184 | if __name__ == '__main__': 185 | parser = argparse.ArgumentParser() 186 | def add_bool_arg(name, default, help): 187 | group = parser.add_mutually_exclusive_group(required=False) 188 | group.add_argument('--' + name, dest=name, action='store_true', help=help) 189 | group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 190 | parser.set_defaults(**{name:default}) 191 | 192 | parser.add_argument('--batch_size', type=int, help='size of each image batch') 193 | parser.add_argument('--model_cfg', type=str, help='path to model config file') 194 | parser.add_argument('--weights_path', type=str, help='initial weights path',required=True) 195 | add_bool_arg('bbox_all', default=False, help="whether to draw bounding boxes on all images") 196 | parser.add_argument('--step', type=int, default=-1, help='the step at which these images were generated') 197 | parser.add_argument('--n_cpu', type=int, default=0, help='number of cpu threads to use during batch generation') 198 | 199 | opt = parser.parse_args() 200 | results = main(batch_size=opt.batch_size, model_cfg=opt.model_cfg, weights_path=opt.weights_path, bbox_all=opt.bbox_all, step=opt.step, n_cpu=opt.n_cpu) 201 | -------------------------------------------------------------------------------- /CVC-YOLOv3/yolo_tutorial_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import tempfile 4 | import time 5 | import multiprocessing 6 | import subprocess 7 | import math 8 | import shutil 9 | import math 10 | 11 | from datetime import datetime 12 | import torch 13 | import torch.nn as nn 14 | from torch.utils.data import DataLoader 15 | 16 | from models import Darknet 17 | from utils.datasets import ImageLabelDataset 18 | from utils.utils import model_info, print_args, Logger, visualize_and_save_to_local,xywh2xyxy 19 | import validate 20 | import warnings 21 | 22 | import sys 23 | from os.path import isfile, join 24 | import copy 25 | import cv2 26 | from tensorboardX import SummaryWriter 27 | from PIL import Image, ImageDraw 28 | import torchvision 29 | from utils.nms import nms 30 | from utils.utils import calculate_padding 31 | from tqdm import tqdm 32 | 33 | cuda = torch.cuda.is_available() 34 | device = torch.device('cuda:0' if cuda else 'cpu') 35 | num_cpu = multiprocessing.cpu_count() if cuda else 0 36 | 37 | def run_epoch(label_prefix, data_loader, num_steps, optimizer, model, epoch, num_epochs, step, device): 38 | print(f"Model in {label_prefix} mode") 39 | epoch_losses = [0.0] * 7 40 | epoch_time_total = 0.0 41 | epoch_num_targets = 1e-12 42 | t1 = time.time() 43 | loss_labels = ["Total", "L-x", "L-y", "L-w", "L-h", "L-noobj", "L-obj"] 44 | for i, (img_uri, imgs, targets) in enumerate(data_loader): 45 | if step[0] >= num_steps: 46 | break 47 | imgs = imgs.to(device, non_blocking=True) 48 | targets = targets.to(device, non_blocking=True) 49 | targets.requires_grad_(False) 50 | step_num_targets = ((targets[:, :, 1:5] > 0).sum(dim=2) > 1).sum().item() + 1e-12 51 | epoch_num_targets += step_num_targets 52 | # Compute loss, compute gradient, update parameters 53 | if optimizer is not None: 54 | optimizer.zero_grad() 55 | losses = model(imgs, targets) 56 | if label_prefix == "train": 57 | losses[0].sum().backward() 58 | if optimizer is not None: 59 | optimizer.step() 60 | 61 | for j, (label, 
loss) in enumerate(zip(loss_labels, losses)): 62 | batch_loss = loss.sum().to('cpu').item() 63 | epoch_losses[j] += batch_loss 64 | finished_time = time.time() 65 | step_time_total = finished_time - t1 66 | epoch_time_total += step_time_total 67 | 68 | statement = label_prefix + ' Epoch: ' + str(epoch) + ', Batch: ' + str(i + 1) + '/' + str(len(data_loader)) 69 | count = 0 70 | for (loss_label, loss) in zip(loss_labels, losses): 71 | if count == 0: 72 | statement += ', Total: ' + '{0:10.6f}'.format(loss.item() / step_num_targets) 73 | tot_loss = loss.item() 74 | count += 1 75 | else: 76 | statement += ', ' + loss_label + ': {0:5.2f}'.format(loss.item() / tot_loss * 100) + '%' 77 | print(statement) 78 | if label_prefix == "train": 79 | step[0] += 1 80 | return epoch_losses, epoch_time_total, epoch_num_targets 81 | 82 | 83 | def single_img_detect(target_path,output_path,mode,model,device,conf_thres,nms_thres): 84 | 85 | img = Image.open(target_path).convert('RGB') 86 | w, h = img.size 87 | new_width, new_height = model.img_size() 88 | pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width) 89 | img = torchvision.transforms.functional.pad(img, padding=(pad_w, pad_h, pad_w, pad_h), fill=(127, 127, 127), padding_mode="constant") 90 | img = torchvision.transforms.functional.resize(img, (new_height, new_width)) 91 | 92 | bw = model.get_bw() 93 | if bw: 94 | img = torchvision.transforms.functional.to_grayscale(img, num_output_channels=1) 95 | 96 | img = torchvision.transforms.functional.to_tensor(img) 97 | img = img.unsqueeze(0) 98 | 99 | with torch.no_grad(): 100 | model.eval() 101 | img = img.to(device, non_blocking=True) 102 | # output,first_layer,second_layer,third_layer = model(img) 103 | output = model(img) 104 | 105 | 106 | for detections in output: 107 | detections = detections[detections[:, 4] > conf_thres] 108 | box_corner = torch.zeros((detections.shape[0], 4), device=detections.device) 109 | xy = detections[:, 0:2] 110 | wh = detections[:, 2:4] / 2 111 | box_corner[:, 0:2] = xy - wh 112 | box_corner[:, 2:4] = xy + wh 113 | probabilities = detections[:, 4] 114 | nms_indices = nms(box_corner, probabilities, nms_thres) 115 | main_box_corner = box_corner[nms_indices] 116 | if nms_indices.shape[0] == 0: 117 | continue 118 | img_with_boxes = Image.open(target_path) 119 | draw = ImageDraw.Draw(img_with_boxes) 120 | w, h = img_with_boxes.size 121 | 122 | for i in range(len(main_box_corner)): 123 | x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w 124 | y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h 125 | x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w 126 | y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h 127 | draw.rectangle((x0, y0, x1, y1), outline="red") 128 | 129 | if mode == 'image': 130 | img_with_boxes.save(os.path.join(output_path,target_path.split('/')[-1])) 131 | return os.path.join(output_path,target_path.split('/')[-1]) 132 | else: 133 | img_with_boxes.save(target_path) 134 | return target_path 135 | 136 | def detect(target_path, 137 | output_path, 138 | model, 139 | device, 140 | conf_thres, 141 | nms_thres, 142 | detection_tmp_path): 143 | 144 | target_filepath = target_path 145 | 146 | img_formats = ['.jpg', '.jpeg', '.png', '.tif'] 147 | vid_formats = ['.mov', '.avi', '.mp4'] 148 | 149 | mode = None 150 | 151 | if os.path.splitext(target_filepath)[-1].lower() in img_formats: 152 | mode = 'image' 153 | 154 | elif os.path.splitext(target_filepath)[-1].lower() in vid_formats: 155 | mode = 'video' 156 | 157 | 
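# mode is 'image' for .jpg/.jpeg/.png/.tif inputs and 'video' for .mov/.avi/.mp4
# inputs; any other extension leaves mode as None. For videos, the clip is split
# into frames under detection_tmp_path, each frame is run through
# single_img_detect(), and the annotated frames are re-encoded into an .mp4 with
# cv2.VideoWriter at the original frame rate.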
print("Detection Mode is: " + mode) 158 | 159 | raw_file_name = target_filepath.split('/')[-1].split('.')[0].split('_')[-4:] 160 | raw_file_name = '_'.join(raw_file_name) 161 | 162 | if mode == 'image': 163 | detection_path = single_img_detect(target_path=target_filepath,output_path=output_path,mode=mode,model=model,device=device,conf_thres=conf_thres,nms_thres=nms_thres) 164 | 165 | print(f'Please check output image at {detection_path}') 166 | 167 | elif mode == 'video': 168 | if os.path.exists(detection_tmp_path): 169 | shutil.rmtree(detection_tmp_path) # delete output folder 170 | os.makedirs(detection_tmp_path) # make new output folder 171 | 172 | vidcap = cv2.VideoCapture(target_filepath) 173 | success,image = vidcap.read() 174 | count = 0 175 | 176 | 177 | 178 | while success: 179 | cv2.imwrite(detection_tmp_path + "/frame%d.jpg" % count, image) # save frame as JPEG file 180 | success,image = vidcap.read() 181 | count += 1 182 | 183 | # Find OpenCV version 184 | (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.') 185 | 186 | if int(major_ver) < 3 : 187 | fps = vidcap.get(cv2.cv.CV_CAP_PROP_FPS) 188 | print ("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps)) 189 | else : 190 | fps = vidcap.get(cv2.CAP_PROP_FPS) 191 | print ("Frames per second using video.get(cv2.CAP_PROP_FPS) : {0}".format(fps)) 192 | vidcap.release(); 193 | 194 | frame_array = [] 195 | files = [f for f in os.listdir(detection_tmp_path) if isfile(join(detection_tmp_path, f))] 196 | 197 | #for sorting the file names properly 198 | files.sort(key = lambda x: int(x[5:-4])) 199 | for i in tqdm(files,desc='Doing Single Image Detection'): 200 | filename=detection_tmp_path + i 201 | 202 | detection_path = single_img_detect(target_path=filename,output_path=output_path,mode=mode,model=model,device=device,conf_thres=conf_thres,nms_thres=nms_thres) 203 | #reading each files 204 | img = cv2.imread(detection_path) 205 | height, width, layers = img.shape 206 | size = (width,height) 207 | frame_array.append(img) 208 | 209 | local_output_uri = output_path + raw_file_name + ".mp4" 210 | 211 | video_output = cv2.VideoWriter(local_output_uri,cv2.VideoWriter_fourcc(*'DIVX'), fps, size) 212 | 213 | for frame in tqdm(frame_array,desc='Creating Video'): 214 | # writing to a image array 215 | video_output.write(frame) 216 | video_output.release() 217 | shutil.rmtree(detection_tmp_path) -------------------------------------------------------------------------------- /Driverless_CV_Paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-core/MIT-Driverless-CV-TrainingInfra/bb8d6e22d8211aad1cd56f698723db8812bd6623/Driverless_CV_Paper.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Accurate Low Latency Visual Perception for Autonomous Racing: Challenges Mechanisms and Practical Solutions 2 | 3 | 4 |

5 | 6 |

7 | 8 | 9 | This is the Pytorch side code for the accurate low latency visual perception system introduced by *[Kieran Strobel, Sibo Zhu, Raphael Chang, and Skanda Koppula. "Accurate Low Latency Visual Perception for Autonomous Racing: Challenges Mechanisms and Practical Solutions" ](https://static1.squarespace.com/static/5b79970e3c3a53723fab8cfc/t/5dd31c1eb16d2c02ed66408d/1574116397888/Accurate__Low_Latency_Visual_Perception_for_Autonomous_Racing__Challenges__Mechanisms__and_Practical_Solutions_.pdf)*. If you use the code, please cite the paper: 10 | 11 | ``` 12 | @misc{strobel2020accurate, 13 | title={Accurate, Low-Latency Visual Perception for Autonomous Racing:Challenges, Mechanisms, and Practical Solutions}, 14 | author={Kieran Strobel and Sibo Zhu and Raphael Chang and Skanda Koppula}, 15 | year={2020}, 16 | eprint={2007.13971}, 17 | archivePrefix={arXiv}, 18 | primaryClass={cs.CV} 19 | } 20 | ``` 21 | 22 | Abstract 23 | 24 | >Autonomous racing provides the opportunity to test safety-critical perception pipelines at their limit. This paper describes the practical challenges and solutions to applying state-of-the-art computer vision algorithms to build a low-latency, high-accuracy perception system for DUT18 Driverless(DUT18D), a 4WD electric race car with podium finishes at all Formula Driverless competitions for which it raced. The key components of DUT18D include YOLOv3-based object detection, pose estimation and time synchronization on its dual stereovision/monovision camera setup. We highlight modifications required to adapt perception CNNs to racing domains, improvements to loss functions used for pose estimation, and methodologies for sub-microsecond camera synchronization among other improvements. We perform an extensive experimental evaluation of the system, demonstrating its accuracy and low-latency in real-world racing scenarios. 25 | 26 |

27 | 28 |

29 | 30 | ## CVC-YOLOv3 31 | 32 | CVC-YOLOv3 is the MIT Driverless Custom implementation of YOLOv3. 33 | 34 | One of our main contributions to vanilla YOLOv3 is the custom data loader we implemented: 35 | 36 | Each set of training images from a specific sensor/lens/perspective combination was uniformly rescaled such that its landmark size distribution matched that of the camera system on the vehicle. Each training image was then padded if too small or split up into multiple images if too large. 37 | 38 |
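For illustration only, a minimal sketch of that preprocessing idea is shown below (this is not the actual loader in `utils/datasets.py`; the target landmark size, the `(x1, y1, x2, y2)` box format, and the training resolution are assumptions made for this example):

```
# Illustrative sketch only -- the real loader lives in utils/datasets.py.
import cv2
import numpy as np

def rescale_to_landmark_size(image, boxes, target_landmark_px=40):
    """Rescale an image so its median cone (landmark) height matches a target size."""
    median_h = np.median([y2 - y1 for (x1, y1, x2, y2) in boxes])
    scale = target_landmark_px / max(float(median_h), 1e-6)
    resized = cv2.resize(image, None, fx=scale, fy=scale)
    scaled_boxes = [tuple(v * scale for v in box) for box in boxes]
    return resized, scaled_boxes

def pad_or_split(image, train_w=800, train_h=608):
    """Pad small images up to the training size, or tile oversized ones into crops."""
    h, w = image.shape[:2]
    if h <= train_h and w <= train_w:
        return [cv2.copyMakeBorder(image, 0, train_h - h, 0, train_w - w,
                                   cv2.BORDER_CONSTANT, value=(127, 127, 127))]
    # Edge tiles may come out smaller than the training size.
    return [image[y:y + train_h, x:x + train_w]
            for y in range(0, h, train_h)
            for x in range(0, w, train_w)]
```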

39 | 40 |

41 | 42 | 43 | Our final accuracy metrics for detecting traffic cones on the racing track: 44 | 45 | | mAP | Recall | Precision | 46 | |----|----|----| 47 | | 89.35% | 92.77% | 86.94% | 48 | 49 | #### CVC-YOLOv3 Dataset with *Formula Student Standard* is open-sourced ***[here](https://storage.cloud.google.com/mit-driverless-open-source/YOLO_Dataset.zip?authuser=1)*** 50 | 51 | ## RektNet 52 | 53 | RektNet is the MIT Driverless Custom Key Points Detection Network. 54 | 55 |

56 | 57 |

58 | 59 | RektNet takes in bounding boxes output by CVC-YOLOv3 and outputs seven key points on the traffic cone, which are used for depth estimation of traffic cones on the 3D map. 60 | 61 | Our final *Depth estimation error VS Distance* graph (The **Monocular** part): 62 | 63 |

64 | 65 |

66 | 67 | #### RektNet Dataset with *Formula Student Driverless Standard* is open-sourced ***[here](https://storage.cloud.google.com/mit-driverless-open-source/RektNet_Dataset.zip?authuser=1)*** 68 | 69 | ## License 70 | 71 | This repository is released under the Apache-2.0 license. See [LICENSE](LICENSE) for additional details. 72 | -------------------------------------------------------------------------------- /RektNet/README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | This is our custom Key Points detection network 4 | 5 | ## Requirements: 6 | 7 | * CUDA>=10.1 8 | * python==3.6 9 | * opencv_python==4.1.0.25 10 | * numpy==1.16.4 11 | * torch==1.1.0 12 | * torchvision==0.3.0 13 | * pandas==0.24.2 14 | * optuna==0.19.0 15 | * Pillow==6.2.1 16 | * protobuf==3.11.0 17 | * pymysql==0.9.3 18 | * tqdm==4.39.0 19 | 20 | ## Usage 21 | ### 1.Download our dataset 22 | 23 | ##### Download through GCP Tookit 24 | ###### 1.1 Image dataset: 25 | ``` 26 | gsutil cp -p gs://mit-driverless-open-source/RektNet_Dataset.zip ./dataset/ 27 | ``` 28 | then unzip 29 | ``` 30 | unzip dataset/RektNet_Dataset.zip -d ./dataset/ 31 | ``` 32 | ###### 1.2 Label csv file: 33 | ``` 34 | gsutil cp -p gs://mit-driverless-open-source/rektnet-training/rektnet_label.csv ./dataset/ 35 | ``` 36 | 37 | ##### Download manually (Optional) 38 | You can download image dataset and label csv from the link below and unzip them into `./dataset/RektNet_Dataset/` 39 | 40 | [Image dataset](https://storage.cloud.google.com/mit-driverless-open-source/RektNet_Dataset.zip?authuser=1) 41 | 42 | [All label csv](https://storage.cloud.google.com/mit-driverless-open-source/rektnet-training/rektnet_label.csv?authuser=1) 43 | 44 | ### 2.Training 45 | 46 | ``` 47 | python3 train_eval.py --study_name= 48 | ``` 49 | 50 | Once you've finished training, you can access the weights file in `./outputs/` 51 | 52 | ### 3.Inference 53 | 54 | #### To download our pretrained Keypoints weights for *Formula Student Standard*, click ***[here](https://storage.googleapis.com/mit-driverless-open-source/pretrained_kpt.pt)*** 55 | 56 | 57 | ``` 58 | python3 detect.py --model= --img= 59 | ``` 60 | 61 | Once you've finished inference, you can access the result in `./outputs/visualization/` 62 | 63 | #### Run Bayesian hyperparameter search 64 | 65 | Before running the Bayesian hyperparameter search, make sure you know what specific hyperparameter that you wish to tuning on, and a reasonable operating range/options of that hyperparameter. 
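For illustration, a customized `objective()` could look like the sketch below (the search ranges and the `run_training` helper are hypothetical stand-ins, not the repo's actual code; the loss-type options mirror the ones supported by training):

```
# Hypothetical sketch of a customized Optuna objective for RektNet.
import optuna

def run_training(lr, lr_gamma, loss_type):
    # Stand-in: in practice this would launch a RektNet training run with the
    # sampled hyperparameters and return the validation keypoint distance error
    # (train_eval.py writes that value to logs/<study_name>.txt).
    return 0.0

def objective(trial):
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-1)
    lr_gamma = trial.suggest_uniform('lr_gamma', 0.9, 1.0)
    loss_type = trial.suggest_categorical(
        'loss_type', ['l2_softargmax', 'l2_heatmap', 'l1_softargmax'])
    return run_training(lr, lr_gamma, loss_type)

study = optuna.create_study(study_name='rektnet_search', direction='minimize')
study.optimize(objective, n_trials=20)
```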
66 | 67 | Go into the `objective()` function of `train_hyper.py` edit your custom search 68 | 69 | Then launch your Bayesian hyperparameter search 70 | ``` 71 | python3 train_eval_hyper.py --study_name= 72 | ``` 73 | 74 | #### Convert .weights to .onnx manually 75 | 76 | Though our training scrip will do automatical .pt->.onnx conversion, you can always do it manually 77 | ``` 78 | python3 yolo2onnx.py --onnx_name= --weights_uri= 79 | ``` -------------------------------------------------------------------------------- /RektNet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-core/MIT-Driverless-CV-TrainingInfra/bb8d6e22d8211aad1cd56f698723db8812bd6623/RektNet/__init__.py -------------------------------------------------------------------------------- /RektNet/cross_ratio_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | 7 | 8 | class CrossRatioLoss(nn.Module): 9 | def __init__(self, loss_type, include_geo, geo_loss_gamma_horz, geo_loss_gamma_vert): 10 | super(CrossRatioLoss, self).__init__() 11 | self.loss_type = loss_type 12 | self.include_geo = include_geo 13 | self.geo_loss_gamma_vert = geo_loss_gamma_vert 14 | self.geo_loss_gamma_horz = geo_loss_gamma_horz 15 | print(f"Including geometric loss: {include_geo}") 16 | print(f"Loss type: {loss_type}") 17 | 18 | # input is the heatmap output by the model 19 | # points is the x,y locations of the points output by the model 20 | def forward(self, heatmap, points, target_hm, target_points): 21 | if(self.loss_type == 'l2_softargmax' or self.loss_type == 'l2_sm'): 22 | mse_loss = (points - target_points) ** 2 23 | location_loss = mse_loss.sum(2).sum(1).mean() 24 | elif(self.loss_type == 'l2_heatmap' or self.loss_type == 'l2_hm'): 25 | mse_loss = (heatmap - target_hm) ** 2 26 | location_loss = mse_loss.sum(3).sum(2).sum(1).mean() 27 | elif(self.loss_type == 'l1_softargmax' or self.loss_type == 'l1_sm'): 28 | l1_loss = torch.abs(points - target_points) 29 | location_loss = l1_loss.sum(2).sum(1).mean() 30 | else: 31 | print("Did not recognize loss function selection!") 32 | sys.exit(1) 33 | 34 | if self.include_geo: 35 | # Loss on co-linearity of points along side of cone 36 | v53 = F.normalize(points[:, 5] - points[:, 3], dim=1) 37 | v31 = F.normalize(points[:, 3] - points[:, 1], dim=1) 38 | vA = 1.0 - torch.tensordot(v31, v53, dims=([1], [1])) 39 | v10 = F.normalize(points[:, 1] - points[:, 0], dim=1) 40 | vB = 1.0 - torch.tensordot(v10, v31, dims=([1], [1])) 41 | 42 | v64 = F.normalize(points[:, 6] - points[:, 4], dim=1) 43 | v42 = F.normalize(points[:, 4] - points[:, 2], dim=1) 44 | vC = 1.0 - torch.tensordot(v64, v42, dims=([1], [1])) 45 | 46 | v20 = F.normalize(points[:, 2] - points[:, 0], dim=1) 47 | vD = 1.0 - torch.tensordot(v42, v20, dims=([1], [1])) 48 | 49 | # Loss on horizontals on cones (color boundaries) 50 | h21 = F.normalize(points[:, 2] - points[:, 1], dim=1) 51 | h43 = F.normalize(points[:, 4] - points[:, 3], dim=1) 52 | hA = 1.0 - torch.tensordot(h43, h21, dims=([1], [1])) 53 | 54 | h65 = F.normalize(points[:, 6] - points[:, 5], dim=1) 55 | hB = 1.0 - torch.tensordot(h65, h43, dims=([1], [1])) 56 | 57 | geo_loss = self.geo_loss_gamma_horz * (hA + hB).mean() / 2 + self.geo_loss_gamma_vert * (vA + vB + vC + vD).mean() / 4 58 | else: 59 | geo_loss = torch.tensor(0) 60 | 
#print('----------') 61 | #print('Geo Loss: ' + str(geo_loss.item())) 62 | #print('Location Loss: ' + str(location_loss.item())) 63 | return location_loss, geo_loss, location_loss+geo_loss 64 | 65 | -------------------------------------------------------------------------------- /RektNet/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | from torchvision import transforms 4 | import numpy as np 5 | import cv2 6 | import os 7 | import shutil 8 | import PIL 9 | from PIL import Image, ImageDraw 10 | import random 11 | import math 12 | import sys 13 | import copy 14 | from utils import vis_kpt_and_save, vis_hm_and_save, prep_image, prep_label, get_scale, scale_labels 15 | 16 | def print_tensor_stats(x, name): 17 | flattened_x = x.cpu().detach().numpy().flatten() 18 | avg = sum(flattened_x)/len(flattened_x) 19 | print(f"\t\t{name}: {avg},{min(flattened_x)},{max(flattened_x)}") 20 | 21 | class ConeDataset(Dataset): 22 | def __init__(self, images, labels, dataset_path, target_image_size, save_checkpoints, vis_dataloader, transform=None): 23 | self.images = images 24 | self.labels = labels 25 | self.target_image_size = target_image_size 26 | self.transform = transform 27 | self.save_checkpoints = save_checkpoints 28 | self.vis_dataloader = vis_dataloader 29 | self.dataset_path = dataset_path 30 | 31 | def __len__(self): 32 | return len(self.images) 33 | 34 | def __getitem__(self, index): 35 | image = cv2.imread(self.dataset_path+self.images[index]) 36 | orig_image_size = image.shape 37 | image_name = self.images[index].split(".")[0] 38 | image = prep_image(image=image,target_image_size=self.target_image_size) 39 | 40 | hm = prep_label(label=self.labels[index], target_image_size=self.target_image_size, orig_image_size=orig_image_size, image_path=self.images[index]) 41 | h_scale, w_scale = get_scale(actual_image_size=orig_image_size, target_image_size=self.target_image_size) 42 | scaled_labels = scale_labels(self.labels[index], h_scale, w_scale) 43 | scaled_labels = scaled_labels / self.target_image_size[0] 44 | 45 | if self.vis_dataloader: 46 | tmp_image = copy.deepcopy(image) 47 | ##### visualize label ##### 48 | 49 | vis_kpt_and_save(np_image=tmp_image, image_name=image_name, h_scale=h_scale, w_scale=w_scale, labels=scaled_labels) 50 | 51 | ##### visualize heat-map ##### 52 | vis_hm_and_save(np_heat_map=hm, image_name=image_name) 53 | 54 | image = image.transpose((2, 0, 1)) / 255.0 55 | tensor_image = torch.from_numpy(image).type('torch.FloatTensor') 56 | return tensor_image, torch.from_numpy(hm).type('torch.FloatTensor'), torch.from_numpy(scaled_labels).type('torch.FloatTensor'), image_name, orig_image_size 57 | 58 | -------------------------------------------------------------------------------- /RektNet/dataset/README.md: -------------------------------------------------------------------------------- 1 | This is the folder that stores dataset csv files 2 | 3 | To download our open-sourced dataset label from MIT Driverless GCP bucket: 4 | ``` 5 | gsutil cp -p gs://mit-driverless-open-source/rektnet-training/rektnet_label.csv ./ 6 | ``` 7 | -------------------------------------------------------------------------------- /RektNet/detect.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | import argparse 5 | import sys 6 | import os 7 | import sys 8 | import shutil 9 | from utils import 
vis_tensor_and_save, prep_image 10 | 11 | from keypoint_net import KeypointNet 12 | 13 | def main(model,img,img_size,output,flip,rotate): 14 | 15 | output_path = output 16 | 17 | model_path = model 18 | 19 | model_filepath = model_path 20 | 21 | image_path = img 22 | 23 | image_filepath = image_path 24 | 25 | img_name = '_'.join(image_filepath.split('/')[-1].split('.')[0].split('_')[-5:]) 26 | 27 | image_size = (img_size, img_size) 28 | 29 | image = cv2.imread(image_filepath) 30 | h, w, _ = image.shape 31 | 32 | image = prep_image(image=image,target_image_size=image_size) 33 | image = (image.transpose((2, 0, 1)) / 255.0)[np.newaxis, :] 34 | image = torch.from_numpy(image).type('torch.FloatTensor') 35 | 36 | model = KeypointNet() 37 | model.load_state_dict(torch.load(model_filepath).get('model')) 38 | model.eval() 39 | output = model(image) 40 | out = np.empty(shape=(0, output[0][0].shape[2])) 41 | for o in output[0][0]: 42 | chan = np.array(o.cpu().data) 43 | cmin = chan.min() 44 | cmax = chan.max() 45 | chan -= cmin 46 | chan /= cmax - cmin 47 | out = np.concatenate((out, chan), axis=0) 48 | cv2.imwrite(output_path + img_name + "_hm.jpg", out * 255) 49 | print(f'please check the output image here: {output_path + img_name + "_hm.jpg", out * 255}') 50 | 51 | 52 | image = cv2.imread(image_filepath) 53 | h, w, _ = image.shape 54 | 55 | vis_tensor_and_save(image=image, h=h, w=w, tensor_output=output[1][0].cpu().data, image_name=img_name, output_uri=output_path) 56 | 57 | if __name__ == "__main__": 58 | parser = argparse.ArgumentParser(description='Keypoints Visualization') 59 | def add_bool_arg(name, default, help): 60 | arg_group = parser.add_mutually_exclusive_group(required=False) 61 | arg_group.add_argument('--' + name, dest=name, action='store_true', help=help) 62 | arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 63 | parser.set_defaults(**{name:default}) 64 | 65 | parser.add_argument('--model', help='path to model', type=str, required=True) 66 | parser.add_argument('--img', help='path to single image', type=str, default="gs://mit-dut-driverless-external/ConeColourLabels/vid_3_frame_22063_0.jpg") 67 | parser.add_argument('--img_size', help='image size', default=80, type=int) 68 | parser.add_argument('--output', help='path to upload the detection', default="outputs/visualization/") 69 | 70 | add_bool_arg('flip', default=False, help='flip image') 71 | add_bool_arg('rotate', default=False, help='rotate image') 72 | 73 | args = parser.parse_args(sys.argv[1:]) 74 | 75 | main(model=args.model,img=args.img,img_size=args.img_size,output=args.output,flip=args.flip,rotate=args.rotate) 76 | -------------------------------------------------------------------------------- /RektNet/keypoint_net.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional 4 | from resnet import ResNet 5 | from cross_ratio_loss import CrossRatioLoss 6 | 7 | def print_tensor_stats(x, name): 8 | flattened_x = x.cpu().detach().numpy().flatten() 9 | avg = sum(flattened_x)/len(flattened_x) 10 | print(f"\t\t\t{name}: {avg},{min(flattened_x)},{max(flattened_x)}") 11 | 12 | class KeypointNet(nn.Module): 13 | def __init__(self, num_kpt=7, image_size=(80, 80), onnx_mode=False, init_weight=True): 14 | super(KeypointNet, self).__init__() 15 | net_size = 16 16 | 17 | self.conv = nn.Conv2d(in_channels=3, out_channels=net_size, kernel_size=7, stride=1, padding=3) 18 | # 
torch.nn.init.xavier_uniform(self.conv.weight) 19 | self.bn = nn.BatchNorm2d(net_size) 20 | self.relu = nn.ReLU() 21 | self.res1 = ResNet(net_size, net_size) 22 | self.res2 = ResNet(net_size, net_size * 2) 23 | self.res3 = ResNet(net_size * 2, net_size * 4) 24 | self.res4 = ResNet(net_size * 4, net_size * 8) 25 | self.out = nn.Conv2d(in_channels=net_size * 8, out_channels=num_kpt, kernel_size=1, stride=1, padding=0) 26 | # torch.nn.init.xavier_uniform(self.out.weight) 27 | if init_weight: 28 | self._initialize_weights() 29 | self.image_size = image_size 30 | self.num_kpt = num_kpt 31 | self.onnx_mode = onnx_mode 32 | 33 | def _initialize_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | if m.bias is not None: 38 | nn.init.constant_(m.bias, 0) 39 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 40 | nn.init.constant_(m.weight, 1) 41 | nn.init.constant_(m.bias, 0) 42 | elif isinstance(m, nn.Linear): 43 | nn.init.normal_(m.weight, 0, 0.01) 44 | nn.init.constant_(m.bias, 0) 45 | 46 | def flat_softmax(self, inp): 47 | flat = inp.view(-1, self.image_size[0] * self.image_size[1]) 48 | flat = torch.nn.functional.softmax(flat, 1) 49 | return flat.view(-1, self.num_kpt, self.image_size[0], self.image_size[1]) 50 | 51 | def soft_argmax(self, inp): 52 | values_y = torch.linspace(0, (self.image_size[0] - 1.) / self.image_size[0], self.image_size[0], dtype=inp.dtype, device=inp.device) 53 | values_x = torch.linspace(0, (self.image_size[1] - 1.) / self.image_size[1], self.image_size[1], dtype=inp.dtype, device=inp.device) 54 | exp_y = (inp.sum(3) * values_y).sum(-1) 55 | exp_x = (inp.sum(2) * values_x).sum(-1) 56 | return torch.stack([exp_x, exp_y], -1) 57 | 58 | def forward(self, x): 59 | act1 = self.relu(self.bn(self.conv(x))) 60 | act2 = self.res1(act1) 61 | act3 = self.res2(act2) 62 | act4 = self.res3(act3) 63 | act5 = self.res4(act4) 64 | hm = self.out(act5) 65 | if self.onnx_mode: 66 | return hm 67 | else: 68 | hm = self.flat_softmax(self.out(act5)) 69 | out = self.soft_argmax(hm) 70 | return hm, out.view(-1, self.num_kpt, 2) 71 | 72 | if __name__=='__main__': 73 | from torch.autograd import Variable 74 | from torch import autograd 75 | net = KeypointNet() 76 | test = net(Variable(torch.randn(3, 3, 80, 80))) 77 | loss = CrossRatioLoss() 78 | target = autograd.Variable(torch.randn(3, 7, 2)) 79 | l = loss(test, target) 80 | -------------------------------------------------------------------------------- /RektNet/keypoint_tutorial_util.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | import sys 4 | import os 5 | import multiprocessing 6 | import shutil 7 | from tqdm import tqdm 8 | import numpy as np 9 | import cv2 10 | import copy 11 | from datetime import datetime 12 | from tqdm import tqdm 13 | 14 | import PIL 15 | from PIL import Image, ImageDraw 16 | 17 | import torch 18 | from torch.autograd import Variable 19 | from torch.backends import cudnn 20 | from torch import nn 21 | from torch import optim 22 | from torch.utils.data import DataLoader 23 | from torchvision import transforms 24 | 25 | from keypoint_net import KeypointNet 26 | from cross_ratio_loss import CrossRatioLoss 27 | from utils import Logger 28 | from utils import load_train_csv_dataset, prep_image, visualize_data, vis_tensor_and_save, calculate_distance, calculate_mean_distance 29 | from dataset import ConeDataset 30 | 31 | cuda = 
torch.cuda.is_available() 32 | device = torch.device('cuda:0' if cuda else 'cpu') 33 | 34 | def print_tensor_stats(x, name): 35 | flattened_x = x.cpu().detach().numpy().flatten() 36 | avg = sum(flattened_x)/len(flattened_x) 37 | print(f"\t\t{name}: {avg},{min(flattened_x)},{max(flattened_x)}") 38 | 39 | def eval_model(model, dataloader, loss_function, input_size): 40 | print("\tStarting validation...") 41 | model.eval() 42 | with torch.no_grad(): 43 | loss_sums = [0,0,0] 44 | batch_num = 0 45 | for x_batch,y_hm_batch,y_point_batch,image_name, _ in dataloader: 46 | x_batch = x_batch.to(device) 47 | y_hm_batch = y_hm_batch.to(device) 48 | y_point_batch = y_point_batch.to(device) 49 | output = model(x_batch) 50 | loc_loss, geo_loss, loss = loss_function(output[0], output[1], y_hm_batch, y_point_batch) 51 | loss_sums[0] += loc_loss.item() 52 | loss_sums[1] += geo_loss.item() 53 | loss_sums[2] += loss.item() 54 | 55 | batch_num += 1 56 | 57 | val_loc_loss = loss_sums[0] / batch_num 58 | val_geo_loss = loss_sums[1] / batch_num 59 | val_loss = loss_sums[2] / batch_num 60 | print(f"\tValidation: MSE/Geometric/Total Loss: {round(val_loc_loss,10)}/{round(val_geo_loss,10)}/{round(val_loss,10)}") 61 | 62 | return val_loc_loss, val_geo_loss, val_loss 63 | 64 | def print_kpt_L2_distance(model, dataloader, kpt_keys, study_name, evaluate_mode, input_size): 65 | kpt_distances = [] 66 | if evaluate_mode: 67 | validation_textfile = open('logs/rektnet_validation.txt', 'a') 68 | 69 | for x_batch, y_hm_batch, y_point_batch, _, image_shape in dataloader: 70 | x_batch = x_batch.to(device) 71 | y_hm_batch = y_hm_batch.to(device) 72 | y_point_batch = y_point_batch.to(device) 73 | 74 | output = model(x_batch) 75 | 76 | pred_points = output[1]*x_batch.shape[1] 77 | pred_points = pred_points.data.cpu().numpy() 78 | pred_points *= input_size 79 | target_points = y_point_batch*x_batch.shape[1] 80 | target_points = target_points.data.cpu().numpy() 81 | target_points *= input_size 82 | 83 | kpt_dis = calculate_distance(target_points, pred_points) 84 | 85 | ##### for validation knowledge of avg kpt mse vs BB size distribution ##### 86 | if evaluate_mode: 87 | height,width,_ = image_shape 88 | print(width.numpy()[0],height.numpy()[0]) 89 | print(kpt_dis) 90 | 91 | single_img_kpt_dis_sum = sum(kpt_dis) 92 | validation_textfile.write(f"{[width.numpy()[0],height.numpy()[0]]}:{single_img_kpt_dis_sum}\n") 93 | ########################################################################### 94 | 95 | kpt_distances.append(kpt_dis) 96 | if evaluate_mode: 97 | validation_textfile.close() 98 | final_stats, total_dist, final_stats_std = calculate_mean_distance(kpt_distances) 99 | print(f'Mean distance error of each keypoint is:') 100 | for i, kpt_key in enumerate(kpt_keys): 101 | print(f'\t{kpt_key}: {final_stats[i]}') 102 | print(f'Standard deviation of each keypoint is:') 103 | for i, kpt_key in enumerate(kpt_keys): 104 | print(f'\t{kpt_key}: {final_stats_std[i]}') 105 | print(f'Total distance error is: {total_dist}') 106 | ##### updating best result for optuna study ##### 107 | result = open("logs/" + study_name + ".txt", "w" ) 108 | result.write(str(total_dist)) 109 | result.close() 110 | ########################################### -------------------------------------------------------------------------------- /RektNet/keypoints_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Train 
Your Own Key Points Detection Networks\n", 8 | "\n", 9 | "![](https://user-images.githubusercontent.com/22118253/69765965-fd65a700-1143-11ea-8804-cd1d33f2e824.png)\n", 10 | "\n", 11 | "In this notebook, we will demonstrate \n", 12 | "- how to train your own KeyPoints detection network and do inference on pictures of traffic cone.\n", 13 | "\n", 14 | "**[Accurate Low Latency Visual Perception for Autonomous Racing: Challenges Mechanisms and Practical Solutions](https://github.com/mit-han-lab/once-for-all)** is an accurate low latency visual perception system introduced by Kieran Strobel, Sibo Zhu, Raphael Chang, and Skanda Koppula.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## 1. Preparation\n", 22 | "Let's first install all the required packages:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "! sudo apt install unzip\n", 32 | "print('Installing numpy...')\n", 33 | "! pip3 install numpy \n", 34 | "# tqdm is a package for displaying a progress bar.\n", 35 | "print('Installing tqdm (progress bar) ...')\n", 36 | "! pip3 install tqdm \n", 37 | "print('Installing matplotlib...')\n", 38 | "! pip3 install matplotlib \n", 39 | "print('Installing dataset reader...')\n", 40 | "! pip3 install pandas" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "Let' s clone our repo first..." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "! git clone https://github.com/cv-core/MIT-Driverless-CV-TrainingInfra.git\n", 57 | "\n", 58 | "! mv MIT-Driverless-CV-TrainingInfra/RektNet/* ." 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Before we start training, let's download the Cone Detection dataset and the corresponding label and intial training weights. " 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "print(\"Downloading Training Dataset\")\n", 75 | "! wget https://storage.googleapis.com/mit-driverless-open-source/RektNet_Dataset.zip\n", 76 | "! unzip -q RektNet_Dataset.zip\n", 77 | "! mv RektNet_Dataset dataset/ && rm RektNet_Dataset.zip\n", 78 | "print(\"Downloading Training and Validation Label\")\n", 79 | "! cd dataset/ && wget https://storage.googleapis.com/mit-driverless-open-source/rektnet-training/mini_rektnet_label.csv && mv mini_rektnet_label.csv rektnet_label.csv && cd .." 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "## 2. 
Training\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "First, import all the packages used in this tutorial:" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "import argparse\n", 103 | "import tempfile\n", 104 | "import sys\n", 105 | "import os\n", 106 | "import multiprocessing\n", 107 | "import shutil\n", 108 | "from tqdm import tqdm\n", 109 | "import numpy as np\n", 110 | "import cv2\n", 111 | "import copy\n", 112 | "from datetime import datetime\n", 113 | "from tqdm import tqdm\n", 114 | "\n", 115 | "import PIL\n", 116 | "from PIL import Image, ImageDraw\n", 117 | "\n", 118 | "import torch\n", 119 | "from torch.autograd import Variable\n", 120 | "from torch.backends import cudnn\n", 121 | "from torch import nn\n", 122 | "from torch import optim\n", 123 | "from torch.utils.data import DataLoader\n", 124 | "from torchvision import transforms\n", 125 | "\n", 126 | "from keypoint_net import KeypointNet\n", 127 | "from cross_ratio_loss import CrossRatioLoss\n", 128 | "from utils import Logger\n", 129 | "from utils import load_train_csv_dataset, prep_image, visualize_data, vis_tensor_and_save, calculate_distance, calculate_mean_distance\n", 130 | "from dataset import ConeDataset\n", 131 | "from keypoint_tutorial_util import print_tensor_stats, eval_model, print_kpt_L2_distance\n", 132 | "\n", 133 | "cv2.setRNGSeed(2)\n", 134 | "torch.manual_seed(2)\n", 135 | "np.random.seed(2)\n", 136 | "torch.backends.cudnn.deterministic = True\n", 137 | "torch.backends.cudnn.benchmark = True\n", 138 | "cuda = torch.cuda.is_available()\n", 139 | "device = torch.device('cuda:0' if cuda else 'cpu')\n", 140 | "\n", 141 | "visualization_tmp_path = \"/outputs/visualization/\"" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Successfully imported all packages and configured random seed to 17!" 
149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "Training Config" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "study_name=\"tutorial\"\n", 165 | "\n", 166 | "current_month = datetime.now().strftime('%B').lower()\n", 167 | "current_year = str(datetime.now().year)\n", 168 | "if not os.path.exists(os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + study_name + '/')):\n", 169 | " os.makedirs(os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + study_name + '/'))\n", 170 | "output_uri = os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + study_name + '/')\n", 171 | "\n", 172 | "save_file_name = 'logs/' + output_uri.split('/')[-2]\n", 173 | "sys.stdout = Logger(save_file_name + '.log')\n", 174 | "sys.stderr = Logger(save_file_name + '.error')\n", 175 | "\n", 176 | "# Training related config\n", 177 | "INPUT_SIZE = (80, 80) # dataset size\n", 178 | "KPT_KEYS = [\"top\", \"mid_L_top\", \"mid_R_top\", \"mid_L_bot\", \"mid_R_bot\", \"bot_L\", \"bot_R\"] # set up geometry loss keys\n", 179 | "intervals = int(2) # for normal training, set it to 4\n", 180 | "val_split = float(0.15) # training validation split ratio\n", 181 | "batch_size= int(8)\n", 182 | "num_epochs= int(4) # for normal training, set it to 1024\n", 183 | "train_csv = \"dataset/rektnet_label.csv\"\n", 184 | "dataset_path = \"dataset/RektNet_Dataset/\"\n", 185 | "vis_dataloader = False # visualize dataset\n", 186 | "save_checkpoints = True\n", 187 | "\n", 188 | "# Training related hyperparameter\n", 189 | "lr = 1e-1\n", 190 | "lr_gamma = 0.999\n", 191 | "geo_loss = True\n", 192 | "geo_loss_gamma_vert = 0\n", 193 | "geo_loss_gamma_horz = 0\n", 194 | "loss_type = \"l1_softargmax\" # loss function type: l2_softargmax|l2_heatmap|l1_softargmax\n", 195 | "best_val_loss = float('inf')\n", 196 | "best_epoch = 0\n", 197 | "max_tolerance = 8\n", 198 | "tolerance = 0\n", 199 | "num_kpt=len(KPT_KEYS)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "Create pytorch dataloaders for train and validation sets." 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "train_images, train_labels, val_images, val_labels = load_train_csv_dataset(train_csv, validation_percent=val_split, keypoint_keys=KPT_KEYS, dataset_path=dataset_path, cache_location=\"./gs/\")\n", 216 | "\n", 217 | "train_dataset = ConeDataset(images=train_images, labels=train_labels, dataset_path=dataset_path, target_image_size=INPUT_SIZE, save_checkpoints=save_checkpoints, vis_dataloader=vis_dataloader)\n", 218 | "train_dataloader = DataLoader(train_dataset, batch_size= batch_size, shuffle=False, num_workers=0)\n", 219 | "val_dataset = ConeDataset(images=val_images, labels=val_labels, dataset_path=dataset_path, target_image_size=INPUT_SIZE, save_checkpoints=save_checkpoints, vis_dataloader=vis_dataloader)\n", 220 | "val_dataloader = DataLoader(val_dataset, batch_size= 1, shuffle=False, num_workers=0)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "Define model, optimizer and loss function." 
228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "model = KeypointNet(len(KPT_KEYS), INPUT_SIZE, onnx_mode=False)\n", 237 | "model = model.to(device)\n", 238 | "if torch.cuda.is_available():\n", 239 | " model.cuda()\n", 240 | "\n", 241 | "optimizer = optim.Adam(model.parameters(), lr=lr)\n", 242 | "scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_gamma)\n", 243 | "loss_func = CrossRatioLoss(loss_type, geo_loss, geo_loss_gamma_horz, geo_loss_gamma_vert)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Training" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "for epoch in range(num_epochs):\n", 260 | " print(f\"EPOCH {epoch}\")\n", 261 | " model.train()\n", 262 | " total_loss = [0,0,0] # location/geometric/total\n", 263 | " batch_num = 0\n", 264 | "\n", 265 | " train_process = tqdm(train_dataloader)\n", 266 | " for x_batch, y_hm_batch, y_points_batch, image_name, _ in train_process:\n", 267 | " x_batch = x_batch.to(device, non_blocking=True)\n", 268 | " y_hm_batch = y_hm_batch.to(device, non_blocking=True)\n", 269 | " y_points_batch = y_points_batch.to(device, non_blocking=True)\n", 270 | "\n", 271 | " # Zero the gradients.\n", 272 | " if optimizer is not None:\n", 273 | " optimizer.zero_grad()\n", 274 | "\n", 275 | " # Compute output and loss.\n", 276 | " output = model(x_batch)\n", 277 | " loc_loss, geo_loss, loss = loss_func(output[0], output[1], y_hm_batch, y_points_batch)\n", 278 | " loss.backward()\n", 279 | " optimizer.step()\n", 280 | "\n", 281 | " loc_loss, geo_loss, loss = loc_loss.item(), geo_loss.item(), loss.item()\n", 282 | " train_process.set_description(f\"Batch {batch_num}. Location Loss: {round(loc_loss,5)}. Geo Loss: {round(geo_loss,5)}. Total Loss: {round(loss,5)}\")\n", 283 | " total_loss[0] += loc_loss\n", 284 | " total_loss[1] += geo_loss\n", 285 | " total_loss[2] += loss\n", 286 | " batch_num += 1\n", 287 | "\n", 288 | " print(f\"\\tTraining: MSE/Geometric/Total Loss: {round(total_loss[0]/batch_num,10)}/{round(total_loss[1]/batch_num,10)}/{round(total_loss[2]/batch_num,10)}\")\n", 289 | " val_loc_loss, val_geo_loss, val_loss = eval_model(model=model, dataloader=val_dataloader, loss_function=loss_func, input_size=INPUT_SIZE)\n", 290 | "\n", 291 | " scheduler.step()\n", 292 | "\n", 293 | " if val_loss < best_val_loss:\n", 294 | " best_val_loss = val_loss\n", 295 | " best_epoch = epoch\n", 296 | " tolerance = 0\n", 297 | " else:\n", 298 | " tolerance += 1\n", 299 | "\n", 300 | " if save_checkpoints and epoch != 0 and (epoch + 1) % intervals == 0:\n", 301 | " # Save the latest weights\n", 302 | " gs_pt_uri = os.path.join(output_uri, \"{epoch}_loss_{loss}.pt\".format(epoch=epoch, loss=round(val_loss, 2)))\n", 303 | " print(f'Saving model to {gs_pt_uri}')\n", 304 | " checkpoint = {'epoch': epoch,\n", 305 | " 'model': model.state_dict(),\n", 306 | " 'optimizer': optimizer.state_dict()}\n", 307 | " torch.save(checkpoint, gs_pt_uri)\n", 308 | " if tolerance >= max_tolerance:\n", 309 | " print(f\"Training is stopped due; loss no longer decreases. Epoch {best_epoch} is has the best validation loss.\")\n", 310 | " break" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "## 3. 
Inference" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "Download target image file for inference" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "! wget https://storage.googleapis.com/mit-driverless-open-source/test_kpt.png\n", 334 | " \n", 335 | "import cv2\n", 336 | "%matplotlib inline\n", 337 | "import matplotlib.pylab as pt\n", 338 | "\n", 339 | "image = cv2.imread(\"test_kpt.png\")\n", 340 | "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 341 | "pt.fig = pt.figure(figsize=(5, 5))\n", 342 | "\n", 343 | "pt.imshow(image)\n", 344 | "pt.axis('off')\n", 345 | "pt.title('Keypoints Testing Image')" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "Download pretrained weights for inference" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "! wget https://storage.googleapis.com/mit-driverless-open-source/pretrained_kpt.pt" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "Set up config file for inference" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "model = \"pretrained_kpt.pt\"\n", 378 | "img = \"test_kpt.png\"\n", 379 | "img_size = int(80)\n", 380 | "output = \"outputs/visualization/\"\n", 381 | "flip = False\n", 382 | "rotate = False" 383 | ] 384 | }, 385 | { 386 | "cell_type": "markdown", 387 | "metadata": {}, 388 | "source": [ 389 | "Prepared Image" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "output_path = output\n", 399 | "model_filepath = model\n", 400 | "image_filepath = img\n", 401 | "\n", 402 | "img_name = '_'.join(image_filepath.split('/')[-1].split('.')[0].split('_')[-5:])\n", 403 | "\n", 404 | "image_size = (img_size, img_size)\n", 405 | "\n", 406 | "image = cv2.imread(image_filepath)\n", 407 | "\n", 408 | "image = prep_image(image=image,target_image_size=image_size)\n", 409 | "image = (image.transpose((2, 0, 1)) / 255.0)[np.newaxis, :]\n", 410 | "image = torch.from_numpy(image).type('torch.FloatTensor')" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": { 417 | "scrolled": true 418 | }, 419 | "outputs": [], 420 | "source": [ 421 | "model = KeypointNet()\n", 422 | "model.load_state_dict(torch.load(model_filepath).get('model'))\n", 423 | "model.eval()\n", 424 | "output = model(image)\n", 425 | "out = np.empty(shape=(0, output[0][0].shape[2]))\n", 426 | "for o in output[0][0]:\n", 427 | " chan = np.array(o.cpu().data)\n", 428 | " cmin = chan.min()\n", 429 | " cmax = chan.max()\n", 430 | " chan -= cmin\n", 431 | " chan /= cmax - cmin\n", 432 | " out = np.concatenate((out, chan), axis=0)\n", 433 | "cv2.imwrite(output_path + img_name + \"_hm.jpg\", out * 255)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "image = cv2.imread(image_filepath)\n", 443 | "h, w, _ = image.shape\n", 444 | "\n", 445 | "image = vis_tensor_and_save(image=image, h=h, w=w, tensor_output=output[1][0].cpu().data, image_name=img_name, output_uri=output_path)\n", 446 | "\n", 447 | "image = 
cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", 448 | "\n", 449 | "pt.fig = pt.figure(figsize=(5, 5))\n", 450 | "\n", 451 | "pt.imshow(image)\n", 452 | "pt.axis('off')\n", 453 | "pt.title('Keypoints Detection Result')" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "Congratulations! You've finished all the content of this tutorial!\n", 461 | "Hope you enjoy playing with the our object detection model. If you are interested, please refer to our paper and GitHub Repo for further details.\n", 462 | "\n", 463 | "## Reference\n", 464 | "[1] Kieran Strobel, Sibo Zhu, Raphael Chang and Skanda Koppula.\n", 465 | "**Accurate, Low-Latency Visual Perception for Autonomous Racing:Challenges, Mechanisms, and Practical Solutions**. In *IROS* 2020.\n", 466 | "[[paper]](https://arxiv.org/abs/2007.13971), [[code]](https://github.com/cv-core/MIT-Driverless-CV-TrainingInfra)." 467 | ] 468 | } 469 | ], 470 | "metadata": { 471 | "file_extension": ".py", 472 | "kernelspec": { 473 | "display_name": "Python 3.6.9 64-bit", 474 | "language": "python", 475 | "name": "python36964bitfb145c69a41e49ec9393ba0ede4656b6" 476 | }, 477 | "language_info": { 478 | "codemirror_mode": { 479 | "name": "ipython", 480 | "version": 3 481 | }, 482 | "file_extension": ".py", 483 | "mimetype": "text/x-python", 484 | "name": "python", 485 | "nbconvert_exporter": "python", 486 | "pygments_lexer": "ipython3", 487 | "version": "3.6.9" 488 | }, 489 | "mimetype": "text/x-python", 490 | "name": "python", 491 | "npconvert_exporter": "python", 492 | "pygments_lexer": "ipython3", 493 | "version": 3 494 | }, 495 | "nbformat": 4, 496 | "nbformat_minor": 2 497 | } 498 | -------------------------------------------------------------------------------- /RektNet/logs/README.md: -------------------------------------------------------------------------------- 1 | this is a placeholder for the log folder 2 | -------------------------------------------------------------------------------- /RektNet/outputs/README.md: -------------------------------------------------------------------------------- 1 | This is the folder that we output weights file by default -------------------------------------------------------------------------------- /RektNet/outputs/visualization/README.md: -------------------------------------------------------------------------------- 1 | This is the folder that we store all visualization by default -------------------------------------------------------------------------------- /RektNet/pt_to_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.backends import cudnn 4 | from torch import nn 5 | from keypoint_net import KeypointNet 6 | 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.backends import cudnn 10 | from torch import nn 11 | 12 | import argparse 13 | import os 14 | import sys 15 | 16 | 17 | def main(weights_uri,onnx_name): 18 | model = KeypointNet(7, (80, 80),onnx_mode=True) 19 | 20 | weights_path = weights_uri 21 | 22 | model.load_state_dict(torch.load(weights_path, map_location='cpu').get('model')) 23 | torch.onnx.export(model, torch.randn(1, 3, 80, 80), onnx_name) 24 | 25 | print("onnx file conversion succeed and saved at: " + onnx_name) 26 | 27 | if __name__ == "__main__": 28 | 29 | parser = argparse.ArgumentParser(description='.pt weights file convert to .onnx') 30 | parser.add_argument('--onnx_name', default='new_keypoints.onnx', 31 | 
help='the name of output onnx file') 32 | 33 | parser.add_argument('--weights_uri', required=True, 34 | help='Path to weights file') 35 | args = parser.parse_args() 36 | 37 | 38 | main(weights_uri=args.weights_uri, onnx_name=args.onnx_name) 39 | -------------------------------------------------------------------------------- /RektNet/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv_python==4.1.0.25 2 | numpy==1.16.4 3 | torch==1.1.0 4 | torchvision==0.3.0 5 | pandas==0.24.2 6 | optuna==0.19.0 7 | Pillow==6.2.1 8 | protobuf==3.11.0 9 | pymysql==0.9.3 10 | tqdm==4.39.0 11 | -------------------------------------------------------------------------------- /RektNet/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def print_tensor_stats(x, name): 4 | flattened_x = x.cpu().detach().numpy().flatten() 5 | avg = sum(flattened_x)/len(flattened_x) 6 | print(f"\t\t\t\t{name}: {round(avg,10)},{round(min(flattened_x),10)},{round(max(flattened_x),10)}") 7 | 8 | class ResNet(nn.Module): 9 | def __init__(self, in_channels, out_channels): 10 | super(ResNet, self).__init__() 11 | 12 | self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=2, dilation=2) 13 | self.bn1 = nn.BatchNorm2d(out_channels) 14 | self.relu1 = nn.ReLU() 15 | self.conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1) 16 | self.bn2 = nn.BatchNorm2d(out_channels) 17 | self.relu2 = nn.ReLU() 18 | 19 | self.shortcut_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1) 20 | self.shortcut_bn = nn.BatchNorm2d(out_channels) 21 | 22 | def forward(self, x): 23 | c1 = self.conv1(x) 24 | b1 = self.bn1(c1) 25 | act1 = self.relu1(b1) 26 | out = self.relu2(self.shortcut_bn(self.shortcut_conv(x)) + self.bn2(self.conv2(act1))) 27 | return out 28 | -------------------------------------------------------------------------------- /RektNet/train_eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | import sys 4 | import os 5 | import multiprocessing 6 | import shutil 7 | from tqdm import tqdm 8 | import numpy as np 9 | import cv2 10 | import copy 11 | from datetime import datetime 12 | 13 | import PIL 14 | from PIL import Image, ImageDraw 15 | 16 | import torch 17 | from torch.autograd import Variable 18 | from torch.backends import cudnn 19 | from torch import nn 20 | from torch import optim 21 | from torch.utils.data import DataLoader 22 | from torchvision import transforms 23 | 24 | from keypoint_net import KeypointNet 25 | from cross_ratio_loss import CrossRatioLoss 26 | from utils import Logger 27 | from utils import load_train_csv_dataset, prep_image, visualize_data, vis_tensor_and_save, calculate_distance, calculate_mean_distance 28 | from dataset import ConeDataset 29 | 30 | cv2.setRNGSeed(17) 31 | torch.manual_seed(17) 32 | np.random.seed(17) 33 | torch.backends.cudnn.deterministic = True 34 | torch.backends.cudnn.benchmark = False 35 | cuda = torch.cuda.is_available() 36 | device = torch.device('cuda:0' if cuda else 'cpu') 37 | 38 | visualization_tmp_path = "/outputs/visualization/" 39 | 40 | def print_tensor_stats(x, name): 41 | flattened_x = x.cpu().detach().numpy().flatten() 42 | avg = sum(flattened_x)/len(flattened_x) 43 | print(f"\t\t{name}: {avg},{min(flattened_x)},{max(flattened_x)}") 
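# train_model (below) runs the full training loop: each epoch it accumulates the
# location/geometric/total losses over the training loader, validates with
# eval_model, steps the LR scheduler, exports the best-so-far model to ONNX and
# saves a .pt checkpoint every `intervals` epochs (when save_checkpoints is
# enabled), and stops early once the validation loss has not improved for
# `max_tolerance` consecutive epochs.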
44 | 45 | def train_model(model, output_uri, dataloader, loss_function, optimizer, scheduler, epochs, val_dataloader, intervals, input_size, num_kpt, save_checkpoints, kpt_keys, study_name, evaluate_mode): 46 | 47 | best_val_loss = float('inf') 48 | best_epoch = 0 49 | max_tolerance = 8 50 | tolerance = 0 51 | 52 | for epoch in range(epochs): 53 | print(f"EPOCH {epoch}") 54 | model.train() 55 | total_loss = [0,0,0] # location/geometric/total 56 | batch_num = 0 57 | 58 | train_process = tqdm(dataloader) 59 | for x_batch, y_hm_batch, y_points_batch, image_name, _ in train_process: 60 | x_batch = x_batch.to(device) 61 | y_hm_batch = y_hm_batch.to(device) 62 | y_points_batch = y_points_batch.to(device) 63 | 64 | # Zero the gradients. 65 | if optimizer is not None: 66 | optimizer.zero_grad() 67 | 68 | # Compute output and loss. 69 | output = model(x_batch) 70 | loc_loss, geo_loss, loss = loss_function(output[0], output[1], y_hm_batch, y_points_batch) 71 | loss.backward() 72 | optimizer.step() 73 | 74 | loc_loss, geo_loss, loss = loc_loss.item(), geo_loss.item(), loss.item() 75 | train_process.set_description(f"Batch {batch_num}. Location Loss: {round(loc_loss,5)}. Geo Loss: {round(geo_loss,5)}. Total Loss: {round(loss,5)}") 76 | total_loss[0] += loc_loss 77 | total_loss[1] += geo_loss 78 | total_loss[2] += loss 79 | batch_num += 1 80 | 81 | print(f"\tTraining: MSE/Geometric/Total Loss: {round(total_loss[0]/batch_num,10)}/{round(total_loss[1]/batch_num,10)}/{round(total_loss[2]/batch_num,10)}") 82 | val_loc_loss, val_geo_loss, val_loss = eval_model(model=model, dataloader=val_dataloader, loss_function=loss_function, input_size=input_size) 83 | 84 | # Position suggested by https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate 85 | scheduler.step() 86 | 87 | if val_loss < best_val_loss: 88 | best_val_loss = val_loss 89 | best_epoch = epoch 90 | tolerance = 0 91 | 92 | # Save model onnx for inference. 93 | if save_checkpoints: 94 | onnx_uri = os.path.join(output_uri,f"best_keypoints_{input_size[0]}{input_size[1]}.onnx") 95 | onnx_model = KeypointNet(num_kpt, input_size, onnx_mode=True) 96 | onnx_model.load_state_dict(model.state_dict()) 97 | torch.onnx.export(onnx_model, torch.randn(1, 3, input_size[0], input_size[1]), onnx_uri) 98 | print(f"Saving ONNX model to {onnx_uri}") 99 | best_model = copy.deepcopy(model) 100 | else: 101 | tolerance += 1 102 | 103 | if save_checkpoints and epoch != 0 and (epoch + 1) % intervals == 0: 104 | # Save the latest weights 105 | gs_pt_uri = os.path.join(output_uri, "{epoch}_loss_{loss}.pt".format(epoch=epoch, loss=round(val_loss, 2))) 106 | print(f'Saving model to {gs_pt_uri}') 107 | checkpoint = {'epoch': epoch, 108 | 'model': model.state_dict(), 109 | 'optimizer': optimizer.state_dict()} 110 | torch.save(checkpoint, gs_pt_uri) 111 | if tolerance >= max_tolerance: 112 | print(f"Training stopped early because the validation loss is no longer decreasing. 
Epoch {best_epoch} has the best validation loss.") 113 | break 114 | 115 | def eval_model(model, dataloader, loss_function, input_size): 116 | print("\tStarting validation...") 117 | model.eval() 118 | with torch.no_grad(): 119 | loss_sums = [0,0,0] 120 | batch_num = 0 121 | for x_batch,y_hm_batch,y_point_batch,image_name, _ in dataloader: 122 | x_batch = x_batch.to(device) 123 | y_hm_batch = y_hm_batch.to(device) 124 | y_point_batch = y_point_batch.to(device) 125 | output = model(x_batch) 126 | loc_loss, geo_loss, loss = loss_function(output[0], output[1], y_hm_batch, y_point_batch) 127 | loss_sums[0] += loc_loss.item() 128 | loss_sums[1] += geo_loss.item() 129 | loss_sums[2] += loss.item() 130 | 131 | batch_num += 1 132 | 133 | val_loc_loss = loss_sums[0] / batch_num 134 | val_geo_loss = loss_sums[1] / batch_num 135 | val_loss = loss_sums[2] / batch_num 136 | print(f"\tValidation: MSE/Geometric/Total Loss: {round(val_loc_loss,10)}/{round(val_geo_loss,10)}/{round(val_loss,10)}") 137 | 138 | return val_loc_loss, val_geo_loss, val_loss 139 | 140 | def print_kpt_L2_distance(model, dataloader, kpt_keys, study_name, evaluate_mode, input_size): 141 | kpt_distances = [] 142 | if evaluate_mode: 143 | validation_textfile = open('logs/rektnet_validation.txt', 'a') 144 | 145 | for x_batch, y_hm_batch, y_point_batch, _, image_shape in dataloader: 146 | x_batch = x_batch.to(device) 147 | y_hm_batch = y_hm_batch.to(device) 148 | y_point_batch = y_point_batch.to(device) 149 | 150 | output = model(x_batch) 151 | 152 | pred_points = output[1]*x_batch.shape[1] 153 | pred_points = pred_points.data.cpu().numpy() 154 | pred_points *= input_size 155 | target_points = y_point_batch*x_batch.shape[1] 156 | target_points = target_points.data.cpu().numpy() 157 | target_points *= input_size 158 | 159 | kpt_dis = calculate_distance(target_points, pred_points) 160 | 161 | ##### record average keypoint error vs. bounding-box size distribution for validation analysis ##### 162 | if evaluate_mode: 163 | height,width,_ = image_shape 164 | print(width.numpy()[0],height.numpy()[0]) 165 | print(kpt_dis) 166 | 167 | single_img_kpt_dis_sum = sum(kpt_dis) 168 | validation_textfile.write(f"{[width.numpy()[0],height.numpy()[0]]}:{single_img_kpt_dis_sum}\n") 169 | ########################################################################### 170 | 171 | kpt_distances.append(kpt_dis) 172 | if evaluate_mode: 173 | validation_textfile.close() 174 | final_stats, total_dist, final_stats_std = calculate_mean_distance(kpt_distances) 175 | print(f'Mean distance error of each keypoint is:') 176 | for i, kpt_key in enumerate(kpt_keys): 177 | print(f'\t{kpt_key}: {final_stats[i]}') 178 | print(f'Standard deviation of each keypoint is:') 179 | for i, kpt_key in enumerate(kpt_keys): 180 | print(f'\t{kpt_key}: {final_stats_std[i]}') 181 | print(f'Total distance error is: {total_dist}') 182 | ##### updating best result for optuna study ##### 183 | result = open("logs/" + study_name + ".txt", "w" ) 184 | result.write(str(total_dist)) 185 | result.close() 186 | ########################################### 187 | 188 | def main(): 189 | def add_bool_arg(name, default, help): 190 | arg_group = parser.add_mutually_exclusive_group(required=False) 191 | arg_group.add_argument('--' + name, dest=name, action='store_true', help=help) 192 | arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 193 | parser.set_defaults(**{name:default}) 194 | 195 | parser = argparse.ArgumentParser(description='Keypoints Training with PyTorch') 196 | 197 | 
parser.add_argument('--input_size', default=80, type=int, help='input image size') 198 | parser.add_argument('--train_dataset_uri', default='dataset/rektnet_label.csv', help='path to the training dataset csv file') 199 | parser.add_argument('--output_path', type=str, help='output weights path; by default a folder is created based on the current month/year and your study name',default="automatic") 200 | parser.add_argument('--dataset_path', type=str, help='path to image dataset',default="dataset/RektNet_Dataset/") 201 | parser.add_argument('--loss_type', default='l1_softargmax', help='loss type: l2_softargmax|l2_heatmap|l1_softargmax') 202 | parser.add_argument('--validation_ratio', default=0.15, type=float, help='fraction of the dataset to use for validation') 203 | parser.add_argument('--batch_size', type=int, default=32, help='size of each image batch') 204 | parser.add_argument('--lr', '--learning-rate', default=1e-1, type=float, help='learning rate') 205 | parser.add_argument('--lr_gamma', default=0.999, type=float, help='gamma for the learning-rate scheduler') 206 | parser.add_argument('--num_epochs', default=1024, type=int, help='number of epochs') 207 | parser.add_argument("--checkpoint_interval", type=int, default=4, help="interval between saving model weights") 208 | parser.add_argument('--study_name', required=True, help='name for saving checkpoint models') 209 | 210 | add_bool_arg('geo_loss', default=True, help='whether to include the geometric loss') 211 | parser.add_argument('--geo_loss_gamma_vert', default=0, type=float, help='gamma for the geometric loss (vertical)') 212 | parser.add_argument('--geo_loss_gamma_horz', default=0, type=float, help='gamma for the geometric loss (horizontal)') 213 | 214 | add_bool_arg('vis_upload_data', default=False, help='whether to visualize our dataset in Christmas Tree format and upload the whole dataset. Defaults to False') 215 | add_bool_arg('save_checkpoints', default=True, help='whether to save checkpoints') 216 | add_bool_arg('vis_dataloader', default=False, help='whether to visualize the image points and heatmap processed in our dataloader') 217 | add_bool_arg('evaluate_mode', default=False, help='whether to evaluate average keypoint error vs. bounding-box size distribution at the end of training') 218 | 219 | args = parser.parse_args() 220 | print("Program arguments:", args) 221 | 222 | if args.output_path == "automatic": 223 | current_month = datetime.now().strftime('%B').lower() 224 | current_year = str(datetime.now().year) 225 | if not os.path.exists(os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + args.study_name + '/')): 226 | os.makedirs(os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + args.study_name + '/')) 227 | output_uri = os.path.join('outputs/', current_month + '-' + current_year + '-experiments/' + args.study_name + '/') 228 | else: 229 | output_uri = args.output_path 230 | 231 | save_file_name = 'logs/' + output_uri.split('/')[-2] 232 | sys.stdout = Logger(save_file_name + '.log') 233 | sys.stderr = Logger(save_file_name + '.error') 234 | 235 | INPUT_SIZE = (args.input_size, args.input_size) 236 | KPT_KEYS = ["top", "mid_L_top", "mid_R_top", "mid_L_bot", "mid_R_bot", "bot_L", "bot_R"] 237 | 238 | intervals = args.checkpoint_interval 239 | val_split = args.validation_ratio 240 | 241 | batch_size= args.batch_size 242 | num_epochs= args.num_epochs 243 | 244 | # Load the train data. 
245 | train_csv = args.train_dataset_uri 246 | train_images, train_labels, val_images, val_labels = load_train_csv_dataset(train_csv, validation_percent=val_split, keypoint_keys=KPT_KEYS, dataset_path=args.dataset_path, cache_location="./gs/") 247 | 248 | # "Become one with the data" - Andrej Karpathy 249 | if args.vis_upload_data: 250 | visualize_data(train_images, train_labels) 251 | print('Shutting down instance...') 252 | os.system('sudo shutdown now') 253 | 254 | # Create pytorch dataloaders for train and validation sets. 255 | train_dataset = ConeDataset(images=train_images, labels=train_labels, dataset_path=args.dataset_path, target_image_size=INPUT_SIZE, save_checkpoints=args.save_checkpoints, vis_dataloader=args.vis_dataloader) 256 | train_dataloader = DataLoader(train_dataset, batch_size= batch_size, shuffle=False, num_workers=0) 257 | val_dataset = ConeDataset(images=val_images, labels=val_labels, dataset_path=args.dataset_path, target_image_size=INPUT_SIZE, save_checkpoints=args.save_checkpoints, vis_dataloader=args.vis_dataloader) 258 | val_dataloader = DataLoader(val_dataset, batch_size= 1, shuffle=False, num_workers=0) 259 | 260 | # Define model, optimizer and loss function. 261 | model = KeypointNet(len(KPT_KEYS), INPUT_SIZE, onnx_mode=False) 262 | model = model.to(device) 263 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 264 | scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=args.lr_gamma) 265 | loss_func = CrossRatioLoss(args.loss_type, args.geo_loss, args.geo_loss_gamma_horz, args.geo_loss_gamma_vert) 266 | 267 | # Train our model. 268 | train_model( 269 | model=model, 270 | output_uri=output_uri, 271 | dataloader=train_dataloader, 272 | loss_function=loss_func, 273 | optimizer=optimizer, 274 | scheduler=scheduler, 275 | epochs=num_epochs, 276 | val_dataloader=val_dataloader, 277 | intervals=intervals, 278 | input_size=INPUT_SIZE, 279 | num_kpt=len(KPT_KEYS), 280 | save_checkpoints=args.save_checkpoints, 281 | kpt_keys=KPT_KEYS, 282 | study_name=args.study_name, 283 | evaluate_mode=args.evaluate_mode 284 | ) 285 | 286 | if __name__=='__main__': 287 | main() 288 | -------------------------------------------------------------------------------- /RektNet/train_eval_hyper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import subprocess 5 | 6 | import optuna 7 | import pymysql 8 | import numpy as np 9 | import torch 10 | 11 | pymysql.converters.encoders[np.float64] = pymysql.converters.escape_float 12 | pymysql.converters.conversions = pymysql.converters.encoders.copy() 13 | pymysql.converters.conversions.update(pymysql.converters.decoders) 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser() 17 | def add_bool_arg(name, default, help): 18 | arg_group = parser.add_mutually_exclusive_group(required=False) 19 | arg_group.add_argument('--' + name, dest=name, action='store_true', help=help) 20 | arg_group.add_argument('--no_' + name, dest=name, action='store_false', help=("Do not " + help)) 21 | parser.set_defaults(**{name:default}) 22 | parser.add_argument('--num_trials', type=int, default=100, help="number of optuna trials to run") 23 | parser.add_argument('--study_name', type=str, default='optuna_keypoints_study', help="cometml / optuna study name") 24 | 25 | ###### geo loss study ###### 26 | add_bool_arg('geo_loss_study', default=False, help="whether to initialize study of vertical and horizontal geo loss") 27 | ##### loss type study ###### 
28 | add_bool_arg('loss_type_study', default=False, help="whether to initialize study of three different loss types: l2_softargmax|l2_heatmap|l1_softargmax") 29 | ############################ 30 | 31 | add_bool_arg('auto_sd', default=False, help='whether to enable automatic instance shutdown after training. Defaults to False') 32 | 33 | opt = parser.parse_args() 34 | 35 | def objective(trial): 36 | ###################################### 37 | if opt.geo_loss_study: 38 | geo_loss_gamma_vert = trial.suggest_uniform('geo_loss_gamma_vert', 0, 0.15) 39 | geo_loss_gamma_horz = trial.suggest_uniform('geo_loss_gamma_horz', 0, 0.15) 40 | else: 41 | geo_loss_gamma_vert = 0 42 | geo_loss_gamma_horz = 0 43 | ###################################### 44 | if opt.loss_type_study: 45 | loss_type = trial.suggest_categorical('loss_type', ['l2_softargmax', 'l2_heatmap', 'l1_softargmax']) 46 | else: 47 | loss_type = 'l1_softargmax' 48 | ###################################### 49 | 50 | # build the argstring 51 | args = { 52 | 'geo_loss_gamma_vert': geo_loss_gamma_vert, 53 | 'geo_loss_gamma_horz': geo_loss_gamma_horz, 54 | 'loss_type': loss_type, 55 | "study_name": opt.study_name, 56 | "auto_sd": opt.auto_sd 57 | } 58 | arglist = ["python3", "-u", "train_eval.py"] 59 | for arg, value in args.items(): 60 | if value is None: 61 | continue 62 | if value is False: 63 | arglist.append(f"--no_{arg}") 64 | continue 65 | if value is True: 66 | arglist.append(f"--{arg}") 67 | continue 68 | arglist.append(f"--{arg}={value}") 69 | 70 | statement = " ".join(arglist) 71 | print(f"statement for this study is: ") 72 | print(statement) 73 | 74 | # calling through subprocess to ensure that all cuda memory is fully released between experiments 75 | subprocess.check_call(arglist) 76 | 77 | result_file = open("logs/" + opt.study_name +".txt","r+") 78 | score = float(result_file.read()) 79 | print(f"score for this study is {score}") 80 | return score # want to return a value to minimize 81 | 82 | try: 83 | # study = optuna.create_study(study_name=opt.study_name, storage="mysql+pymysql://root:root@35.224.251.208/optuna") 84 | study = optuna.create_study(study_name=opt.study_name) 85 | print("Created optuna study") 86 | except ValueError as e: 87 | if "Please use a different name" in str(e): 88 | # study = optuna.Study(study_name=opt.study_name, storage="mysql+pymysql://root:root@35.224.251.208/optuna") 89 | study = optuna.Study(study_name=opt.study_name) 90 | print("Joined existing optuna study") 91 | else: 92 | raise 93 | except: 94 | raise 95 | study.optimize(objective, n_trials=opt.num_trials) 96 | -------------------------------------------------------------------------------- /RektNet/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import pandas as pd 4 | import hashlib 5 | import sys 6 | import os 7 | import shutil 8 | import cv2 9 | import tempfile 10 | from google.cloud import storage 11 | from tqdm import tqdm 12 | import statistics 13 | import torch 14 | 15 | import PIL 16 | from PIL import Image, ImageDraw 17 | 18 | vis_tmp_path = "/tmp/detect/" #!!!don't specify this path outside of /tmp/, otherwise important files could be removed!!! 
19 | vis_path = "/outputs/visualization/" 20 | 21 | if os.path.exists(vis_tmp_path): 22 | shutil.rmtree(vis_tmp_path) # delete output folder 23 | os.makedirs(vis_tmp_path) # make new output folder 24 | 25 | class Logger(object): 26 | def __init__(self, File): 27 | Type = File.split('.')[-1] 28 | if Type == 'error': 29 | self.terminal = sys.stderr 30 | elif Type == 'log': 31 | self.terminal = sys.stdout 32 | self.log = open(File, "w") 33 | 34 | def write(self, message): 35 | self.terminal.write(message) 36 | self.log.write(message) 37 | 38 | def flush(self): 39 | pass 40 | 41 | def vis_kpt_and_save(np_image, image_name, h_scale, w_scale, labels, color=(52,31,163)): 42 | circ_size = 3 43 | for pt in np.array(labels): 44 | x_coor, y_coor = pt 45 | cv2.circle(np_image, (x_coor, y_coor), circ_size, color, -1) #BGR color (52,31,163) is called mit logo red 46 | if not cv2.imwrite(os.path.join(vis_tmp_path, image_name + "_label_vis.jpg"), np_image): 47 | raise Exception("Could not write image") #opencv won't give you error for incorrect image but return False instead, so we have to do it manually 48 | os.rename(os.path.join(vis_tmp_path, image_name + "_label_vis.jpg"), os.path.join(vis_path, image_name + "_label_vis.jpg")) 49 | 50 | def vis_hm_and_save(np_heat_map, image_name): 51 | np_image = np.zeros((1, np_heat_map.shape[1], np_heat_map.shape[2])) 52 | for i in range(np_heat_map.shape[0]): 53 | np_image += np_heat_map[i,:,:] #sum up the heat-map numpy matrix 54 | data = np_image.astype('f') 55 | data = data.squeeze(0) #squeeze the numpy image size from (1,width,height) to (width,height) 56 | img = Image.fromarray(((data - data.min()) * 255.0 / 57 | (data.max() - data.min())).astype(np.uint8)) #convert to PIL image 58 | img.save(os.path.join(vis_tmp_path, image_name + "_heat_map.jpg")) # opencv doesn't like our heat-map, so we use PIL instead here 59 | os.rename(os.path.join(vis_tmp_path, image_name + "_heat_map.jpg"), os.path.join(vis_path, image_name + "_heat_map.jpg")) 60 | 61 | def vis_tensor_and_save(image, h, w, tensor_output, image_name, output_uri): 62 | colors = [(0, 255, 0), (255, 0, 0), (255, 255, 0), (0, 255, 255), (255, 0, 255), (127, 255, 127), (255, 127, 127)] 63 | i = 0 64 | for pt in np.array(tensor_output): 65 | cv2.circle(image, (int(pt[0] * w), int(pt[1] * h)), 2, colors[i], -1) 66 | i += 1 67 | if not cv2.imwrite(os.path.join(vis_tmp_path, image_name + "_inference.jpg"), image): 68 | raise Exception("Could not write image") #opencv won't give you error for incorrect image but return False instead, so we have to do it manually 69 | 70 | os.rename(os.path.join(vis_tmp_path, image_name + "_inference.jpg"), os.path.join(output_uri, image_name + "_inference.jpg")) 71 | return image 72 | 73 | def prep_image(image,target_image_size): 74 | h,w,_ = image.shape 75 | image = cv2.resize(image, target_image_size) 76 | return image 77 | 78 | def print_tensor_stats(x, name): 79 | flattened_x = x.cpu().detach().numpy().flatten() 80 | avg = sum(flattened_x)/len(flattened_x) 81 | print(f"\t\t{name}: {avg},{min(flattened_x)},{max(flattened_x)}") 82 | 83 | def prep_label(label, target_image_size, orig_image_size, image_path): 84 | hm = np.zeros((label.shape[0], target_image_size[0], target_image_size[1])) 85 | for i in range(label.shape[0]): 86 | row = label[i] 87 | # padded_image_size = max(orig_image_size[0],orig_image_size[1]) 88 | hm_tmp = np.zeros((orig_image_size[0], orig_image_size[1])) 89 | hm_tmp[int(row[1]), int(row[0])] = 1.0 90 | hm[i] = cv2.resize(hm_tmp, target_image_size) 91 | 
hm[i] = cv2.GaussianBlur(hm[i], (5,5), 0) 92 | if hm[i].sum()==0: 93 | print("Incorrect data label detected! Please revise the label for the image below so that it contains valid keypoint data:") 94 | print(image_path) 95 | hm[i] /= hm[i].sum() 96 | return hm 97 | 98 | def get_scale(actual_image_size,target_image_size): 99 | ##### since we are dealing with square images only, it doesn't matter whether we use height or width ##### 100 | target_h, target_w = target_image_size 101 | h_scale = target_h / actual_image_size[0] 102 | w_scale = target_w / actual_image_size[1] 103 | return h_scale, w_scale 104 | 105 | def scale_labels(labels, h_scale, w_scale): 106 | new_labels = [] 107 | for pt in np.array(labels): 108 | x_coor = math.ceil((int(pt[0])) * w_scale) 109 | y_coor = math.ceil((int(pt[1])) * h_scale) 110 | new_labels.append([x_coor, y_coor]) 111 | return np.asarray(new_labels) 112 | 113 | def visualize_data(images, labels): 114 | vis_process = tqdm(images) 115 | for index,_ in tqdm(enumerate(vis_process),desc="Processing Visualization"): 116 | # print("{}/{}: {}".format(index + 1, len(images), images[index])) 117 | image = cv2.imread("./gs/"+images[index]) 118 | h, w, _ = image.shape 119 | dim_diff = np.abs(h - w) 120 | if h<=w: 121 | image = cv2.copyMakeBorder(image, 0, dim_diff, 0, 0, cv2.BORDER_CONSTANT, value=[128,128,128]) 122 | else: 123 | image = cv2.copyMakeBorder(image, 0, 0, 0, dim_diff, cv2.BORDER_CONSTANT, value=[128,128,128]) 124 | h, w, _ = image.shape 125 | image = cv2.resize(image, (1000, 1000)) 126 | label = labels[index] 127 | hm = np.zeros((label.shape[0], 1000, 1000)) 128 | for i in range(hm.shape[0]): 129 | row = label[i] 130 | hm_tmp = np.zeros((h, w)) 131 | hm_tmp[int(row[1]), int(row[0])] = 1.0 132 | hm[i] = cv2.resize(hm_tmp, (1000, 1000)) 133 | hm[i] = cv2.GaussianBlur(hm[i], (5,5), 0) 134 | hm[i] /= hm[i].sum() 135 | label = label / np.array([w, h])[np.newaxis, :] 136 | h, w, _ = image.shape 137 | prevpt = None 138 | for pt in label: 139 | cvpt = (int(pt[0] * w), int(pt[1] * h)) 140 | cv2.circle(image, cvpt, 3, (0, 255, 0), -1) 141 | if prevpt is not None: 142 | cv2.line(image, prevpt, cvpt, (0, 255, 0), 2) 143 | prevpt = cvpt 144 | cv2.imwrite(vis_tmp_path + images[index], image) 145 | os.rename(vis_tmp_path + images[index], vis_path + images[index]) 146 | cv2.waitKey(0) 147 | for i in range(hm.shape[0]): 148 | cv2.imwrite(vis_tmp_path + images[index], image) 149 | os.rename(vis_tmp_path + images[index], vis_path + images[index]) 150 | cv2.waitKey(0) 151 | 152 | def load_train_csv_dataset(train_csv_uri, validation_percent, keypoint_keys, dataset_path, cache_location=None): 153 | train_data_table = pd.read_csv(train_csv_uri) 154 | train_data_table_hash = hashlib.sha256(pd.util.hash_pandas_object(train_data_table, index=True).values).hexdigest() 155 | 156 | train_images, train_labels = None, None 157 | if cache_location: 158 | cache_folder = os.path.join(cache_location, train_data_table_hash) 159 | cache_images_path = os.path.join(cache_folder, 'images.npy') 160 | cache_labels_path = os.path.join(cache_folder, 'labels.npy') 161 | 162 | if os.path.exists(cache_images_path) and os.path.exists(cache_labels_path): 163 | print(f"Caches exist: {cache_images_path} and {cache_labels_path}!") 164 | train_images = np.load(cache_images_path) 165 | train_labels = np.load(cache_labels_path) 166 | else: 167 | print("Caches do not exist!") 168 | 169 | if train_labels is None: 170 | # Separate the labels from the input data. 
171 | images = train_data_table.values[:, 0] 172 | labels = train_data_table.values[:, 2:2+len(keypoint_keys)] 173 | 174 | tmp_labels = [] 175 | image_uris = [] 176 | 177 | for i in range(len(labels)): 178 | label = labels[i] 179 | if label[0] != label[0]: 180 | continue 181 | label_np = np.zeros((len(keypoint_keys), 2)) 182 | for j in range(len(keypoint_keys)): 183 | col = keypoint_keys[j] 184 | txt = label[train_data_table.columns.get_loc(col) - 2][1:-1].split(",") 185 | label_np[j, 0] = txt[0] 186 | label_np[j, 1] = txt[1] 187 | tmp_labels.append(label_np) 188 | image_uris.append(os.path.join(dataset_path,images[i])) 189 | 190 | train_images = [] 191 | train_labels = [] 192 | 193 | # if not os.path.isdir("./gs"): 194 | # os.mkdir("./gs") 195 | # print("Downloading dataset...") 196 | 197 | num = 0 198 | for uri in tqdm(image_uris,desc="Processing Image Dataset"): 199 | uri_parts = uri.split("/") 200 | 201 | image = cv2.imread(uri) 202 | h, _, _ = image.shape 203 | if h < 10: 204 | num += 1 205 | continue 206 | train_images.append(uri_parts[-1]) 207 | train_labels.append(tmp_labels[num]) 208 | num += 1 209 | 210 | if cache_location: 211 | print("Saving cache...") 212 | cache_folder = os.path.join(cache_location, train_data_table_hash) 213 | os.makedirs(cache_folder, exist_ok=True) 214 | cache_images_path = os.path.join(cache_folder, 'images.npy') 215 | cache_labels_path = os.path.join(cache_folder, 'labels.npy') 216 | print(cache_images_path, cache_labels_path) 217 | np.save(cache_images_path, train_images) 218 | np.save(cache_labels_path, train_labels) 219 | 220 | # Calculate how much of our training data is for train and validation. 221 | num_train = len(train_labels) 222 | num_val = int(num_train * validation_percent) 223 | 224 | # # Reshape data back to images, transpose to N,C,H,W format for pytorch. 225 | # train_images = train_images.reshape([-1, 28, 28, 1]).transpose((0, 3, 1, 2)) 226 | 227 | # # Split for train/val. 
228 | val_labels = train_labels[0:num_val] 229 | val_images = train_images[0:num_val] 230 | train_labels = train_labels[num_val:] 231 | train_images = train_images[num_val:] 232 | print(f"training image number: {len(train_images)}") 233 | print(f"validation image number: {len(val_images)}") 234 | 235 | return train_images, train_labels, val_images, val_labels 236 | 237 | 238 | 239 | def calculate_distance(target_points,pred_points): 240 | dist_matrix = [] 241 | for i, point in enumerate(target_points[0]): 242 | dist = np.sqrt(np.square(point[0] - pred_points[0][i][0]) + np.square(point[1] - pred_points[0][i][1])) 243 | dist_matrix.append(dist) 244 | return dist_matrix 245 | 246 | def calculate_mean_distance(epoch_kpt_dis): 247 | top = [] 248 | mid_R_top = [] 249 | mid_R_bot = [] 250 | bot_R = [] 251 | bot_L = [] 252 | mid_L_bot = [] 253 | mid_L_top = [] 254 | for i, dist in enumerate(epoch_kpt_dis): 255 | top.append(dist[0]) 256 | mid_L_top.append(dist[1]) 257 | mid_R_top.append(dist[2]) 258 | mid_L_bot.append(dist[3]) 259 | mid_R_bot.append(dist[4]) 260 | bot_L.append(dist[5]) 261 | bot_R.append(dist[6]) 262 | 263 | top_std = np.std(top) 264 | top = np.mean(top) 265 | 266 | mid_L_top_std = np.std(mid_L_top) 267 | mid_L_top = np.mean(mid_L_top) 268 | 269 | mid_R_top_std = np.std(mid_R_top) 270 | mid_R_top = np.mean(mid_R_top) 271 | 272 | mid_L_bot_std = np.std(mid_L_bot) 273 | mid_L_bot = np.mean(mid_L_bot) 274 | 275 | mid_R_bot_std = np.std(mid_R_bot) 276 | mid_R_bot = np.mean(mid_R_bot) 277 | 278 | bot_L_std = np.std(bot_L) 279 | bot_L = np.mean(bot_L) 280 | 281 | bot_R_std = np.std(bot_R) 282 | bot_R = np.mean(bot_R) 283 | 284 | 285 | total = top + mid_L_top + mid_R_top + mid_L_bot + mid_R_bot + bot_L + bot_R 286 | 287 | return [top,mid_L_top,mid_R_top,mid_L_bot,mid_R_bot,bot_L,bot_R],total,[top_std,mid_L_top_std,mid_R_top_std,mid_L_bot_std,mid_R_bot_std,bot_L_std,bot_R_std] 288 | 289 | --------------------------------------------------------------------------------
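
For reference, here is a minimal sketch of how the keypoint-distance helpers at the end of `RektNet/utils.py` fit together, mirroring the way `print_kpt_L2_distance` in `RektNet/train_eval.py` calls them. The dummy coordinates, the `(1, 7, 2)` batch shape, and running the snippet from inside the `RektNet/` directory with `utils.py`'s dependencies (including `google-cloud-storage`) installed are assumptions made for illustration only.

```python
# Illustrative sketch only: dummy keypoints in the order used by calculate_mean_distance.
import numpy as np
from utils import calculate_distance, calculate_mean_distance  # assumes RektNet/ is the working directory

kpt_keys = ["top", "mid_L_top", "mid_R_top", "mid_L_bot", "mid_R_bot", "bot_L", "bot_R"]

# One image's ground-truth keypoints, shaped (1, 7, 2) like the batch-size-1 validation tensors.
target = np.array([[[40., 5.], [30., 20.], [50., 20.], [25., 45.], [55., 45.], [15., 75.], [65., 75.]]])
pred = target + np.random.normal(scale=1.5, size=target.shape)  # simulated predictions

per_image_dists = [calculate_distance(target, pred)]           # one 7-element distance list per image
means, total, stds = calculate_mean_distance(per_image_dists)  # aggregate over all collected images

for key, mean, std in zip(kpt_keys, means, stds):
    print(f"{key}: mean={mean:.3f}, std={std:.3f}")
print(f"total distance error: {total:.3f}")
```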