├── .github └── workflows │ └── manual.yml ├── CODEOWNERS ├── LICENSE.md ├── README.md ├── img ├── img_title_1.jpeg ├── img_title_2.png └── img_title_2_new.png ├── loop_over_dataset.py ├── misc ├── evaluation.py ├── helpers.py ├── objdet_tools.py └── params.py ├── my_tracking_result.mp4 ├── my_tracking_results.rar ├── requirements.txt ├── student ├── association.py ├── filter.py ├── measurements.py ├── objdet_detect.py ├── objdet_eval.py ├── objdet_pcl.py └── trackmanagement.py ├── tools ├── objdet_models │ ├── darknet │ │ ├── config │ │ │ └── complex_yolov4.cfg │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── darknet2pytorch.py │ │ │ ├── darknet_utils.py │ │ │ └── yolo_layer.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── cal_intersection_rotated_boxes.py │ │ │ ├── evaluation_utils.py │ │ │ ├── iou_rotated_boxes_utils.py │ │ │ └── torch_utils.py │ └── resnet │ │ ├── models │ │ ├── fpn_resnet.py │ │ └── resnet.py │ │ └── utils │ │ ├── evaluation_utils.py │ │ └── torch_utils.py └── waymo_reader │ ├── LICENSE │ ├── README.md │ ├── build │ └── lib │ │ └── simple_waymo_open_dataset_reader │ │ ├── __init__.py │ │ ├── dataset_pb2.py │ │ ├── label_pb2.py │ │ └── utils.py │ ├── dist │ └── simple_waymo_open_dataset_reader-0.0.0-py3.8.egg │ ├── generate_proto.sh │ ├── setup.py │ ├── simple_waymo_open_dataset_reader.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt │ └── simple_waymo_open_dataset_reader │ ├── __init__.py │ ├── dataset.proto │ ├── dataset_pb2.py │ ├── label.proto │ ├── label_pb2.py │ └── utils.py └── writeup.md /.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # Workflow to ensure whenever a Github PR is submitted, 2 | # a JIRA ticket gets created automatically. 3 | name: Manual Workflow 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on pull request events but only for the master branch 8 | pull_request_target: 9 | types: [assigned, opened, reopened] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | jobs: 15 | test-transition-issue: 16 | name: Convert Github Issue to Jira Issue 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@master 21 | 22 | - name: Login 23 | uses: atlassian/gajira-login@master 24 | env: 25 | JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} 26 | JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} 27 | JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} 28 | 29 | - name: Create NEW JIRA ticket 30 | id: create 31 | uses: atlassian/gajira-create@master 32 | with: 33 | project: CONUPDATE 34 | issuetype: Task 35 | summary: | 36 | Github PR - nd013 Self-Driving Car Engineer C2 Fusion Starter | Repo: ${{ github.repository }} | PR# ${{github.event.number}} 37 | description: | 38 | Repo link: https://github.com/${{ github.repository }} 39 | PR no. ${{ github.event.pull_request.number }} 40 | PR title: ${{ github.event.pull_request.title }} 41 | PR description: ${{ github.event.pull_request.description }} 42 | In addition, please resolve other issues, if any. 
43 | fields: '{"components": [{"name":"nd013 - Self Driving Car Engineer ND"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}' 44 | 45 | - name: Log created issue 46 | run: echo "Issue ${{ steps.create.outputs.issue }} was created" 47 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @udacity/active-public-content -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | Copyright © 2012 - 2021, Udacity, Inc. 3 | 4 | Udacity hereby grants you a license in and to the Educational Content, including but not limited to homework assignments, programming assignments, code samples, and other educational materials and tools (as further described in the Udacity Terms of Use), subject to, as modified herein, the terms and conditions of the Creative Commons Attribution-NonCommercial- NoDerivs 3.0 License located at http://creativecommons.org/licenses/by-nc-nd/4.0 and successor locations for such license (the "CC License") provided that, in each case, the Educational Content is specifically marked as being subject to the CC License. 5 | Udacity expressly defines the following as falling outside the definition of "non-commercial": 6 | (a) the sale or rental of (i) any part of the Educational Content, (ii) any derivative works based at least in part on the Educational Content, or (iii) any collective work that includes any part of the Educational Content; 7 | (b) the sale of access or a link to any part of the Educational Content without first obtaining informed consent from the buyer (that the buyer is aware that the Educational Content, or such part thereof, is available at the Website free of charge); 8 | (c) providing training, support, or editorial services that use or reference the Educational Content in exchange for a fee; 9 | (d) the sale of advertisements, sponsorships, or promotions placed on the Educational Content, or any part thereof, or the sale of advertisements, sponsorships, or promotions on any website or blog containing any part of the Educational Material, including without limitation any "pop-up advertisements"; 10 | (e) the use of Educational Content by a college, university, school, or other educational institution for instruction where tuition is charged; and 11 | (f) the use of Educational Content by a for-profit corporation or non-profit entity for internal professional development or training. 12 | 13 | 14 | 15 | THE SERVICES AND ONLINE COURSES (INCLUDING ANY CONTENT) ARE PROVIDED "AS IS" AND "AS AVAILABLE" WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. YOU ASSUME TOTAL RESPONSIBILITY AND THE ENTIRE RISK FOR YOUR USE OF THE SERVICES, ONLINE COURSES, AND CONTENT. 
WITHOUT LIMITING THE FOREGOING, WE DO NOT WARRANT THAT (A) THE SERVICES, WEBSITES, CONTENT, OR THE ONLINE COURSES WILL MEET YOUR REQUIREMENTS OR EXPECTATIONS OR ACHIEVE THE INTENDED PURPOSES, (B) THE WEBSITES OR THE ONLINE COURSES WILL NOT EXPERIENCE OUTAGES OR OTHERWISE BE UNINTERRUPTED, TIMELY, SECURE OR ERROR-FREE, (C) THE INFORMATION OR CONTENT OBTAINED THROUGH THE SERVICES, SUCH AS CHAT ROOM SERVICES, WILL BE ACCURATE, COMPLETE, CURRENT, ERROR- FREE, COMPLETELY SECURE OR RELIABLE, OR (D) THAT DEFECTS IN OR ON THE SERVICES OR CONTENT WILL BE CORRECTED. YOU ASSUME ALL RISK OF PERSONAL INJURY, INCLUDING DEATH AND DAMAGE TO PERSONAL PROPERTY, SUSTAINED FROM USE OF SERVICES. 16 | -------------------------------------------------------------------------------- /img/img_title_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/img/img_title_1.jpeg -------------------------------------------------------------------------------- /img/img_title_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/img/img_title_2.png -------------------------------------------------------------------------------- /img/img_title_2_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/img/img_title_2_new.png -------------------------------------------------------------------------------- /loop_over_dataset.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Loop over all frames in a Waymo Open Dataset file, 6 | # detect and track objects and visualize results 7 | # 8 | # You should have received a copy of the Udacity license together with this program. 
9 | # 10 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 11 | # ---------------------------------------------------------------------- 12 | # 13 | 14 | ################## 15 | ## Imports 16 | 17 | ## general package imports 18 | import os 19 | import sys 20 | import numpy as np 21 | import math 22 | import cv2 23 | import matplotlib.pyplot as plt 24 | import copy 25 | 26 | ## Add current working directory to path 27 | sys.path.append(os.getcwd()) 28 | 29 | ## Waymo open dataset reader 30 | from tools.waymo_reader.simple_waymo_open_dataset_reader import utils as waymo_utils 31 | from tools.waymo_reader.simple_waymo_open_dataset_reader import WaymoDataFileReader, dataset_pb2, label_pb2 32 | 33 | ## 3d object detection 34 | import student.objdet_pcl as pcl 35 | import student.objdet_detect as det 36 | import student.objdet_eval as eval 37 | 38 | import misc.objdet_tools as tools 39 | from misc.helpers import save_object_to_file, load_object_from_file, make_exec_list 40 | 41 | ## Tracking 42 | from student.filter import Filter 43 | from student.trackmanagement import Trackmanagement 44 | from student.association import Association 45 | from student.measurements import Sensor, Measurement 46 | from misc.evaluation import plot_tracks, plot_rmse, make_movie 47 | import misc.params as params 48 | 49 | ################## 50 | ## Set parameters and perform initializations 51 | 52 | ## Select Waymo Open Dataset file and frame numbers 53 | data_filename = 'training_segment-1005081002024129653_5313_150_5333_150_with_camera_labels.tfrecord' # Sequence 1 54 | # data_filename = 'training_segment-10072231702153043603_5725_000_5745_000_with_camera_labels.tfrecord' # Sequence 2 55 | # data_filename = 'training_segment-10963653239323173269_1924_000_1944_000_with_camera_labels.tfrecord' # Sequence 3 56 | show_only_frames = [0, 200] # show only frames in interval for debugging 57 | 58 | data_fullpath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'dataset', data_filename) # adjustable path in case this script is called from another working directory 59 | model= "fpn-resnet" # options are 'darknet', 'resnet' 60 | model_res = "resnet" 61 | sequence = "1" 62 | results_fullpath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/' + model + '/results_sequence_' + sequence + '_' + model_res) 63 | datafile = WaymoDataFileReader(data_fullpath) 64 | datafile_iter = iter(datafile) # initialize dataset iterator 65 | 66 | ## Initialize object detection 67 | configs_det = det.load_configs(model_name='fpn_resnet') # options are 'darknet', 'fpn_resnet' 68 | model_det = det.create_model(configs_det) 69 | 70 | configs_det.use_labels_as_objects = False # True = use groundtruth labels as objects, False = use model-based detection 71 | 72 | ## Uncomment this setting to restrict the y-range in the final project 73 | # configs_det.lim_y = [-25, 25] 74 | 75 | ## Initialize tracking 76 | KF = Filter() # set up Kalman filter 77 | association = Association() # init data association 78 | manager = Trackmanagement() # init track manager 79 | lidar = None # init lidar sensor object 80 | camera = None # init camera sensor object 81 | np.random.seed(10) # make random values predictable 82 | 83 | ## Selective execution and visualization 84 | exec_detection = ['bev_from_pcl', 'detect_objects', 'validate_object_labels', 'measure_detection_performance'] # options are 'bev_from_pcl', 'detect_objects', 'validate_object_labels', 'measure_detection_performance'; options not in the list 
will be loaded from file 85 | exec_tracking = [] # options are 'perform_tracking' 86 | exec_visualization = [] # options are 'show_range_image', 'show_bev', 'show_pcl', 'show_labels_in_image', 'show_objects_and_labels_in_bev', 'show_objects_in_bev_labels_in_camera', 'show_tracks', 'show_detection_performance', 'make_tracking_movie' 87 | exec_list = make_exec_list(exec_detection, exec_tracking, exec_visualization) 88 | vis_pause_time = 0 # set pause time between frames in ms (0 = stop between frames until key is pressed) 89 | 90 | 91 | ################## 92 | ## Perform detection & tracking over all selected frames 93 | 94 | cnt_frame = 0 95 | all_labels = [] 96 | det_performance_all = [] 97 | np.random.seed(0) # make random values predictable 98 | if 'show_tracks' in exec_list: 99 | fig, (ax2, ax) = plt.subplots(1,2) # init track plot 100 | 101 | while True: 102 | try: 103 | ## Get next frame from Waymo dataset 104 | frame = next(datafile_iter) 105 | if cnt_frame < show_only_frames[0]: 106 | cnt_frame = cnt_frame + 1 107 | continue 108 | elif cnt_frame > show_only_frames[1]: 109 | print('reached end of selected frames') 110 | break 111 | 112 | print('------------------------------') 113 | print('processing frame #' + str(cnt_frame)) 114 | 115 | ################################# 116 | ## Perform 3D object detection 117 | 118 | ## Extract calibration data and front camera image from frame 119 | lidar_name = dataset_pb2.LaserName.TOP 120 | camera_name = dataset_pb2.CameraName.FRONT 121 | lidar_calibration = waymo_utils.get(frame.context.laser_calibrations, lidar_name) 122 | camera_calibration = waymo_utils.get(frame.context.camera_calibrations, camera_name) 123 | if 'load_image' in exec_list: 124 | image = tools.extract_front_camera_image(frame) 125 | 126 | ## Compute lidar point-cloud from range image 127 | if 'pcl_from_rangeimage' in exec_list: 128 | print('computing point-cloud from lidar range image') 129 | lidar_pcl = tools.pcl_from_range_image(frame, lidar_name) 130 | else: 131 | print('loading lidar point-cloud from result file') 132 | lidar_pcl = load_object_from_file(results_fullpath, data_filename, 'lidar_pcl', cnt_frame) 133 | 134 | ## Compute lidar birds-eye view (bev) 135 | if 'bev_from_pcl' in exec_list: 136 | print('computing birds-eye view from lidar pointcloud') 137 | lidar_bev = pcl.bev_from_pcl(lidar_pcl, configs_det) 138 | else: 139 | print('loading birds-eve view from result file') 140 | lidar_bev = load_object_from_file(results_fullpath, data_filename, 'lidar_bev', cnt_frame) 141 | 142 | ## 3D object detection 143 | if (configs_det.use_labels_as_objects==True): 144 | print('using groundtruth labels as objects') 145 | detections = tools.convert_labels_into_objects(frame.laser_labels, configs_det) 146 | else: 147 | if 'detect_objects' in exec_list: 148 | print('detecting objects in lidar pointcloud') 149 | detections = det.detect_objects(lidar_bev, model_det, configs_det) 150 | else: 151 | print('loading detected objects from result file') 152 | # load different data for final project vs. 
mid-term project 153 | if 'perform_tracking' in exec_list: 154 | detections = load_object_from_file(results_fullpath, data_filename, 'detections', cnt_frame) 155 | else: 156 | detections = load_object_from_file(results_fullpath, data_filename, 'detections_' + configs_det.arch + '_' + str(configs_det.conf_thresh), cnt_frame) 157 | 158 | ## Validate object labels 159 | if 'validate_object_labels' in exec_list: 160 | print("validating object labels") 161 | valid_label_flags = tools.validate_object_labels(frame.laser_labels, lidar_pcl, configs_det, 0 if configs_det.use_labels_as_objects==True else 10) 162 | else: 163 | print('loading object labels and validation from result file') 164 | valid_label_flags = load_object_from_file(results_fullpath, data_filename, 'valid_labels', cnt_frame) 165 | 166 | ## Performance evaluation for object detection 167 | if 'measure_detection_performance' in exec_list: 168 | print('measuring detection performance') 169 | det_performance = eval.measure_detection_performance(detections, frame.laser_labels, valid_label_flags, configs_det.min_iou) 170 | else: 171 | print('loading detection performance measures from file') 172 | # load different data for final project vs. mid-term project 173 | if 'perform_tracking' in exec_list: 174 | det_performance = load_object_from_file(results_fullpath, data_filename, 'det_performance', cnt_frame) 175 | else: 176 | det_performance = load_object_from_file(results_fullpath, data_filename, 'det_performance_' + configs_det.arch + '_' + str(configs_det.conf_thresh), cnt_frame) 177 | 178 | det_performance_all.append(det_performance) # store all evaluation results in a list for performance assessment at the end 179 | 180 | 181 | ## Visualization for object detection 182 | if 'show_range_image' in exec_list: 183 | img_range = pcl.show_range_image(frame, lidar_name) 184 | img_range = img_range.astype(np.uint8) 185 | cv2.imshow('range_image', img_range) 186 | cv2.waitKey(vis_pause_time) 187 | 188 | if 'show_pcl' in exec_list: 189 | pcl.show_pcl(lidar_pcl) 190 | 191 | if 'show_bev' in exec_list: 192 | tools.show_bev(lidar_bev, configs_det) 193 | cv2.waitKey(vis_pause_time) 194 | 195 | if 'show_labels_in_image' in exec_list: 196 | img_labels = tools.project_labels_into_camera(camera_calibration, image, frame.laser_labels, valid_label_flags, 0.5) 197 | cv2.imshow('img_labels', img_labels) 198 | cv2.waitKey(vis_pause_time) 199 | 200 | if 'show_objects_and_labels_in_bev' in exec_list: 201 | tools.show_objects_labels_in_bev(detections, frame.laser_labels, lidar_bev, configs_det) 202 | cv2.waitKey(vis_pause_time) 203 | 204 | if 'show_objects_in_bev_labels_in_camera' in exec_list: 205 | tools.show_objects_in_bev_labels_in_camera(detections, lidar_bev, image, frame.laser_labels, valid_label_flags, camera_calibration, configs_det) 206 | cv2.waitKey(vis_pause_time) 207 | 208 | 209 | ################################# 210 | ## Perform tracking 211 | if 'perform_tracking' in exec_list: 212 | # set up sensor objects 213 | if lidar is None: 214 | lidar = Sensor('lidar', lidar_calibration) 215 | if camera is None: 216 | camera = Sensor('camera', camera_calibration) 217 | 218 | # preprocess lidar detections 219 | meas_list_lidar = [] 220 | for detection in detections: 221 | # check if measurement lies inside specified range 222 | if detection[1] > configs_det.lim_x[0] and detection[1] < configs_det.lim_x[1] and detection[2] > configs_det.lim_y[0] and detection[2] < configs_det.lim_y[1]: 223 | meas_list_lidar = lidar.generate_measurement(cnt_frame, 
detection[1:], meas_list_lidar) 224 | 225 | # preprocess camera detections 226 | meas_list_cam = [] 227 | for label in frame.camera_labels[0].labels: 228 | if(label.type == label_pb2.Label.Type.TYPE_VEHICLE): 229 | 230 | box = label.box 231 | # use camera labels as measurements and add some random noise 232 | z = [box.center_x, box.center_y, box.width, box.length] 233 | z[0] = z[0] + np.random.normal(0, params.sigma_cam_i) 234 | z[1] = z[1] + np.random.normal(0, params.sigma_cam_j) 235 | meas_list_cam = camera.generate_measurement(cnt_frame, z, meas_list_cam) 236 | 237 | # Kalman prediction 238 | for track in manager.track_list: 239 | print('predict track', track.id) 240 | KF.predict(track) 241 | track.set_t((cnt_frame - 1)*0.1) # save next timestamp 242 | 243 | # associate all lidar measurements to all tracks 244 | association.associate_and_update(manager, meas_list_lidar, KF) 245 | 246 | # associate all camera measurements to all tracks 247 | association.associate_and_update(manager, meas_list_cam, KF) 248 | 249 | # save results for evaluation 250 | result_dict = {} 251 | for track in manager.track_list: 252 | result_dict[track.id] = track 253 | manager.result_list.append(copy.deepcopy(result_dict)) 254 | label_list = [frame.laser_labels, valid_label_flags] 255 | all_labels.append(label_list) 256 | 257 | # visualization 258 | if 'show_tracks' in exec_list: 259 | fig, ax, ax2 = plot_tracks(fig, ax, ax2, manager.track_list, meas_list_lidar, frame.laser_labels, 260 | valid_label_flags, image, camera, configs_det) 261 | if 'make_tracking_movie' in exec_list: 262 | # save track plots to file 263 | fname = results_fullpath + '/tracking%03d.png' % cnt_frame 264 | print('Saving frame', fname) 265 | fig.savefig(fname) 266 | 267 | # increment frame counter 268 | cnt_frame = cnt_frame + 1 269 | 270 | except StopIteration: 271 | # if StopIteration is raised, break from loop 272 | print("StopIteration has been raised\n") 273 | break 274 | 275 | 276 | ################################# 277 | ## Post-processing 278 | 279 | ## Evaluate object detection performance 280 | if 'show_detection_performance' in exec_list: 281 | eval.compute_performance_stats(det_performance_all, configs_det) 282 | 283 | ## Plot RMSE for all tracks 284 | if 'show_tracks' in exec_list: 285 | plot_rmse(manager, all_labels, configs_det) 286 | 287 | ## Make movie from tracking results 288 | if 'make_tracking_movie' in exec_list: 289 | make_movie(results_fullpath) 290 | -------------------------------------------------------------------------------- /misc/evaluation.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Evaluate and plot results 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | import matplotlib 16 | matplotlib.use('wxagg') # change backend so that figure maximizing works on Mac as well 17 | import matplotlib.pyplot as plt 18 | import matplotlib.patches as patches 19 | from matplotlib.path import Path 20 | from matplotlib import colors 21 | from matplotlib.transforms import Affine2D 22 | import matplotlib.ticker as ticker 23 | import os 24 | import cv2 25 | 26 | # add project directory to python path to enable relative imports 27 | import os 28 | import sys 29 | PACKAGE_PARENT = '..' 30 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 31 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 32 | 33 | from tools.waymo_reader.simple_waymo_open_dataset_reader import label_pb2 34 | 35 | def plot_tracks(fig, ax, ax2, track_list, meas_list, lidar_labels, lidar_labels_valid, 36 | image, camera, configs_det, state=None): 37 | 38 | # plot image 39 | ax.cla() 40 | ax2.cla() 41 | ax2.imshow(image) 42 | 43 | # plot tracks, measurements and ground truth in birds-eye view 44 | for track in track_list: 45 | if state == None or track.state == state: # plot e.g. only confirmed tracks 46 | 47 | # choose color according to track state 48 | if track.state == 'confirmed': 49 | col = 'green' 50 | elif track.state == 'tentative': 51 | col = 'orange' 52 | else: 53 | col = 'red' 54 | 55 | # get current state variables 56 | w = track.width 57 | h = track.height 58 | l = track.length 59 | x = track.x[0] 60 | y = track.x[1] 61 | z = track.x[2] 62 | yaw = track.yaw 63 | 64 | # plot boxes in top view 65 | point_of_rotation = np.array([w/2, l/2]) 66 | rec = plt.Rectangle(-point_of_rotation, width=w, height=l, 67 | color=col, alpha=0.2, 68 | transform=Affine2D().rotate_around(*(0,0), -yaw)+Affine2D().translate(-y,x)+ax.transData) 69 | ax.add_patch(rec) 70 | 71 | # write track id for debugging 72 | ax.text(float(-track.x[1]), float(track.x[0]+1), str(track.id)) 73 | 74 | if track.state =='initialized': 75 | ax.scatter(float(-track.x[1]), float(track.x[0]), color=col, s=80, marker='x', label='initialized track') 76 | elif track.state =='tentative': 77 | ax.scatter(float(-track.x[1]), float(track.x[0]), color=col, s=80, marker='x', label='tentative track') 78 | elif track.state =='confirmed': 79 | ax.scatter(float(-track.x[1]), float(track.x[0]), color=col, s=80, marker='x', label='confirmed track') 80 | 81 | # project tracks in image 82 | # transform from vehicle to camera coordinates 83 | pos_veh = np.ones((4, 1)) # homogeneous coordinates 84 | pos_veh[0:3] = track.x[0:3] 85 | pos_sens = camera.veh_to_sens*pos_veh # transform from vehicle to sensor coordinates 86 | x = pos_sens[0] 87 | y = pos_sens[1] 88 | z = pos_sens[2] 89 | 90 | # compute rotation around z axis 91 | R = np.matrix([[np.cos(yaw), np.sin(yaw), 0], 92 | [-np.sin(yaw), np.cos(yaw), 0], 93 | [0, 0, 1]]) 94 | 95 | # bounding box corners 96 | x_corners = [-l/2, l/2, l/2, l/2, l/2, -l/2, -l/2, -l/2] 97 | y_corners = [-w/2, -w/2, -w/2, w/2, w/2, w/2, w/2, -w/2] 98 | z_corners = [-h/2, -h/2, h/2, h/2, -h/2, -h/2, h/2, h/2] 99 | 100 | # bounding box 101 | corners_3D = np.array([x_corners, y_corners, z_corners]) 102 | 103 | # rotate 104 | corners_3D = R*corners_3D 105 | 106 | # translate 107 | corners_3D += np.array([x, y, z]).reshape((3, 1)) 
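            # Added note: the projection below follows a pinhole model with the camera
            # frame's optical axis along x, so a 3D corner (x, y, z) in camera coordinates
            # maps to image coordinates as
            #     i = c_i - f_i * y / x,    j = c_j - f_j * z / x
            # which is exactly what the per-corner loop further down computes. Boxes with
            # any corner at x <= 0 lie behind the image plane and are therefore skipped.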
108 | # print ( 'corners_3d', corners_3D) 109 | 110 | # remove bounding boxes that include negative x, projection makes no sense 111 | if np.any(corners_3D[0,:] <= 0): 112 | continue 113 | 114 | # project to image 115 | corners_2D = np.zeros((2,8)) 116 | for k in range(8): 117 | corners_2D[0,k] = camera.c_i - camera.f_i * corners_3D[1,k] / corners_3D[0,k] 118 | corners_2D[1,k] = camera.c_j - camera.f_j * corners_3D[2,k] / corners_3D[0,k] 119 | # print ( 'corners_2d', corners_2D) 120 | 121 | # edges of bounding box in vertex index from above, e.g. index 0 stands for [-l/2, -w/2, -h/2] 122 | draw_line_indices = [0, 1, 2, 3, 4, 5, 6, 7, 0, 5, 4, 1, 2, 7, 6, 3] 123 | 124 | paths_2D = np.transpose(corners_2D[:, draw_line_indices]) 125 | # print ( 'paths_2D', paths_2D) 126 | 127 | codes = [Path.LINETO]*paths_2D.shape[0] 128 | codes[0] = Path.MOVETO 129 | path = Path(paths_2D, codes) 130 | 131 | # plot bounding box in image 132 | p = patches.PathPatch( 133 | path, fill=False, color=col, linewidth=3) 134 | ax2.add_patch(p) 135 | 136 | # plot labels 137 | for label, valid in zip(lidar_labels, lidar_labels_valid): 138 | if valid: 139 | ax.scatter(-1*label.box.center_y, label.box.center_x, color='gray', s=80, marker='+', label='ground truth') 140 | # plot measurements 141 | for meas in meas_list: 142 | ax.scatter(-1*meas.z[1], meas.z[0], color='blue', marker='.', label='measurement') 143 | 144 | # maximize window 145 | mng = plt.get_current_fig_manager() 146 | mng.frame.Maximize(True) 147 | 148 | # axis 149 | ax.set_xlabel('y [m]') 150 | ax.set_ylabel('x [m]') 151 | ax.set_aspect('equal') 152 | ax.set_ylim(configs_det.lim_x[0], configs_det.lim_x[1]) # x forward, y left in vehicle coordinates 153 | ax.set_xlim(-configs_det.lim_y[1], -configs_det.lim_y[0]) 154 | # correct x ticks (positive to the left) 155 | ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(-x) if x!=0 else '{0:g}'.format(x)) 156 | ax.xaxis.set_major_formatter(ticks_x) 157 | 158 | # remove repeated labels 159 | handles, labels = ax.get_legend_handles_labels() 160 | handle_list, label_list = [], [] 161 | for handle, label in zip(handles, labels): 162 | if label not in label_list: 163 | handle_list.append(handle) 164 | label_list.append(label) 165 | ax.legend(handle_list, label_list, loc='center left', shadow=True, fontsize='x-large', bbox_to_anchor=(0.8, 0.5)) 166 | 167 | plt.pause(0.01) 168 | 169 | return fig, ax, ax2 170 | 171 | 172 | def plot_rmse(manager, all_labels, configs_det): 173 | fig, ax = plt.subplots() 174 | plot_empty = True 175 | 176 | # loop over all tracks 177 | for track_id in range(manager.last_id+1): 178 | rmse_sum = 0 179 | cnt = 0 180 | rmse = [] 181 | time = [] 182 | 183 | # loop over timesteps 184 | for i, result_dict in enumerate(manager.result_list): 185 | label_list = all_labels[i] 186 | if track_id not in result_dict: 187 | continue 188 | track = result_dict[track_id] 189 | if track.state != 'confirmed': 190 | continue 191 | 192 | # find closest label and calculate error at this timestamp 193 | min_error = np.inf 194 | for label, valid in zip(label_list[0], label_list[1]): 195 | error = 0 196 | if valid: 197 | # check if label lies inside specified range 198 | if label.box.center_x > configs_det.lim_x[0] and label.box.center_x < configs_det.lim_x[1] and label.box.center_y > configs_det.lim_y[0] and label.box.center_y < configs_det.lim_y[1]: 199 | error += (label.box.center_x - float(track.x[0]))**2 200 | error += (label.box.center_y - float(track.x[1]))**2 201 | error += (label.box.center_z - 
float(track.x[2]))**2 202 | if error < min_error: 203 | min_error = error 204 | if min_error < np.inf: 205 | error = np.sqrt(min_error) 206 | time.append(track.t) 207 | rmse.append(error) 208 | rmse_sum += error 209 | cnt += 1 210 | 211 | # calc overall RMSE 212 | if cnt != 0: 213 | plot_empty = False 214 | rmse_sum /= cnt 215 | # plot RMSE 216 | ax.plot(time, rmse, marker='x', label='RMSE track ' + str(track_id) + '\n(mean: ' 217 | + '{:.2f}'.format(rmse_sum) + ')') 218 | 219 | # maximize window 220 | mng = plt.get_current_fig_manager() 221 | mng.frame.Maximize(True) 222 | ax.set_ylim(0,1) 223 | if plot_empty: 224 | print('No confirmed tracks found to plot RMSE!') 225 | else: 226 | plt.legend(loc='center left', shadow=True, fontsize='x-large', bbox_to_anchor=(0.9, 0.5)) 227 | plt.xlabel('time [s]') 228 | plt.ylabel('RMSE [m]') 229 | plt.show() 230 | 231 | 232 | def make_movie(path): 233 | # read track plots 234 | images = [img for img in sorted(os.listdir(path)) if img.endswith(".png")] 235 | frame = cv2.imread(os.path.join(path, images[0])) 236 | height, width, layers = frame.shape 237 | 238 | # save with 10fps to result dir 239 | video = cv2.VideoWriter(os.path.join(path, 'my_tracking_results.avi'), 0, 10, (width,height)) 240 | 241 | for image in images: 242 | fname = os.path.join(path, image) 243 | video.write(cv2.imread(fname)) 244 | os.remove(fname) # clean up 245 | 246 | cv2.destroyAllWindows() 247 | video.release() -------------------------------------------------------------------------------- /misc/helpers.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : helper functions for loop_over_dataset.py 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import os 15 | import pickle 16 | 17 | ## Saves an object to a binary file 18 | def save_object_to_file(object, file_path, base_filename, object_name, frame_id=1): 19 | object_filename = os.path.join(file_path, os.path.splitext(base_filename)[0] 20 | + "__frame-" + str(frame_id) + "__" + object_name + ".pkl") 21 | with open(object_filename, 'wb') as f: 22 | pickle.dump(object, f) 23 | 24 | ## Loads an object from a binary file 25 | def load_object_from_file(file_path, base_filename, object_name, frame_id=1): 26 | object_filename = os.path.join(file_path, os.path.splitext(base_filename)[0] 27 | + "__frame-" + str(frame_id) + "__" + object_name + ".pkl") 28 | with open(object_filename, 'rb') as f: 29 | object = pickle.load(f) 30 | return object 31 | 32 | ## Prepares an exec_list with all tasks to be executed 33 | def make_exec_list(exec_detection, exec_tracking, exec_visualization): 34 | 35 | # save all tasks in exec_list 36 | exec_list = exec_detection + exec_tracking + exec_visualization 37 | 38 | # check if we need pcl 39 | if any(i in exec_list for i in ('validate_object_labels', 'bev_from_pcl')): 40 | exec_list.append('pcl_from_rangeimage') 41 | # check if we need image 42 | if any(i in exec_list for i in ('show_tracks', 'show_labels_in_image', 'show_objects_in_bev_labels_in_camera')): 43 | exec_list.append('load_image') 44 | # movie does not work without show_tracks 45 | if 'make_tracking_movie' in exec_list: 46 | exec_list.append('show_tracks') 47 | return exec_list -------------------------------------------------------------------------------- /misc/params.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Parameter file for tracking 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # general parameters 14 | dim_state = 6 # process model dimension 15 | 16 | # Kalman filter parameters (Step 1) 17 | dt = 0.1 # time increment 18 | q=3 # process noise variable for Kalman filter Q 19 | 20 | # track management parameters (Step 2) 21 | confirmed_threshold = 0.8 # track score threshold to switch from 'tentative' to 'confirmed' 22 | delete_threshold = 0.6 # track score threshold to delete confirmed tracks 23 | window = 6 # number of frames for track score calculation 24 | max_P = 3**2 # delete track if covariance of px or py bigger than this 25 | sigma_p44 = 50 # initial setting for estimation error covariance P entry for vx 26 | sigma_p55 = 50 # initial setting for estimation error covariance P entry for vy 27 | sigma_p66 = 5 # initial setting for estimation error covariance P entry for vz 28 | weight_dim = 0.1 # sliding average parameter for dimension estimation 29 | 30 | # association parameters (Step 3) 31 | gating_threshold = 0.995 # percentage of correct measurements that shall lie inside gate 32 | 33 | # measurement parameters (Step 4) 34 | sigma_lidar_x = 0.1 # measurement noise standard deviation for lidar x position 35 | sigma_lidar_y = 0.1 # measurement noise standard deviation for lidar y position 36 | sigma_lidar_z = 0.1 # measurement noise standard deviation for lidar z position 37 | sigma_cam_i = 5 # measurement noise standard deviation for image i coordinate 38 | sigma_cam_j = 5 # measurement noise standard deviation for image j coordinate 39 | -------------------------------------------------------------------------------- /my_tracking_result.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/my_tracking_result.mp4 -------------------------------------------------------------------------------- /my_tracking_results.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/my_tracking_results.rar -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | protobuf 4 | easydict 5 | pytorch 6 | pillow 7 | matplotlib 8 | wxpython 9 | shapely 10 | tqdm 11 | open3d -------------------------------------------------------------------------------- /student/association.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Data association class with single nearest neighbor association and gating based on Mahalanobis distance 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | from scipy.stats.distributions import chi2 16 | 17 | # add project directory to python path to enable relative imports 18 | import os 19 | import sys 20 | PACKAGE_PARENT = '..' 21 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 22 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 23 | 24 | import misc.params as params 25 | 26 | class Association: 27 | '''Data association class with single nearest neighbor association and gating based on Mahalanobis distance''' 28 | def __init__(self): 29 | self.association_matrix = np.matrix([]) 30 | self.unassigned_tracks = [] 31 | self.unassigned_meas = [] 32 | 33 | def associate(self, track_list, meas_list, KF): 34 | 35 | ############ 36 | # TODO Step 3: association: 37 | # - replace association_matrix with the actual association matrix based on Mahalanobis distance (see below) for all tracks and all measurements 38 | # - update list of unassigned measurements and unassigned tracks 39 | ############ 40 | 41 | # the following only works for at most one track and one measurement 42 | self.association_matrix = np.matrix([]) # reset matrix 43 | self.unassigned_tracks = [] # reset lists 44 | self.unassigned_meas = [] 45 | 46 | 47 | if len(meas_list) > 0: 48 | self.unassigned_meas = [0] 49 | if len(track_list) > 0: 50 | self.unassigned_tracks = [0] 51 | if len(meas_list) > 0 and len(track_list) > 0: 52 | self.association_matrix = np.matrix([[0]]) 53 | 54 | N = len(track_list) 55 | M = len(meas_list) 56 | self.unassigned_tracks = list(range(N)) 57 | self.unassigned_meas = list(range(M)) 58 | 59 | self.association_matrix = np.asmatrix(np.inf * np.ones((N,M))) 60 | 61 | for i in range(N): 62 | track = track_list[i] 63 | for j in range(M): 64 | meas = meas_list[j] 65 | dist = self.MHD(track,meas,KF) 66 | if self.gating(dist, meas.sensor): 67 | self.association_matrix[i,j] = dist 68 | 69 | ############ 70 | # END student code 71 | ############ 72 | 73 | def get_closest_track_and_meas(self): 74 | ############ 75 | # TODO Step 3: find closest track and measurement: 76 | # - find minimum entry in association matrix 77 | # - delete row and column 78 | # - remove corresponding track and measurement from unassigned_tracks and unassigned_meas 79 | # - return this track and measurement 80 | ############ 81 | 82 | # the following only works for at most one track and one measurement 83 | update_track = 0 84 | update_meas = 0 85 | 86 | A = self.association_matrix 87 | if np.min(A) == np.inf: 88 | return np.nan, np.nan 89 | 90 | # get indices of minimum entry 91 | ij_min = np.unravel_index(np.argmin(A, axis=None), A.shape) 92 | ind_track = ij_min[0] 93 | ind_meas = ij_min[1] 94 | 95 | # delete row and column for next update 96 | A = np.delete(A, ind_track, 0) 97 | A = np.delete(A, ind_meas, 1) 98 | self.association_matrix = A 99 | 100 | # update this track with this measurement 101 | update_track = self.unassigned_tracks[ind_track] 102 | update_meas = self.unassigned_meas[ind_meas] 103 | 104 | # remove this track and measurement from list 105 | self.unassigned_tracks.remove(update_track) 106 | self.unassigned_meas.remove(update_meas) 107 | 108 | return update_track, update_meas 109 | 110 | ############ 111 | # END student code 112 | ############ 113 | 114 | def gating(self, 
MHD, sensor): 115 | ############ 116 | # TODO Step 3: return True if measurement lies inside gate, otherwise False 117 | ############ 118 | limit = chi2.ppf(params.gating_threshold, df = sensor.dim_meas) 119 | if MHD < limit: 120 | return True 121 | else: 122 | return False 123 | 124 | ############ 125 | # END student code 126 | ############ 127 | 128 | def MHD(self, track, meas, KF): 129 | ############ 130 | # TODO Step 3: calculate and return Mahalanobis distance 131 | ########### 132 | H = meas.sensor.get_H(track.x) 133 | S_inv = np.linalg.inv(KF.S(track,meas,H)) 134 | gamma = KF.gamma(track, meas) 135 | return gamma.T*S_inv*gamma 136 | ############ 137 | # END student code 138 | ############ 139 | 140 | def associate_and_update(self, manager, meas_list, KF): 141 | # associate measurements and tracks 142 | self.associate(manager.track_list, meas_list, KF) 143 | 144 | # update associated tracks with measurements 145 | while self.association_matrix.shape[0]>0 and self.association_matrix.shape[1]>0: 146 | 147 | # search for next association between a track and a measurement 148 | ind_track, ind_meas = self.get_closest_track_and_meas() 149 | if np.isnan(ind_track): 150 | print('---no more associations---') 151 | break 152 | track = manager.track_list[ind_track] 153 | 154 | # check visibility, only update tracks in fov 155 | if not meas_list[0].sensor.in_fov(track.x): 156 | continue 157 | 158 | # Kalman update 159 | print('update track', track.id, 'with', meas_list[ind_meas].sensor.name, 'measurement', ind_meas) 160 | KF.update(track, meas_list[ind_meas]) 161 | 162 | # update score and track state 163 | manager.handle_updated_track(track) 164 | 165 | # save updated track 166 | manager.track_list[ind_track] = track 167 | 168 | # run track management 169 | manager.manage_tracks(self.unassigned_tracks, self.unassigned_meas, meas_list) 170 | 171 | for track in manager.track_list: 172 | print('track', track.id, 'score =', track.score) 173 | -------------------------------------------------------------------------------- /student/filter.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Kalman filter class 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | 16 | # add project directory to python path to enable relative imports 17 | import os 18 | import sys 19 | PACKAGE_PARENT = '..' 
20 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 21 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 22 | import misc.params as params 23 | 24 | class Filter: 25 | '''Kalman filter class''' 26 | def __init__(self): 27 | pass 28 | 29 | def F(self): 30 | ############ 31 | # TODO Step 1: implement and return system matrix F 32 | ############ 33 | dt = params.dt 34 | return np.matrix([[1, 0, 0, dt, 0 ,0], 35 | [0, 1, 0, 0, dt, 0], 36 | [0, 0, 1, 0, 0 , dt], 37 | [0, 0, 0, 1, 0, 0], 38 | [0, 0, 0, 0, 1, 0], 39 | [0, 0, 0, 0, 0, 1]]) 40 | 41 | ############ 42 | # END student code 43 | ############ 44 | 45 | def Q(self): 46 | ############ 47 | # TODO Step 1: implement and return process noise covariance Q 48 | q = params.q 49 | dt = params.dt 50 | q1 = ((dt**3)/3) * q 51 | q2 = ((dt**2)/2) * q 52 | q3 = dt * q 53 | return np.matrix([[q1, 0, 0, q2, 0, 0], 54 | [0, q1, 0, 0, q2, 0], 55 | [0, 0, q1, 0, 0, q2], 56 | [q2, 0, 0, q3, 0, 0], 57 | [0, q2, 0, 0, q3, 0], 58 | [0, 0, q2, 0, 0, q3]]) 59 | 60 | ############ 61 | # END student code 62 | ############ 63 | 64 | def predict(self, track): 65 | ############ 66 | # TODO Step 1: predict state x and estimation error covariance P to next timestep, save x and P in track 67 | ############ 68 | F = self.F() 69 | x = track.x 70 | P = track.P 71 | x = F*track.x # state prediction 72 | P = F*track.P*F.transpose() + self.Q() # covariance prediction 73 | track.set_x(x) 74 | track.set_P(P) 75 | 76 | ############ 77 | # END student code 78 | ############ 79 | 80 | def update(self, track, meas): 81 | ############ 82 | # TODO Step 1: update state x and covariance P with associated measurement, save x and P in track 83 | ############ 84 | H = meas.sensor.get_H(track.x) # measurement matrix 85 | gamma = self.gamma(track, meas) # residual 86 | S = self.S(track, meas, H) # covariance of residual 87 | K = track.P * H.transpose()* S.I # Kalman gain 88 | x = track.x + K * gamma # state update 89 | I = np.identity(params.dim_state) 90 | P = (I - K * H) * track.P # covariance update 91 | track.set_x(x) 92 | track.set_P(P) 93 | track.update_attributes(meas) 94 | 95 | 96 | def gamma(self, track, meas): 97 | ############ 98 | # TODO Step 1: calculate and return residual gamma 99 | ############ 100 | g = meas.z - meas.sensor.get_hx(track.x) 101 | return g 102 | 103 | 104 | ############ 105 | # END student code 106 | ############ 107 | 108 | def S(self, track, meas, H): 109 | ############ 110 | # TODO Step 1: calculate and return covariance of residual S 111 | ############ 112 | s = H * track.P * H.transpose() + meas.R 113 | return s 114 | 115 | ############ 116 | # END student code 117 | ############ 118 | -------------------------------------------------------------------------------- /student/measurements.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Classes for sensor and measurement 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | 16 | # add project directory to python path to enable relative imports 17 | import os 18 | import sys 19 | PACKAGE_PARENT = '..' 20 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 21 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 22 | import misc.params as params 23 | 24 | class Sensor: 25 | '''Sensor class including measurement matrix''' 26 | def __init__(self, name, calib): 27 | self.name = name 28 | if name == 'lidar': 29 | self.dim_meas = 3 30 | self.sens_to_veh = np.matrix(np.identity((4))) # transformation sensor to vehicle coordinates equals identity matrix because lidar detections are already in vehicle coordinates 31 | self.fov = [-np.pi/2, np.pi/2] # angle of field of view in radians 32 | 33 | elif name == 'camera': 34 | self.dim_meas = 2 35 | self.sens_to_veh = np.matrix(calib.extrinsic.transform).reshape(4,4) # transformation sensor to vehicle coordinates 36 | self.f_i = calib.intrinsic[0] # focal length i-coordinate 37 | self.f_j = calib.intrinsic[1] # focal length j-coordinate 38 | self.c_i = calib.intrinsic[2] # principal point i-coordinate 39 | self.c_j = calib.intrinsic[3] # principal point j-coordinate 40 | self.fov = [-0.35, 0.35] # angle of field of view in radians, inaccurate boundary region was removed 41 | 42 | self.veh_to_sens = np.linalg.inv(self.sens_to_veh) # transformation vehicle to sensor coordinates 43 | 44 | def in_fov(self, x): 45 | # check if an object x can be seen by this sensor 46 | ############ 47 | # TODO Step 4: implement a function that returns True if x lies in the sensor's field of view, 48 | # otherwise False. 
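        # Added note on the approach used below: the state estimate x is transformed from
        # vehicle to sensor coordinates via veh_to_sens, and the azimuth angle
        # alpha = arctan(p_y / p_x) of the resulting position is compared against the
        # opening angle stored in self.fov; only objects in front of the sensor
        # (p_x > 0) are considered visible.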
49 | ############ 50 | pos_veh = np.ones((4,1)) 51 | pos_veh[0:3] = x[0:3] 52 | pos_sens = self.veh_to_sens * pos_veh 53 | if pos_sens[0] > 0: 54 | alpha = np.arctan(pos_sens[1]/pos_sens[0]) 55 | if alpha > self.fov[0] and alpha < self.fov[1]: 56 | return True 57 | else: 58 | return False 59 | ############ 60 | # END student code 61 | ############ 62 | 63 | def get_hx(self, x): 64 | # calculate nonlinear measurement expectation value h(x) 65 | if self.name == 'lidar': 66 | pos_veh = np.ones((4, 1)) # homogeneous coordinates 67 | pos_veh[0:3] = x[0:3] 68 | pos_sens = self.veh_to_sens*pos_veh # transform from vehicle to lidar coordinates 69 | return pos_sens[0:3] 70 | elif self.name == 'camera': 71 | 72 | ############ 73 | # TODO Step 4: implement nonlinear camera measurement function h: 74 | # - transform position estimate from vehicle to camera coordinates 75 | # - project from camera to image coordinates 76 | # - make sure to not divide by zero, raise an error if needed 77 | # - return h(x) 78 | ############ 79 | 80 | veh_to_cam = np.ones((4,1)) 81 | veh_to_cam[0:3] = x[0:3] 82 | cam_sens = self.veh_to_sens * veh_to_cam 83 | 84 | #project camera cord to image cord 85 | fi = self.f_i 86 | fj = self.f_j 87 | ci = self.c_i 88 | cj = self.c_j 89 | hx = np.zeros((2,1)) 90 | if cam_sens[0] == 0: 91 | raise NameError('Divided number cannot be zero') 92 | else: 93 | hx[0,0] = ci - fi * cam_sens[1]/cam_sens[0] 94 | hx[1,0] = cj - fj * cam_sens[2]/cam_sens[0] 95 | 96 | return hx 97 | 98 | ############ 99 | # END student code 100 | ############ 101 | 102 | def get_H(self, x): 103 | # calculate Jacobian H at current x from h(x) 104 | H = np.matrix(np.zeros((self.dim_meas, params.dim_state))) 105 | R = self.veh_to_sens[0:3, 0:3] # rotation 106 | T = self.veh_to_sens[0:3, 3] # translation 107 | if self.name == 'lidar': 108 | H[0:3, 0:3] = R 109 | elif self.name == 'camera': 110 | # check and print error message if dividing by zero 111 | if R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0] == 0: 112 | raise NameError('Jacobian not defined for this x!') 113 | else: 114 | H[0,0] = self.f_i * (-R[1,0] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 115 | + R[0,0] * (R[1,0]*x[0] + R[1,1]*x[1] + R[1,2]*x[2] + T[1]) \ 116 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 117 | H[1,0] = self.f_j * (-R[2,0] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 118 | + R[0,0] * (R[2,0]*x[0] + R[2,1]*x[1] + R[2,2]*x[2] + T[2]) \ 119 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 120 | H[0,1] = self.f_i * (-R[1,1] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 121 | + R[0,1] * (R[1,0]*x[0] + R[1,1]*x[1] + R[1,2]*x[2] + T[1]) \ 122 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 123 | H[1,1] = self.f_j * (-R[2,1] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 124 | + R[0,1] * (R[2,0]*x[0] + R[2,1]*x[1] + R[2,2]*x[2] + T[2]) \ 125 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 126 | H[0,2] = self.f_i * (-R[1,2] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 127 | + R[0,2] * (R[1,0]*x[0] + R[1,1]*x[1] + R[1,2]*x[2] + T[1]) \ 128 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 129 | H[1,2] = self.f_j * (-R[2,2] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 130 | + R[0,2] * (R[2,0]*x[0] + R[2,1]*x[1] + R[2,2]*x[2] + T[2]) \ 131 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 132 | return H 133 | 134 | def generate_measurement(self, num_frame, z, meas_list): 135 | # generate new measurement from this sensor and add to measurement list 136 | 
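        # Added note: with the lidar-only guard removed (see the commented-out check below),
        # this method wraps the raw detection z in a Measurement object for whichever sensor
        # generated it; the sensor-specific measurement vector and noise covariance R are
        # then set in Measurement.__init__ based on sensor.name.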
############ 137 | # TODO Step 4: remove restriction to lidar in order to include camera as well 138 | ############ 139 | 140 | # if self.name == 'lidar': 141 | meas = Measurement(num_frame, z, self) 142 | meas_list.append(meas) 143 | return meas_list 144 | 145 | ############ 146 | # END student code 147 | ############ 148 | 149 | 150 | ################### 151 | 152 | class Measurement: 153 | '''Measurement class including measurement values, covariance, timestamp, sensor''' 154 | def __init__(self, num_frame, z, sensor): 155 | # create measurement object 156 | self.t = (num_frame - 1) * params.dt # time 157 | if sensor.name == 'lidar': 158 | sigma_lidar_x = params.sigma_lidar_x # load params 159 | sigma_lidar_y = params.sigma_lidar_y 160 | sigma_lidar_z = params.sigma_lidar_z 161 | self.z = np.zeros((sensor.dim_meas,1)) # measurement vector 162 | self.z[0] = z[0] 163 | self.z[1] = z[1] 164 | self.z[2] = z[2] 165 | self.sensor = sensor # sensor that generated this measurement 166 | self.R = np.matrix([[sigma_lidar_x**2, 0, 0], # measurement noise covariance matrix 167 | [0, sigma_lidar_y**2, 0], 168 | [0, 0, sigma_lidar_z**2]]) 169 | 170 | self.width = z[4] 171 | self.length = z[5] 172 | self.height = z[3] 173 | self.yaw = z[6] 174 | elif sensor.name == 'camera': 175 | 176 | ############ 177 | # TODO Step 4: initialize camera measurement including z, R, and sensor 178 | ############ 179 | 180 | sigma_cam_i = params.sigma_cam_i 181 | sigma_cam_j = params.sigma_cam_j 182 | 183 | self.z = np.zeros((sensor.dim_meas,1)) 184 | self.z[0] = z[0] 185 | self.z[1] = z[1] 186 | self.sensor = sensor 187 | self.R = np.matrix([[sigma_cam_i**2 , 0], 188 | [0,sigma_cam_j**2]]) 189 | 190 | self.width = z[2] 191 | self.length = z[3] 192 | 193 | ############ 194 | # END student code 195 | ############ 196 | -------------------------------------------------------------------------------- /student/objdet_detect.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Detect 3D objects in lidar point clouds using deep learning 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # general package imports 14 | import numpy as np 15 | import torch 16 | from easydict import EasyDict as edict 17 | 18 | # add project directory to python path to enable relative imports 19 | import os 20 | import sys 21 | PACKAGE_PARENT = '..' 
22 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 23 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 24 | 25 | # model-related 26 | from tools.objdet_models.resnet.models import fpn_resnet 27 | from tools.objdet_models.resnet.utils.evaluation_utils import decode, post_processing 28 | 29 | from tools.objdet_models.darknet.models.darknet2pytorch import Darknet as darknet 30 | from tools.objdet_models.darknet.utils.evaluation_utils import post_processing_v2 31 | from tools.objdet_models.resnet.utils.torch_utils import _sigmoid 32 | 33 | # load model-related parameters into an edict 34 | def load_configs_model(model_name='darknet', configs=None): 35 | 36 | # init config file, if none has been passed 37 | if configs==None: 38 | configs = edict() 39 | 40 | # get parent directory of this file to enable relative paths 41 | curr_path = os.path.dirname(os.path.realpath(__file__)) 42 | parent_path = configs.model_path = os.path.abspath(os.path.join(curr_path, os.pardir)) 43 | 44 | # set parameters according to model type 45 | if model_name == "darknet": 46 | configs.model_path = os.path.join(parent_path, 'tools', 'objdet_models', 'darknet') 47 | configs.pretrained_filename = os.path.join(configs.model_path, 'pretrained', 'complex_yolov4_mse_loss.pth') 48 | configs.arch = 'darknet' 49 | configs.batch_size = 4 50 | configs.cfgfile = os.path.join(configs.model_path, 'config', 'complex_yolov4.cfg') 51 | configs.conf_thresh = 0.5 52 | configs.distributed = False 53 | configs.img_size = 608 54 | configs.nms_thresh = 0.4 55 | configs.num_samples = None 56 | configs.num_workers = 4 57 | configs.pin_memory = True 58 | configs.use_giou_loss = False 59 | 60 | elif model_name == 'fpn_resnet': 61 | ####### ID_S3_EX1-3 START ####### 62 | ####### 63 | print("student task ID_S3_EX1-3") 64 | configs.model_path = os.path.join(parent_path, 'tools', 'objdet_models', 'resnet') 65 | configs.pretrained_filename = os.path.join(configs.model_path, 'pretrained', 'fpn_resnet_18_epoch_300.pth') 66 | configs.arch = 'fpn_resnet' 67 | configs.pin_memory = True 68 | configs.conf_thresh = 0.5 69 | configs.input_size = 608 70 | configs.hm_size = (152, 152) 71 | configs.down_ratio = 4 72 | configs.max_objects = 50 73 | configs.K = 40 74 | configs.imagenet_pretrained = False 75 | configs.head_conv = 64 76 | configs.num_classes = 3 77 | configs.num_center_offset = 2 78 | configs.num_z = 1 79 | configs.num_dim = 3 80 | configs.num_direction = 2 # sin, cos 81 | 82 | configs.heads = { 83 | 'hm_cen': configs.num_classes, 84 | 'cen_offset': configs.num_center_offset, 85 | 'direction': configs.num_direction, 86 | 'z_coor': configs.num_z, 87 | 'dim': configs.num_dim 88 | } 89 | configs.num_input_features = 4 90 | 91 | ####### 92 | ####### ID_S3_EX1-3 END ####### 93 | 94 | else: 95 | raise ValueError("Error: Invalid model name") 96 | 97 | # GPU vs. CPU 98 | configs.no_cuda = True # if true, cuda is not used 99 | configs.gpu_idx = 0 # GPU index to use. 
100 | configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) 101 | 102 | return configs 103 | 104 | 105 | # load all object-detection parameters into an edict 106 | def load_configs(model_name='fpn_resnet', configs=None): 107 | 108 | # init config file, if none has been passed 109 | if configs==None: 110 | configs = edict() 111 | 112 | # birds-eye view (bev) parameters 113 | configs.lim_x = [0, 50] # detection range in m 114 | configs.lim_y = [-25, 25] 115 | configs.lim_z = [-1, 3] 116 | configs.lim_r = [0, 1.0] # reflected lidar intensity 117 | configs.bev_width = 608 # pixel resolution of bev image 118 | configs.bev_height = 608 119 | 120 | # add model-dependent parameters 121 | configs = load_configs_model(model_name, configs) 122 | 123 | # visualization parameters 124 | configs.output_width = 608 # width of result image (height may vary) 125 | configs.obj_colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0]] # 'Pedestrian': 0, 'Car': 1, 'Cyclist': 2 126 | 127 | return configs 128 | 129 | 130 | # create model according to selected model type 131 | def create_model(configs): 132 | 133 | # check for availability of model file 134 | assert os.path.isfile(configs.pretrained_filename), "No file at {}".format(configs.pretrained_filename) 135 | 136 | # create model depending on architecture name 137 | if (configs.arch == 'darknet') and (configs.cfgfile is not None): 138 | print('using darknet') 139 | model = darknet(cfgfile=configs.cfgfile, use_giou_loss=configs.use_giou_loss) 140 | 141 | elif 'fpn_resnet' in configs.arch: 142 | print('using ResNet architecture with feature pyramid') 143 | 144 | ####### ID_S3_EX1-4 START ####### 145 | ####### 146 | print("student task ID_S3_EX1-4") 147 | num_layers = 18 148 | model = fpn_resnet.get_pose_net(num_layers = num_layers, heads = configs.heads, 149 | head_conv= configs.head_conv, 150 | imagenet_pretrained = configs.imagenet_pretrained) 151 | 152 | ####### 153 | ####### ID_S3_EX1-4 END ####### 154 | 155 | else: 156 | assert False, 'Undefined model backbone' 157 | 158 | # load model weights 159 | model.load_state_dict(torch.load(configs.pretrained_filename, map_location='cpu')) 160 | print('Loaded weights from {}\n'.format(configs.pretrained_filename)) 161 | 162 | # set model to evaluation state 163 | configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) 164 | model = model.to(device=configs.device) # load model to either cpu or gpu 165 | model.eval() 166 | 167 | return model 168 | 169 | 170 | # detect trained objects in birds-eye view 171 | def detect_objects(input_bev_maps, model, configs): 172 | 173 | # deactivate autograd engine during test to reduce memory usage and speed up computations 174 | with torch.no_grad(): 175 | 176 | # perform inference 177 | outputs = model(input_bev_maps) 178 | 179 | # decode model output into target object format 180 | if 'darknet' in configs.arch: 181 | 182 | # perform post-processing 183 | output_post = post_processing_v2(outputs, conf_thresh=configs.conf_thresh, nms_thresh=configs.nms_thresh) 184 | detections = [] 185 | for sample_i in range(len(output_post)): 186 | if output_post[sample_i] is None: 187 | continue 188 | detection = output_post[sample_i] 189 | for obj in detection: 190 | x, y, w, l, im, re, _, _, _ = obj 191 | yaw = np.arctan2(im, re) 192 | detections.append([1, x, y, 0.0, 1.50, w, l, yaw]) 193 | 194 | elif 'fpn_resnet' in configs.arch: 195 | # decode output and perform post-processing 196 | 197 | ####### ID_S3_EX1-5 START 
#######
198 | #######
199 | print("student task ID_S3_EX1-5")
200 |
201 | outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
202 | outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
203 | # detections size (batch_size, K, 10)
204 | detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'], outputs['dim'], K=configs.K)
205 | detections = detections.cpu().numpy().astype(np.float32) # move to cpu and convert to numpy before post-processing
206 | detections = post_processing(detections, configs)
207 | detections = detections[0][1] # keep only detections of class 1 ('Car') from the first sample in the batch
208 | # print(detections)
209 |
210 |
211 |
212 | #######
213 | ####### ID_S3_EX1-5 END #######
214 |
215 |
216 |
217 | ####### ID_S3_EX2 START #######
218 | #######
219 | # Extract 3d bounding boxes from model response
220 | print("student task ID_S3_EX2")
221 | objects = []
222 | ## step 1 : check whether there are any detections and loop over them
223 | for box in detections:
224 | id, bev_x, bev_y, z, h, bev_w, bev_l, yaw = box
225 | ## step 2 : convert the detection from BEV-pixel coordinates into metric vehicle coordinates
226 | x = bev_y / configs.bev_height * (configs.lim_x[1] - configs.lim_x[0])
227 | y = bev_x / configs.bev_width * (configs.lim_y[1] - configs.lim_y[0]) - (configs.lim_y[1] - configs.lim_y[0])/2.0
228 | w = bev_w / configs.bev_width * (configs.lim_y[1] - configs.lim_y[0])
229 | l = bev_l / configs.bev_height * (configs.lim_x[1] - configs.lim_x[0])
230 |
231 | ## step 3 : only keep detections that lie within the limits for x, y and z set in the configs structure
232 | if ((x >= configs.lim_x[0]) and (x <= configs.lim_x[1]) and (y >= configs.lim_y[0]) and (y <= configs.lim_y[1])
233 | and (z >= configs.lim_z[0]) and (z <= configs.lim_z[1])):
234 | ## step 4 : append the current object to the 'objects' array
235 | objects.append([1, x, y, z, h, w, l, yaw])
236 | #######
237 | ####### ID_S3_EX2 END #######
238 | return objects
239 |
240 |
-------------------------------------------------------------------------------- /student/objdet_eval.py: --------------------------------------------------------------------------------
1 | # ---------------------------------------------------------------------
2 | # Project "Track 3D-Objects Over Time"
3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja.
4 | #
5 | # Purpose of this file : Evaluate performance of object detection
6 | #
7 | # You should have received a copy of the Udacity license together with this program.
8 | #
9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013
10 | # ----------------------------------------------------------------------
11 | #
12 |
13 | # general package imports
14 | import numpy as np
15 | import matplotlib
16 | #matplotlib.use('wxagg') # change backend so that figure maximizing works on Mac as well
17 | import matplotlib.pyplot as plt
18 |
19 | import torch
20 | from shapely.geometry import Polygon
21 | from operator import itemgetter
22 |
23 | # add project directory to python path to enable relative imports
24 | import os
25 | import sys
26 | PACKAGE_PARENT = '..'
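# ----------------------------------------------------------------------
# Illustration (minimal sketch, assuming the default configs of
# lim_x = [0, 50], lim_y = [-25, 25] and a 608 x 608 BEV map): the
# BEV-pixel-to-vehicle-coordinate conversion used in detect_objects() above,
# applied to one hypothetical detection.
bev_height = bev_width = 608
lim_x, lim_y = [0, 50], [-25, 25]

bev_x, bev_y = 304.0, 304.0                                   # hypothetical BEV pixel position
x = bev_y / bev_height * (lim_x[1] - lim_x[0])                # 25.0 m in front of the sensor
y = bev_x / bev_width * (lim_y[1] - lim_y[0]) - (lim_y[1] - lim_y[0]) / 2.0   # 0.0 m lateral offset
print(x, y)                                                   # 25.0 0.0
# ----------------------------------------------------------------------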
27 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))
28 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
29 |
30 | # object detection tools and helper functions
31 | import misc.objdet_tools as tools
32 |
33 |
34 | # compute various performance measures to assess object detection
35 | def measure_detection_performance(detections, labels, labels_valid, min_iou=0.5):
36 |
37 | # find best detection for each valid label
38 | true_positives = 0 # no. of correctly detected objects
39 | center_devs = []
40 | ious = []
41 | for label, valid in zip(labels, labels_valid):
42 | matches_lab_det = []
43 | if valid: # exclude all labels from statistics which are not considered valid
44 |
45 | # compute intersection over union (iou) and distance between centers
46 |
47 | ####### ID_S4_EX1 START #######
48 | #######
49 | print("student task ID_S4_EX1 ")
50 |
51 | ## step 1 : extract the four corners of the current label bounding-box
52 | box = label.box
53 | box_lab = tools.compute_box_corners(box.center_x, box.center_y, box.width, box.length, box.heading)
54 |
55 |
56 | ## step 2 : loop over all detected objects
57 | for bbox in detections:
58 | ## step 3 : extract the four corners of the current detection
59 | bid, x, y, z, h, w, l, yaw = bbox
60 | box_det = tools.compute_box_corners(x, y, w, l, yaw)
61 | ## step 4 : compute the center distance between label and detection bounding-box in x, y, and z
62 | dist_x = box.center_x - x
63 | dist_y = box.center_y - y
64 | dist_z = box.center_z - z
65 | ## step 5 : compute the intersection over union (IOU) between label and detection bounding-box
66 | poly_1 = Polygon(box_lab)
67 | poly_2 = Polygon(box_det)
68 | intersection = poly_1.intersection(poly_2).area
69 | union = poly_1.union(poly_2).area
70 | iou = intersection / union
71 | ## step 6 : if IOU exceeds min_iou threshold, store [iou, dist_x, dist_y, dist_z] in matches_lab_det and increase the TP count
72 | if iou > min_iou:
73 | matches_lab_det.append([iou, dist_x, dist_y, dist_z])
74 | true_positives = true_positives + 1
75 | #######
76 | ####### ID_S4_EX1 END #######
77 |
78 | # find best match and compute metrics
79 | if matches_lab_det:
80 | best_match = max(matches_lab_det, key=itemgetter(0)) # retrieve entry with max iou in case of multiple candidates
81 | ious.append(best_match[0])
82 | center_devs.append(best_match[1:])
83 |
84 |
85 | ####### ID_S4_EX2 START #######
86 | #######
87 | print("student task ID_S4_EX2")
88 |
89 | # compute positives and negatives for precision/recall
90 |
91 | ## step 1 : compute the total number of positives present in the scene
92 | all_positives = labels_valid.sum()
93 |
94 | ## step 2 : compute the number of false negatives
95 | true_positives = len(ious)
96 | false_negatives = all_positives - true_positives
97 |
98 | ## step 3 : compute the number of false positives
99 | false_positives = len(detections) - true_positives
100 |
101 | #######
102 | ####### ID_S4_EX2 END #######
103 |
104 | pos_negs = [all_positives, true_positives, false_negatives, false_positives]
105 | det_performance = [ious, center_devs, pos_negs]
106 |
107 | return det_performance
108 |
109 |
110 | # evaluate object detection performance based on all frames
111 | def compute_performance_stats(det_performance_all):
112 |
113 | # extract elements
114 | ious = []
115 | center_devs = []
116 | pos_negs = []
117 | for item in det_performance_all:
118 | ious.append(item[0])
119 | center_devs.append(item[1])
120 |
pos_negs.append(item[2]) 121 | pos_negs_arr = np.asarray(pos_negs) 122 | ####### ID_S4_EX3 START ####### 123 | ####### 124 | print('student task ID_S4_EX3') 125 | 126 | ## step 1 : extract the total number of positives, true positives, false negatives and false positives 127 | positives = sum(pos_negs_arr[:,0]) 128 | true_positives = sum(pos_negs_arr[:,1]) 129 | false_negatives = sum(pos_negs_arr[:,2]) 130 | false_positives = sum(pos_negs_arr[:,3]) 131 | 132 | ## step 2 : compute precision 133 | precision = true_positives /float(true_positives + false_positives) 134 | 135 | ## step 3 : compute recall 136 | recall = true_positives / float(true_positives + false_negatives) 137 | ####### 138 | ####### ID_S4_EX3 END ####### 139 | print('precision = ' + str(precision) + ", recall = " + str(recall)) 140 | 141 | # serialize intersection-over-union and deviations in x,y,z 142 | ious_all = [element for tupl in ious for element in tupl] 143 | devs_x_all = [] 144 | devs_y_all = [] 145 | devs_z_all = [] 146 | for tuple in center_devs: 147 | for elem in tuple: 148 | dev_x, dev_y, dev_z = elem 149 | devs_x_all.append(dev_x) 150 | devs_y_all.append(dev_y) 151 | devs_z_all.append(dev_z) 152 | 153 | 154 | # compute statistics 155 | stdev__ious = np.std(ious_all) 156 | mean__ious = np.mean(ious_all) 157 | 158 | stdev__devx = np.std(devs_x_all) 159 | mean__devx = np.mean(devs_x_all) 160 | 161 | stdev__devy = np.std(devs_y_all) 162 | mean__devy = np.mean(devs_y_all) 163 | 164 | stdev__devz = np.std(devs_z_all) 165 | mean__devz = np.mean(devs_z_all) 166 | #std_dev_x = np.std(devs_x) 167 | 168 | # plot results 169 | data = [precision, recall, ious_all, devs_x_all, devs_y_all, devs_z_all] 170 | titles = ['detection precision', 'detection recall', 'intersection over union', 'position errors in X', 'position errors in Y', 'position error in Z'] 171 | textboxes = ['', '', '', 172 | '\n'.join((r'$\mathrm{mean}=%.4f$' % (np.mean(devs_x_all), ), r'$\mathrm{sigma}=%.4f$' % (np.std(devs_x_all), ), r'$\mathrm{n}=%.0f$' % (len(devs_x_all), ))), 173 | '\n'.join((r'$\mathrm{mean}=%.4f$' % (np.mean(devs_y_all), ), r'$\mathrm{sigma}=%.4f$' % (np.std(devs_y_all), ), r'$\mathrm{n}=%.0f$' % (len(devs_x_all), ))), 174 | '\n'.join((r'$\mathrm{mean}=%.4f$' % (np.mean(devs_z_all), ), r'$\mathrm{sigma}=%.4f$' % (np.std(devs_z_all), ), r'$\mathrm{n}=%.0f$' % (len(devs_x_all), )))] 175 | 176 | f, a = plt.subplots(2, 3) 177 | a = a.ravel() 178 | num_bins = 20 179 | props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) 180 | for idx, ax in enumerate(a): 181 | ax.hist(data[idx], num_bins) 182 | ax.set_title(titles[idx]) 183 | if textboxes[idx]: 184 | ax.text(0.05, 0.95, textboxes[idx], transform=ax.transAxes, fontsize=10, 185 | verticalalignment='top', bbox=props) 186 | plt.tight_layout() 187 | plt.show() 188 | 189 | -------------------------------------------------------------------------------- /student/objdet_pcl.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Process the point-cloud and prepare it for object detection 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
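# ----------------------------------------------------------------------
# Illustration (minimal sketch with made-up numbers): the shapely-based IoU
# and the precision/recall arithmetic used in student/objdet_eval.py above.
from shapely.geometry import Polygon

box_a = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])        # two overlapping 2 m x 2 m boxes
box_b = Polygon([(1, 0), (3, 0), (3, 2), (1, 2)])
intersection = box_a.intersection(box_b).area            # 2.0
union = box_a.union(box_b).area                          # 6.0
print(intersection / union)                              # 0.333...

all_positives, true_positives = 10, 8                    # hypothetical per-frame counts
false_negatives = all_positives - true_positives         # 2
false_positives = 1                                      # hypothetical
precision = true_positives / float(true_positives + false_positives)   # ~0.889
recall = true_positives / float(true_positives + false_negatives)      # 0.8
print(precision, recall)
# ----------------------------------------------------------------------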
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # general package imports 14 | import cv2 15 | import numpy as np 16 | import torch 17 | import zlib 18 | import open3d as o3d 19 | # add project directory to python path to enable relative imports 20 | import os 21 | import sys 22 | PACKAGE_PARENT = '..' 23 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 24 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 25 | 26 | # waymo open dataset reader 27 | from tools.waymo_reader.simple_waymo_open_dataset_reader import utils as waymo_utils 28 | from tools.waymo_reader.simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 29 | 30 | # object detection tools and helper functions 31 | import misc.objdet_tools as tools 32 | 33 | # visualize lidar point-cloud 34 | def show_pcl(pcl): 35 | 36 | ####### ID_S1_EX2 START ####### 37 | ####### 38 | print("student task ID_S1_EX2") 39 | 40 | # step 1 : initialize open3d with key callback and create window 41 | vis_3d = o3d.visualization.VisualizerWithKeyCallback() 42 | vis_3d.create_window(window_name='Point Cloud image') 43 | global idx 44 | idx= True 45 | def right_click(vis_3d): 46 | global idx 47 | print('right arrow pressed') 48 | idx= False 49 | return 50 | vis_3d.register_key_callback(262, right_click) 51 | # step 2 : create instance of open3d point-cloud class 52 | pcd = o3d.geometry.PointCloud() 53 | # step 3 : set points in pcd instance by converting the point-cloud into 3d vectors (using open3d function Vector3dVector) 54 | pcd.points = o3d.utility.Vector3dVector(pcl[:,:3]) 55 | # step 4 : for the first frame, add the pcd instance to visualization using add_geometry; for all other frames, use update_geometry instead 56 | vis_3d.add_geometry(pcd) 57 | # step 5 : visualize point cloud and keep window open until right-arrow is pressed (key-code 262) 58 | while idx: 59 | vis_3d.poll_events() 60 | vis_3d.update_renderer() 61 | ####### 62 | ####### ID_S1_EX2 END ####### 63 | 64 | 65 | # visualize range image 66 | def show_range_image(frame, lidar_name): 67 | 68 | ####### ID_S1_EX1 START ####### 69 | ####### 70 | print("student task ID_S1_EX1") 71 | 72 | # step 1 : extract lidar data and range image for the roof-mounted lidar 73 | lidar = [obj for obj in frame.lasers if obj.name == lidar_name][0] 74 | 75 | # step 2 : extract the range and the intensity channel from the range image 76 | if len(lidar.ri_return1.range_image_compressed) > 0: # use first response 77 | ri = dataset_pb2.MatrixFloat() 78 | ri.ParseFromString(zlib.decompress(lidar.ri_return1.range_image_compressed)) 79 | ri = np.array(ri.data).reshape(ri.shape.dims) 80 | # step 3 : set values <0 to zero 81 | ri[ri<0]=0.0 82 | 83 | # step 4 : map the range channel onto an 8-bit scale and make sure that the full range of values is appropriately considered 84 | ri_range = ri[:,:,0] 85 | ri_range = ri_range * 255 / (np.amax(ri_range) - np.amin(ri_range)) 86 | img_range = ri_range.astype(np.uint8) 87 | # step 5 : map the intensity channel onto an 8-bit scale and normalize with the difference between the 1- and 99-percentile to mitigate the influence of outliers 88 | ri_intensity = ri[:,:,1] 89 | ri_intensity = np.amax(ri_intensity)/2 * ri_intensity * 255 / (np.amax(ri_intensity) - np.amin(ri_intensity)) 90 | img_intensity = ri_intensity.astype(np.uint8) 91 | # step 6 : stack the 
range and intensity image vertically using np.vstack and convert the result to an unsigned 8-bit integer 92 | 93 | img_range_intensity = np.vstack((img_range, img_intensity)) 94 | img_intensity = img_intensity.astype(np.uint8) 95 | deg90 = int(img_range_intensity.shape[1] / 4) 96 | ri_center = int(img_range_intensity.shape[1]/2) 97 | img_range_intensity = img_range_intensity[:,ri_center-deg90:ri_center+deg90] 98 | 99 | ####### 100 | ####### ID_S1_EX1 END ####### 101 | 102 | return img_range_intensity 103 | 104 | 105 | # create birds-eye view of lidar data 106 | def bev_from_pcl(lidar_pcl, configs): 107 | 108 | # remove lidar points outside detection area and with too low reflectivity 109 | mask = np.where((lidar_pcl[:, 0] >= configs.lim_x[0]) & (lidar_pcl[:, 0] <= configs.lim_x[1]) & 110 | (lidar_pcl[:, 1] >= configs.lim_y[0]) & (lidar_pcl[:, 1] <= configs.lim_y[1]) & 111 | (lidar_pcl[:, 2] >= configs.lim_z[0]) & (lidar_pcl[:, 2] <= configs.lim_z[1])) 112 | lidar_pcl = lidar_pcl[mask] 113 | 114 | # shift level of ground plane to avoid flipping from 0 to 255 for neighboring pixels 115 | lidar_pcl[:, 2] = lidar_pcl[:, 2] - configs.lim_z[0] 116 | 117 | # convert sensor coordinates to bev-map coordinates (center is bottom-middle) 118 | ####### ID_S2_EX1 START ####### 119 | ####### 120 | print("student task ID_S2_EX1") 121 | 122 | ## step 1 : compute bev-map discretization by dividing x-range by the bev-image height (see configs) 123 | bev_discret = (configs.lim_x[1] - configs.lim_x[0]) / configs.bev_height 124 | ## step 2 : create a copy of the lidar pcl and transform all metrix x-coordinates into bev-image coordinates 125 | lidar_pcl_cpy = np.copy(lidar_pcl) 126 | lidar_pcl_cpy[:, 0] = np.int_(np.floor(lidar_pcl_cpy[:, 0] / bev_discret)) 127 | # step 3 : perform the same operation as in step 2 for the y-coordinates but make sure that no negative bev-coordinates occur 128 | lidar_pcl_cpy[:, 1] = np.int_(np.floor(lidar_pcl_cpy[:, 1] / bev_discret)) + ((configs.bev_width + 1) / 2) 129 | lidar_pcl_cpy[:, 1] = np.abs(lidar_pcl_cpy[:,1]) 130 | # step 4 : visualize point-cloud using the function show_pcl from a previous task 131 | show_pcl(lidar_pcl_cpy) 132 | ####### 133 | ####### ID_S2_EX1 END ####### 134 | 135 | 136 | # Compute intensity layer of the BEV map 137 | ####### ID_S2_EX2 START ####### 138 | ####### 139 | print("student task ID_S2_EX2") 140 | 141 | ## step 1 : create a numpy array filled with zeros which has the same dimensions as the BEV map 142 | intensity_map = np.zeros((configs.bev_height, configs.bev_width)) 143 | 144 | # step 2 : re-arrange elements in lidar_pcl_cpy by sorting first by x, then y, then -z (use numpy.lexsort) 145 | lidar_pcl_cpy[lidar_pcl_cpy[:,3]>1.0,3] = 1.0 146 | idx_intensity = np.lexsort((-lidar_pcl_cpy[:, 2], lidar_pcl_cpy[:, 1], lidar_pcl_cpy[:, 0])) 147 | lidar_pcl_top = lidar_pcl_cpy[idx_intensity] 148 | ## step 3 : extract all points with identical x and y such that only the top-most z-coordinate is kept (use numpy.unique) 149 | ## also, store the number of points per x,y-cell in a variable named "counts" for use in the next task 150 | lidar_pcl_int, indices, counts = np.unique(lidar_pcl_cpy[:, 0:2], axis=0, return_index=True, return_counts=True) 151 | lidar_pcl_top = lidar_pcl_cpy[indices] 152 | ## step 4 : assign the intensity value of each unique entry in lidar_top_pcl to the intensity map 153 | ## make sure that the intensity is scaled in such a way that objects of interest (e.g. 
vehicles) are clearly visible 154 | ## also, make sure that the influence of outliers is mitigated by normalizing intensity on the difference between the max. and min. value within the point cloud 155 | intensity_map[np.int_(lidar_pcl_top[:, 0]), np.int_(lidar_pcl_top[:, 1])] = lidar_pcl_top[:, 3] / (np.amax(lidar_pcl_top[:, 3])-np.amin(lidar_pcl_top[:, 3])) 156 | 157 | ## step 5 : temporarily visualize the intensity map using OpenCV to make sure that vehicles separate well from the background 158 | img_intensity = intensity_map * 256 159 | img_intensity = img_intensity.astype(np.uint8) 160 | cv2.imshow('img_intensity', img_intensity) 161 | cv2.waitKey(0) 162 | cv2.destroyAllWindows() 163 | ####### 164 | ####### ID_S2_EX2 END ####### 165 | 166 | 167 | # Compute height layer of the BEV map 168 | ####### ID_S2_EX3 START ####### 169 | ####### 170 | print("student task ID_S2_EX3") 171 | 172 | ## step 1 : create a numpy array filled with zeros which has the same dimensions as the BEV map 173 | height_map = np.zeros((configs.bev_height, configs.bev_width)) 174 | ## step 2 : assign the height value of each unique entry in lidar_top_pcl to the height map 175 | ## make sure that each entry is normalized on the difference between the upper and lower height defined in the config file 176 | ## use the lidar_pcl_top data structure from the previous task to access the pixels of the height_map 177 | height_map[np.int_(lidar_pcl_top[:, 0]), np.int_(lidar_pcl_top[:, 1])] = lidar_pcl_top[:, 2] / float(np.abs(configs.lim_z[1] - configs.lim_z[0])) 178 | ## step 3 : temporarily visualize the intensity map using OpenCV to make sure that vehicles separate well from the background 179 | img_height = height_map * 256 180 | img_height = img_height.astype(np.uint8) 181 | cv2.imshow('height_map', height_map) 182 | cv2.waitKey(0) 183 | cv2.destroyAllWindows() 184 | ####### 185 | ####### ID_S2_EX3 END ####### 186 | 187 | # Compute density layer of the BEV map 188 | density_map = np.zeros((configs.bev_height + 1, configs.bev_width + 1)) 189 | _, _, counts = np.unique(lidar_pcl_cpy[:, 0:2], axis=0, return_index=True, return_counts=True) 190 | normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64)) 191 | density_map[np.int_(lidar_pcl_top[:, 0]), np.int_(lidar_pcl_top[:, 1])] = normalizedCounts 192 | 193 | # assemble 3-channel bev-map from individual maps 194 | bev_map = np.zeros((3, configs.bev_height, configs.bev_width)) 195 | bev_map[2, :, :] = density_map[:configs.bev_height, :configs.bev_width] # r_map 196 | bev_map[1, :, :] = height_map[:configs.bev_height, :configs.bev_width] # g_map 197 | bev_map[0, :, :] = intensity_map[:configs.bev_height, :configs.bev_width] # b_map 198 | 199 | # expand dimension of bev_map before converting into a tensor 200 | s1, s2, s3 = bev_map.shape 201 | bev_maps = np.zeros((1, s1, s2, s3)) 202 | bev_maps[0] = bev_map 203 | 204 | bev_maps = torch.from_numpy(bev_maps) # create tensor from birds-eye view 205 | input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float() 206 | return input_bev_maps 207 | 208 | 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /student/trackmanagement.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 
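# ----------------------------------------------------------------------
# Illustration (minimal sketch with a toy 3-point cloud): the lexsort + unique
# idiom used in bev_from_pcl() above to keep only the top-most point per BEV
# cell. Note that np.unique has to operate on the *sorted* copy so that the
# "first occurrence" of each cell really is its highest point.
import numpy as np

pts = np.array([[1., 2., 0.5, 0.3],                      # columns: x-cell, y-cell, z, intensity
                [1., 2., 1.5, 0.9],
                [0., 0., 0.2, 0.1]])
order = np.lexsort((-pts[:, 2], pts[:, 1], pts[:, 0]))   # sort by x, then y, then -z
pts_sorted = pts[order]
_, first_idx, counts = np.unique(pts_sorted[:, 0:2], axis=0, return_index=True, return_counts=True)
top_pts = pts_sorted[first_idx]
print(top_pts)                                           # keeps z = 0.2 for cell (0,0) and z = 1.5 for cell (1,2)
print(counts)                                            # [1 2]
# ----------------------------------------------------------------------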
4 | # 5 | # Purpose of this file : Classes for track and track management 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | import collections 16 | 17 | # add project directory to python path to enable relative imports 18 | import os 19 | import sys 20 | PACKAGE_PARENT = '..' 21 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 22 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 23 | import misc.params as params 24 | 25 | class Track: 26 | '''Track class with state, covariance, id, score''' 27 | def __init__(self, meas, id): 28 | print('creating track no.', id) 29 | M_rot = meas.sensor.sens_to_veh[0:3, 0:3] # rotation matrix from sensor to vehicle coordinates 30 | 31 | ############ 32 | # TODO Step 2: initialization: 33 | # - replace fixed track initialization values by initialization of x and P based on 34 | # unassigned measurement transformed from sensor to vehicle coordinates 35 | # - initialize track state and track score with appropriate values 36 | ############ 37 | 38 | # self.x = np.matrix([[49.53980697], 39 | # [ 3.41006279], 40 | # [ 0.91790581], 41 | # [ 0. ], 42 | # [ 0. ], 43 | # [ 0. ]]) 44 | pos_sens = np.ones((4, 1)) 45 | pos_sens[0:3] = meas.z[0:3] 46 | pos_veh = meas.sensor.sens_to_veh*pos_sens 47 | self.x = np.zeros((6,1)) 48 | self.x[0:3] = pos_veh[0:3] 49 | # self.P = np.matrix([[9.0e-02, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], 50 | # [0.0e+00, 9.0e-02, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], 51 | # [0.0e+00, 0.0e+00, 6.4e-03, 0.0e+00, 0.0e+00, 0.0e+00], 52 | # [0.0e+00, 0.0e+00, 0.0e+00, 2.5e+03, 0.0e+00, 0.0e+00], 53 | # [0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 2.5e+03, 0.0e+00], 54 | # [0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 2.5e+01]]) 55 | P_pos = M_rot * meas.R * np.transpose(M_rot) 56 | P_vel = np.matrix([[params.sigma_p44**2, 0, 0], 57 | [0, params.sigma_p55**2, 0], 58 | [0, 0, params.sigma_p66**2]]) 59 | self.P = np.zeros((6, 6)) 60 | self.P[0:3, 0:3] = P_pos 61 | self.P[3:6, 3:6] = P_vel 62 | self.state = 'initialized' 63 | self.score = 1/params.window 64 | 65 | ############ 66 | # END student code 67 | ############ 68 | 69 | # other track attributes 70 | self.id = id 71 | self.width = meas.width 72 | self.length = meas.length 73 | self.height = meas.height 74 | self.yaw = np.arccos(M_rot[0,0]*np.cos(meas.yaw) + M_rot[0,1]*np.sin(meas.yaw)) # transform rotation from sensor to vehicle coordinates 75 | self.t = meas.t 76 | 77 | def set_x(self, x): 78 | self.x = x 79 | 80 | def set_P(self, P): 81 | self.P = P 82 | 83 | def set_t(self, t): 84 | self.t = t 85 | 86 | def update_attributes(self, meas): 87 | # use exponential sliding average to estimate dimensions and orientation 88 | if meas.sensor.name == 'lidar': 89 | c = params.weight_dim 90 | self.width = c*meas.width + (1 - c)*self.width 91 | self.length = c*meas.length + (1 - c)*self.length 92 | self.height = c*meas.height + (1 - c)*self.height 93 | M_rot = meas.sensor.sens_to_veh 94 | self.yaw = np.arccos(M_rot[0,0]*np.cos(meas.yaw) + M_rot[0,1]*np.sin(meas.yaw)) # transform rotation from sensor to vehicle coordinates 95 | 96 | 97 | ################### 98 | 99 | class Trackmanagement: 100 | '''Track manager with logic for initializing and deleting objects''' 
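# ----------------------------------------------------------------------
# Illustration (minimal sketch): how the Track constructor above turns an
# unassigned lidar measurement into an initial state x and covariance P.
# sens_to_veh, z, R and the sigma values below are assumed placeholder values,
# not the ones from misc/params.py.
import numpy as np

sens_to_veh = np.matrix(np.identity(4))          # assumed sensor-to-vehicle transform
z = np.matrix([[10.0], [2.0], [0.5]])            # hypothetical lidar position measurement
R = np.matrix(np.diag([0.01, 0.01, 0.01]))       # hypothetical measurement covariance
sigma_p44 = sigma_p55 = 50.0                     # assumed initial velocity std devs
sigma_p66 = 5.0

pos_sens = np.ones((4, 1))
pos_sens[0:3] = z[0:3]
pos_veh = sens_to_veh * pos_sens                 # transform position to vehicle coordinates

x = np.zeros((6, 1))
x[0:3] = pos_veh[0:3]                            # position from measurement, velocity set to 0

M_rot = sens_to_veh[0:3, 0:3]
P = np.zeros((6, 6))
P[0:3, 0:3] = M_rot * R * M_rot.T                # position covariance from measurement noise
P[3:6, 3:6] = np.diag([sigma_p44**2, sigma_p55**2, sigma_p66**2])
print(x.T)
# ----------------------------------------------------------------------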
101 | def __init__(self): 102 | self.N = 0 # current number of tracks 103 | self.track_list = [] 104 | self.last_id = -1 105 | self.result_list = [] 106 | 107 | def manage_tracks(self, unassigned_tracks, unassigned_meas, meas_list): 108 | ############ 109 | # TODO Step 2: implement track management: 110 | # - decrease the track score for unassigned tracks 111 | # - delete tracks if the score is too low or P is too big (check params.py for parameters that might be helpful, but 112 | # feel free to define your own parameters) 113 | ############ 114 | 115 | # decrease score for unassigned tracks 116 | for i in unassigned_tracks: 117 | track = self.track_list[i] 118 | # check visibility 119 | if meas_list: # if not empty 120 | if meas_list[0].sensor.in_fov(track.x): 121 | track.state = 'tentative' 122 | if track.score > params.delete_threshold + 1: 123 | track.score = params.delete_threshold + 1 124 | track.score -= 1./params.window 125 | 126 | # delete old tracks 127 | for track in self.track_list: 128 | if track.score <= params.delete_threshold: 129 | if track.P[0, 0] >= params.max_P or track.P[1, 1] >= params.max_P: 130 | self.delete_track(track) 131 | 132 | ############ 133 | # END student code 134 | ############ 135 | 136 | # initialize new track with unassigned measurement 137 | for j in unassigned_meas: 138 | if meas_list[j].sensor.name == 'lidar': # only initialize with lidar measurements 139 | self.init_track(meas_list[j]) 140 | 141 | def addTrackToList(self, track): 142 | self.track_list.append(track) 143 | self.N += 1 144 | self.last_id = track.id 145 | 146 | def init_track(self, meas): 147 | track = Track(meas, self.last_id + 1) 148 | self.addTrackToList(track) 149 | 150 | def delete_track(self, track): 151 | print('deleting track no.', track.id) 152 | self.track_list.remove(track) 153 | 154 | def handle_updated_track(self, track): 155 | ############ 156 | # TODO Step 2: implement track management for updated tracks: 157 | # - increase track score 158 | # - set track state to 'tentative' or 'confirmed' 159 | ############ 160 | 161 | track.score += 1./params.window 162 | if track.score > params.confirmed_threshold: 163 | track.state = 'confirmed' 164 | else: 165 | track.state = 'tentative' 166 | 167 | ############ 168 | # END student code 169 | ############ 170 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/tools/objdet_models/darknet/models/__init__.py -------------------------------------------------------------------------------- /tools/objdet_models/darknet/models/darknet_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 5 | """ 6 | 7 | import sys 8 | 9 | import torch 10 | 11 | sys.path.append('../') 12 | from utils.torch_utils import convert2cpu 13 | 14 | __all__ = ['parse_cfg', 'print_cfg', 'load_conv', 'load_conv_bn', 'save_conv', 'save_conv_bn', 'load_fc', 'save_fc'] 15 | 16 | 17 | def parse_cfg(cfgfile): 18 | blocks = [] 19 | fp = open(cfgfile, 'r') 20 | block = None 21 | line = fp.readline() 22 | while line != '': 23 | line = line.rstrip() 24 | if line == '' or line[0] == '#': 25 | 
line = fp.readline() 26 | continue 27 | elif line[0] == '[': 28 | if block: 29 | blocks.append(block) 30 | block = dict() 31 | block['type'] = line.lstrip('[').rstrip(']') 32 | # set default value 33 | if block['type'] == 'convolutional': 34 | block['batch_normalize'] = 0 35 | else: 36 | key, value = line.split('=') 37 | key = key.strip() 38 | if key == 'type': 39 | key = '_type' 40 | value = value.strip() 41 | block[key] = value 42 | line = fp.readline() 43 | 44 | if block: 45 | blocks.append(block) 46 | fp.close() 47 | return blocks 48 | 49 | 50 | def print_cfg(blocks): 51 | print('layer filters size input output') 52 | prev_width = 416 53 | prev_height = 416 54 | prev_filters = 3 55 | out_filters = [] 56 | out_widths = [] 57 | out_heights = [] 58 | ind = -2 59 | for block in blocks: 60 | ind = ind + 1 61 | if block['type'] == 'net': 62 | prev_width = int(block['width']) 63 | prev_height = int(block['height']) 64 | continue 65 | elif block['type'] == 'convolutional': 66 | filters = int(block['filters']) 67 | kernel_size = int(block['size']) 68 | stride = int(block['stride']) 69 | is_pad = int(block['pad']) 70 | pad = (kernel_size - 1) // 2 if is_pad else 0 71 | width = (prev_width + 2 * pad - kernel_size) // stride + 1 72 | height = (prev_height + 2 * pad - kernel_size) // stride + 1 73 | print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 74 | ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, 75 | height, filters)) 76 | prev_width = width 77 | prev_height = height 78 | prev_filters = filters 79 | out_widths.append(prev_width) 80 | out_heights.append(prev_height) 81 | out_filters.append(prev_filters) 82 | elif block['type'] == 'maxpool': 83 | pool_size = int(block['size']) 84 | stride = int(block['stride']) 85 | width = prev_width // stride 86 | height = prev_height // stride 87 | print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 88 | ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, 89 | filters)) 90 | prev_width = width 91 | prev_height = height 92 | prev_filters = filters 93 | out_widths.append(prev_width) 94 | out_heights.append(prev_height) 95 | out_filters.append(prev_filters) 96 | elif block['type'] == 'avgpool': 97 | width = 1 98 | height = 1 99 | print('%5d %-6s %3d x %3d x%4d -> %3d' % ( 100 | ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) 101 | prev_width = width 102 | prev_height = height 103 | prev_filters = filters 104 | out_widths.append(prev_width) 105 | out_heights.append(prev_height) 106 | out_filters.append(prev_filters) 107 | elif block['type'] == 'softmax': 108 | print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters)) 109 | out_widths.append(prev_width) 110 | out_heights.append(prev_height) 111 | out_filters.append(prev_filters) 112 | elif block['type'] == 'cost': 113 | print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters)) 114 | out_widths.append(prev_width) 115 | out_heights.append(prev_height) 116 | out_filters.append(prev_filters) 117 | elif block['type'] == 'reorg': 118 | stride = int(block['stride']) 119 | filters = stride * stride * prev_filters 120 | width = prev_width // stride 121 | height = prev_height // stride 122 | print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 123 | ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) 124 | prev_width = width 125 | prev_height = height 126 | prev_filters = filters 127 | out_widths.append(prev_width) 128 | 
out_heights.append(prev_height) 129 | out_filters.append(prev_filters) 130 | elif block['type'] == 'upsample': 131 | stride = int(block['stride']) 132 | filters = prev_filters 133 | width = prev_width * stride 134 | height = prev_height * stride 135 | print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 136 | ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) 137 | prev_width = width 138 | prev_height = height 139 | prev_filters = filters 140 | out_widths.append(prev_width) 141 | out_heights.append(prev_height) 142 | out_filters.append(prev_filters) 143 | elif block['type'] == 'route': 144 | layers = block['layers'].split(',') 145 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 146 | if len(layers) == 1: 147 | print('%5d %-6s %d' % (ind, 'route', layers[0])) 148 | prev_width = out_widths[layers[0]] 149 | prev_height = out_heights[layers[0]] 150 | prev_filters = out_filters[layers[0]] 151 | elif len(layers) == 2: 152 | print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) 153 | prev_width = out_widths[layers[0]] 154 | prev_height = out_heights[layers[0]] 155 | assert (prev_width == out_widths[layers[1]]) 156 | assert (prev_height == out_heights[layers[1]]) 157 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 158 | elif len(layers) == 4: 159 | print('%5d %-6s %d %d %d %d' % (ind, 'route', layers[0], layers[1], layers[2], layers[3])) 160 | prev_width = out_widths[layers[0]] 161 | prev_height = out_heights[layers[0]] 162 | assert (prev_width == out_widths[layers[1]] == out_widths[layers[2]] == out_widths[layers[3]]) 163 | assert (prev_height == out_heights[layers[1]] == out_heights[layers[2]] == out_heights[layers[3]]) 164 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + out_filters[ 165 | layers[3]] 166 | else: 167 | print("route error !!! 
{} {} {}".format(sys._getframe().f_code.co_filename, 168 | sys._getframe().f_code.co_name, sys._getframe().f_lineno)) 169 | 170 | out_widths.append(prev_width) 171 | out_heights.append(prev_height) 172 | out_filters.append(prev_filters) 173 | elif block['type'] in ['region', 'yolo']: 174 | print('%5d %-6s' % (ind, 'detection')) 175 | out_widths.append(prev_width) 176 | out_heights.append(prev_height) 177 | out_filters.append(prev_filters) 178 | elif block['type'] == 'shortcut': 179 | from_id = int(block['from']) 180 | from_id = from_id if from_id > 0 else from_id + ind 181 | print('%5d %-6s %d' % (ind, 'shortcut', from_id)) 182 | prev_width = out_widths[from_id] 183 | prev_height = out_heights[from_id] 184 | prev_filters = out_filters[from_id] 185 | out_widths.append(prev_width) 186 | out_heights.append(prev_height) 187 | out_filters.append(prev_filters) 188 | elif block['type'] == 'connected': 189 | filters = int(block['output']) 190 | print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) 191 | prev_filters = filters 192 | out_widths.append(1) 193 | out_heights.append(1) 194 | out_filters.append(prev_filters) 195 | else: 196 | print('unknown type %s' % (block['type'])) 197 | 198 | 199 | def load_conv(buf, start, conv_model): 200 | num_w = conv_model.weight.numel() 201 | num_b = conv_model.bias.numel() 202 | conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 203 | start = start + num_b 204 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape)) 205 | start = start + num_w 206 | return start 207 | 208 | 209 | def save_conv(fp, conv_model): 210 | if conv_model.bias.is_cuda: 211 | convert2cpu(conv_model.bias.data).numpy().tofile(fp) 212 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 213 | else: 214 | conv_model.bias.data.numpy().tofile(fp) 215 | conv_model.weight.data.numpy().tofile(fp) 216 | 217 | 218 | def load_conv_bn(buf, start, conv_model, bn_model): 219 | num_w = conv_model.weight.numel() 220 | num_b = bn_model.bias.numel() 221 | bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 222 | start = start + num_b 223 | bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])) 224 | start = start + num_b 225 | bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])) 226 | start = start + num_b 227 | bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])) 228 | start = start + num_b 229 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape)) 230 | start = start + num_w 231 | return start 232 | 233 | 234 | def save_conv_bn(fp, conv_model, bn_model): 235 | if bn_model.bias.is_cuda: 236 | convert2cpu(bn_model.bias.data).numpy().tofile(fp) 237 | convert2cpu(bn_model.weight.data).numpy().tofile(fp) 238 | convert2cpu(bn_model.running_mean).numpy().tofile(fp) 239 | convert2cpu(bn_model.running_var).numpy().tofile(fp) 240 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 241 | else: 242 | bn_model.bias.data.numpy().tofile(fp) 243 | bn_model.weight.data.numpy().tofile(fp) 244 | bn_model.running_mean.numpy().tofile(fp) 245 | bn_model.running_var.numpy().tofile(fp) 246 | conv_model.weight.data.numpy().tofile(fp) 247 | 248 | 249 | def load_fc(buf, start, fc_model): 250 | num_w = fc_model.weight.numel() 251 | num_b = fc_model.bias.numel() 252 | fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 253 | start = start + num_b 254 | 
fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])) 255 | start = start + num_w 256 | return start 257 | 258 | 259 | def save_fc(fp, fc_model): 260 | fc_model.bias.data.numpy().tofile(fp) 261 | fc_model.weight.data.numpy().tofile(fp) 262 | 263 | 264 | if __name__ == '__main__': 265 | import sys 266 | 267 | blocks = parse_cfg('cfg/yolo.cfg') 268 | if len(sys.argv) == 2: 269 | blocks = parse_cfg(sys.argv[1]) 270 | print_cfg(blocks) 271 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/models/yolo_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.05 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: This script for the yolo layer 9 | 10 | # Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 11 | # Refer: https://github.com/VCasecnikovs/Yet-Another-YOLOv4-Pytorch 12 | """ 13 | 14 | import sys 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | 20 | #sys.path.append('../') 21 | # add project directory to python path to enable relative imports 22 | import os 23 | import sys 24 | PACKAGE_PARENT = '..' 25 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 26 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 27 | 28 | from utils.torch_utils import to_cpu 29 | from utils.iou_rotated_boxes_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ 30 | get_polygons_areas_fix_xy 31 | 32 | 33 | class YoloLayer(nn.Module): 34 | """Yolo layer""" 35 | 36 | def __init__(self, num_classes, anchors, stride, scale_x_y, ignore_thresh): 37 | super(YoloLayer, self).__init__() 38 | # Update the attributions when parsing the cfg during create the darknet 39 | self.num_classes = num_classes 40 | self.anchors = anchors 41 | self.num_anchors = len(anchors) 42 | self.stride = stride 43 | self.scale_x_y = scale_x_y 44 | self.ignore_thresh = ignore_thresh 45 | 46 | self.noobj_scale = 100 47 | self.obj_scale = 1 48 | self.lgiou_scale = 3.54 49 | self.leular_scale = 3.54 50 | self.lobj_scale = 64.3 51 | self.lcls_scale = 37.4 52 | 53 | self.seen = 0 54 | # Initialize dummy variables 55 | self.grid_size = 0 56 | self.img_size = 0 57 | self.metrics = {} 58 | 59 | def compute_grid_offsets(self, grid_size): 60 | self.grid_size = grid_size 61 | g = self.grid_size 62 | self.stride = self.img_size / self.grid_size 63 | # Calculate offsets for each grid 64 | self.grid_x = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).view([1, 1, g, g]) 65 | self.grid_y = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).t().view([1, 1, g, g]) 66 | self.scaled_anchors = torch.tensor( 67 | [(a_w / self.stride, a_h / self.stride, im, re) for a_w, a_h, im, re in self.anchors], device=self.device, 68 | dtype=torch.float) 69 | self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) 70 | self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) 71 | 72 | # Pre compute polygons and areas of anchors 73 | self.scaled_anchors_polygons, self.scaled_anchors_areas = get_polygons_areas_fix_xy(self.scaled_anchors) 74 | 75 | def build_targets(self, 
pred_boxes, pred_cls, target, anchors): 76 | """ Built yolo targets to compute loss 77 | :param out_boxes: [num_samples or batch, num_anchors, grid_size, grid_size, 6] 78 | :param pred_cls: [num_samples or batch, num_anchors, grid_size, grid_size, num_classes] 79 | :param target: [num_boxes, 8] 80 | :param anchors: [num_anchors, 4] 81 | :return: 82 | """ 83 | nB, nA, nG, _, nC = pred_cls.size() 84 | n_target_boxes = target.size(0) 85 | 86 | # Create output tensors on "device" 87 | obj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.uint8) 88 | noobj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=1, device=self.device, dtype=torch.uint8) 89 | class_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 90 | iou_scores = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 91 | tx = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 92 | ty = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 93 | tw = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 94 | th = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 95 | tim = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 96 | tre = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 97 | tcls = torch.full(size=(nB, nA, nG, nG, nC), fill_value=0, device=self.device, dtype=torch.float) 98 | tconf = obj_mask.float() 99 | giou_loss = torch.tensor([0.], device=self.device, dtype=torch.float) 100 | 101 | if n_target_boxes > 0: # Make sure that there is at least 1 box 102 | b, target_labels = target[:, :2].long().t() 103 | target_boxes = torch.cat((target[:, 2:6] * nG, target[:, 6:8]), dim=-1) # scale up x, y, w, h 104 | 105 | gxy = target_boxes[:, :2] 106 | gwh = target_boxes[:, 2:4] 107 | gimre = target_boxes[:, 4:6] 108 | 109 | targets_polygons, targets_areas = get_polygons_areas_fix_xy(target_boxes[:, 2:6]) 110 | # Get anchors with best iou 111 | ious = iou_rotated_boxes_targets_vs_anchors(self.scaled_anchors_polygons, self.scaled_anchors_areas, 112 | targets_polygons, targets_areas) 113 | best_ious, best_n = ious.max(0) 114 | 115 | gx, gy = gxy.t() 116 | gw, gh = gwh.t() 117 | gim, gre = gimre.t() 118 | gi, gj = gxy.long().t() 119 | # Set masks 120 | obj_mask[b, best_n, gj, gi] = 1 121 | noobj_mask[b, best_n, gj, gi] = 0 122 | 123 | # Set noobj mask to zero where iou exceeds ignore threshold 124 | for i, anchor_ious in enumerate(ious.t()): 125 | noobj_mask[b[i], anchor_ious > self.ignore_thresh, gj[i], gi[i]] = 0 126 | 127 | # Coordinates 128 | tx[b, best_n, gj, gi] = gx - gx.floor() 129 | ty[b, best_n, gj, gi] = gy - gy.floor() 130 | # Width and height 131 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) 132 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) 133 | # Im and real part 134 | tim[b, best_n, gj, gi] = gim 135 | tre[b, best_n, gj, gi] = gre 136 | 137 | # One-hot encoding of label 138 | tcls[b, best_n, gj, gi, target_labels] = 1 139 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() 140 | ious, giou_loss = iou_pred_vs_target_boxes(pred_boxes[b, best_n, gj, gi], target_boxes, 141 | GIoU=self.use_giou_loss) 142 | iou_scores[b, best_n, gj, gi] = ious 143 | if self.reduction == 'mean': 144 | giou_loss /= 
n_target_boxes 145 | tconf = obj_mask.float() 146 | 147 | return iou_scores, giou_loss, class_mask, obj_mask.type(torch.bool), noobj_mask.type(torch.bool), \ 148 | tx, ty, tw, th, tim, tre, tcls, tconf 149 | 150 | def forward(self, x, targets=None, img_size=608, use_giou_loss=False): 151 | """ 152 | :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size] 153 | :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw)) 154 | :param img_size: default 608 155 | :return: 156 | """ 157 | self.img_size = img_size 158 | self.use_giou_loss = use_giou_loss 159 | self.device = x.device 160 | num_samples, _, _, grid_size = x.size() 161 | 162 | prediction = x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size, grid_size) 163 | prediction = prediction.permute(0, 1, 3, 4, 2).contiguous() 164 | # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7] 165 | 166 | # Get outputs 167 | pred_x = torch.sigmoid(prediction[..., 0]) 168 | pred_y = torch.sigmoid(prediction[..., 1]) 169 | pred_w = prediction[..., 2] # Width 170 | pred_h = prediction[..., 3] # Height 171 | pred_im = prediction[..., 4] # angle imaginary part 172 | pred_re = prediction[..., 5] # angle real part 173 | pred_conf = torch.sigmoid(prediction[..., 6]) # Conf 174 | pred_cls = torch.sigmoid(prediction[..., 7:]) # Cls pred. 175 | 176 | # If grid size does not match current we compute new offsets 177 | if grid_size != self.grid_size: 178 | self.compute_grid_offsets(grid_size) 179 | 180 | # Add offset and scale with anchors 181 | # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6] 182 | pred_boxes = torch.empty(prediction[..., :6].shape, device=self.device, dtype=torch.float) 183 | pred_boxes[..., 0] = pred_x + self.grid_x 184 | pred_boxes[..., 1] = pred_y + self.grid_y 185 | pred_boxes[..., 2] = torch.exp(pred_w).clamp(max=1E3) * self.anchor_w 186 | pred_boxes[..., 3] = torch.exp(pred_h).clamp(max=1E3) * self.anchor_h 187 | pred_boxes[..., 4] = pred_im 188 | pred_boxes[..., 5] = pred_re 189 | 190 | output = torch.cat(( 191 | pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride, 192 | pred_boxes[..., 4:6].view(num_samples, -1, 2), 193 | pred_conf.view(num_samples, -1, 1), 194 | pred_cls.view(num_samples, -1, self.num_classes), 195 | ), dim=-1) 196 | # output size: [num_samples, num boxes, 7 + num_classes] 197 | 198 | if targets is None: 199 | return output, 0 200 | else: 201 | self.reduction = 'mean' 202 | iou_scores, giou_loss, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets( 203 | pred_boxes=pred_boxes, pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors) 204 | 205 | loss_x = F.mse_loss(pred_x[obj_mask], tx[obj_mask], reduction=self.reduction) 206 | loss_y = F.mse_loss(pred_y[obj_mask], ty[obj_mask], reduction=self.reduction) 207 | loss_w = F.mse_loss(pred_w[obj_mask], tw[obj_mask], reduction=self.reduction) 208 | loss_h = F.mse_loss(pred_h[obj_mask], th[obj_mask], reduction=self.reduction) 209 | loss_im = F.mse_loss(pred_im[obj_mask], tim[obj_mask], reduction=self.reduction) 210 | loss_re = F.mse_loss(pred_re[obj_mask], tre[obj_mask], reduction=self.reduction) 211 | loss_im_re = (1. 
- torch.sqrt(pred_im[obj_mask] ** 2 + pred_re[obj_mask] ** 2)) ** 2 # as tim^2 + tre^2 = 1 212 | loss_im_re_red = loss_im_re.sum() if self.reduction == 'sum' else loss_im_re.mean() 213 | loss_eular = loss_im + loss_re + loss_im_re_red 214 | 215 | loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask], tconf[obj_mask], reduction=self.reduction) 216 | loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask], tconf[noobj_mask], reduction=self.reduction) 217 | loss_cls = F.binary_cross_entropy(pred_cls[obj_mask], tcls[obj_mask], reduction=self.reduction) 218 | 219 | if self.use_giou_loss: 220 | loss_obj = loss_conf_obj + loss_conf_noobj 221 | total_loss = giou_loss * self.lgiou_scale + loss_eular * self.leular_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale 222 | else: 223 | loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj 224 | total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_obj + loss_cls 225 | 226 | # Metrics (store loss values using tensorboard) 227 | cls_acc = 100 * class_mask[obj_mask].mean() 228 | conf_obj = pred_conf[obj_mask].mean() 229 | conf_noobj = pred_conf[noobj_mask].mean() 230 | conf50 = (pred_conf > 0.5).float() 231 | iou50 = (iou_scores > 0.5).float() 232 | iou75 = (iou_scores > 0.75).float() 233 | detected_mask = conf50 * class_mask * tconf 234 | precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) 235 | recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) 236 | recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) 237 | 238 | self.metrics = { 239 | "loss": to_cpu(total_loss).item(), 240 | "iou_score": to_cpu(iou_scores[obj_mask].mean()).item(), 241 | 'giou_loss': to_cpu(giou_loss).item(), 242 | 'loss_x': to_cpu(loss_x).item(), 243 | 'loss_y': to_cpu(loss_y).item(), 244 | 'loss_w': to_cpu(loss_w).item(), 245 | 'loss_h': to_cpu(loss_h).item(), 246 | 'loss_eular': to_cpu(loss_eular).item(), 247 | 'loss_im': to_cpu(loss_im).item(), 248 | 'loss_re': to_cpu(loss_re).item(), 249 | "loss_obj": to_cpu(loss_obj).item(), 250 | "loss_cls": to_cpu(loss_cls).item(), 251 | "cls_acc": to_cpu(cls_acc).item(), 252 | "recall50": to_cpu(recall50).item(), 253 | "recall75": to_cpu(recall75).item(), 254 | "precision": to_cpu(precision).item(), 255 | "conf_obj": to_cpu(conf_obj).item(), 256 | "conf_noobj": to_cpu(conf_noobj).item() 257 | } 258 | 259 | return output, total_loss 260 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/tools/objdet_models/darknet/utils/__init__.py -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/cal_intersection_rotated_boxes.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.20 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: This script for intersection calculation of rotated boxes (on GPU) 9 | 10 | Refer from # 
https://stackoverflow.com/questions/44797713/calculate-the-area-of-intersection-of-two-rotated-rectangles-in-python?noredirect=1&lq=1 11 | """ 12 | 13 | import torch 14 | 15 | 16 | class Line: 17 | # ax + by + c = 0 18 | def __init__(self, p1, p2): 19 | """ 20 | 21 | Args: 22 | p1: (x, y) 23 | p2: (x, y) 24 | """ 25 | self.a = p2[1] - p1[1] 26 | self.b = p1[0] - p2[0] 27 | self.c = p2[0] * p1[1] - p2[1] * p1[0] # cross 28 | self.device = p1.device 29 | 30 | def cal_values(self, pts): 31 | return self.a * pts[:, 0] + self.b * pts[:, 1] + self.c 32 | 33 | def find_intersection(self, other): 34 | # See e.g. https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Using_homogeneous_coordinates 35 | if not isinstance(other, Line): 36 | return NotImplemented 37 | w = self.a * other.b - self.b * other.a 38 | return torch.tensor([(self.b * other.c - self.c * other.b) / w, (self.c * other.a - self.a * other.c) / w], 39 | device=self.device) 40 | 41 | 42 | def intersection_area(rect1, rect2): 43 | """Calculate the inter 44 | 45 | Args: 46 | rect1: vertices of the rectangles (4, 2) 47 | rect2: vertices of the rectangles (4, 2) 48 | 49 | Returns: 50 | 51 | """ 52 | 53 | # Use the vertices of the first rectangle as, starting vertices of the intersection polygon. 54 | intersection = rect1 55 | 56 | # Loop over the edges of the second rectangle 57 | roll_rect2 = torch.roll(rect2, -1, dims=0) 58 | for p, q in zip(rect2, roll_rect2): 59 | if len(intersection) <= 2: 60 | break # No intersection 61 | 62 | line = Line(p, q) 63 | 64 | # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), 65 | # any point p with line(p) > 0 is on the "outside". 66 | # Loop over the edges of the intersection polygon, 67 | # and determine which part is inside and which is outside. 68 | new_intersection = [] 69 | line_values = line.cal_values(intersection) 70 | roll_intersection = torch.roll(intersection, -1, dims=0) 71 | roll_line_values = torch.roll(line_values, -1, dims=0) 72 | for s, t, s_value, t_value in zip(intersection, roll_intersection, line_values, roll_line_values): 73 | if s_value <= 0: 74 | new_intersection.append(s) 75 | if s_value * t_value < 0: 76 | # Points are on opposite sides. 77 | # Add the intersection of the lines to new_intersection. 78 | intersection_point = line.find_intersection(Line(s, t)) 79 | new_intersection.append(intersection_point) 80 | 81 | if len(new_intersection) > 0: 82 | intersection = torch.stack(new_intersection) 83 | else: 84 | break 85 | 86 | # Calculate area 87 | if len(intersection) <= 2: 88 | return 0. 
89 | 90 | return PolyArea2D(intersection) 91 | 92 | 93 | def PolyArea2D(pts): 94 | roll_pts = torch.roll(pts, -1, dims=0) 95 | area = (pts[:, 0] * roll_pts[:, 1] - pts[:, 1] * roll_pts[:, 0]).sum().abs() * 0.5 96 | return area 97 | 98 | 99 | if __name__ == "__main__": 100 | import cv2 101 | import numpy as np 102 | from shapely.geometry import Polygon 103 | 104 | 105 | def cvt_box_2_polygon(box): 106 | """ 107 | :param array: an array of shape [num_conners, 2] 108 | :return: a shapely.geometry.Polygon object 109 | """ 110 | # use .buffer(0) to fix a line polygon 111 | # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera 112 | return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) 113 | 114 | 115 | def get_corners_torch(x, y, w, l, yaw): 116 | device = x.device 117 | bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) 118 | cos_yaw = torch.cos(yaw) 119 | sin_yaw = torch.sin(yaw) 120 | # front left 121 | bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 122 | bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 123 | 124 | # rear left 125 | bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 126 | bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 127 | 128 | # rear right 129 | bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 130 | bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 131 | 132 | # front right 133 | bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 134 | bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 135 | 136 | return bev_corners 137 | 138 | 139 | # Show convex in an image 140 | 141 | img_size = 300 142 | img = np.zeros((img_size, img_size, 3)) 143 | img = cv2.resize(img, (img_size, img_size)) 144 | 145 | box1 = torch.tensor([100, 100, 40, 10, np.pi / 2], dtype=torch.float).cuda() 146 | box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() 147 | 148 | box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) 149 | box1_polygon = cvt_box_2_polygon(box1_conners) 150 | box1_area = box1_polygon.area 151 | 152 | box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) 153 | box2_polygon = cvt_box_2_polygon(box2_conners) 154 | box2_area = box2_polygon.area 155 | 156 | intersection = box2_polygon.intersection(box1_polygon).area 157 | union = box1_area + box2_area - intersection 158 | iou = intersection / (union + 1e-16) 159 | 160 | print('Shapely- box1_area: {:.2f}, box2_area: {:.2f}, inter: {:.2f}, iou: {:.4f}'.format(box1_area, box2_area, 161 | intersection, iou)) 162 | 163 | print('intersection from intersection_area(): {}'.format(intersection_area(box1_conners, box2_conners))) 164 | 165 | img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) 166 | img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) 167 | 168 | while True: 169 | cv2.imshow('img', img) 170 | if cv2.waitKey(0) & 0xff == 27: 171 | break 172 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/evaluation_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import sys 3 | import tqdm 4 | 5 | import torch 6 | import numpy as np 7 | from shapely.geometry import Polygon 8 | 9 | # bev image coordinates format 10 | def get_corners(x, y, w, l, yaw): 11 | bev_corners = 
np.zeros((4, 2), dtype=np.float32) 12 | cos_yaw = np.cos(yaw) 13 | sin_yaw = np.sin(yaw) 14 | # front left 15 | bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 16 | bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 17 | 18 | # rear left 19 | bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 20 | bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 21 | 22 | # rear right 23 | bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 24 | bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 25 | 26 | # front right 27 | bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 28 | bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 29 | 30 | return bev_corners 31 | 32 | 33 | def cvt_box_2_polygon(box): 34 | """ 35 | :param box: an array of shape [4, 2] 36 | :return: a shapely.geometry.Polygon object 37 | """ 38 | # use .buffer(0) to fix a line polygon 39 | # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera 40 | return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) 41 | 42 | 43 | def compute_iou_nms(idx_self, idx_other, polygons, areas): 44 | """Calculates IoU of the given box with the array of the given boxes. 45 | box: a polygon 46 | boxes: a vector of polygons 47 | Note: the areas are passed in rather than calculated here for 48 | efficiency. Calculate once in the caller to avoid duplicate work. 49 | """ 50 | # Calculate intersection areas 51 | ious = [] 52 | box1 = polygons[idx_self] 53 | for idx in idx_other: 54 | box2 = polygons[idx] 55 | intersection = box1.intersection(box2).area 56 | iou = intersection / (areas[idx] + areas[idx_self] - intersection + 1e-12) 57 | ious.append(iou) 58 | 59 | return np.array(ious, dtype=np.float32) 60 | 61 | 62 | def load_classes(path): 63 | """ 64 | Loads class labels at 'path' 65 | """ 66 | fp = open(path, "r") 67 | names = fp.read().split("\n")[:-1] 68 | return names 69 | 70 | 71 | def rescale_boxes(boxes, current_dim, original_shape): 72 | """ Rescales bounding boxes to the original shape """ 73 | orig_h, orig_w = original_shape 74 | # The amount of padding that was added 75 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) 76 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) 77 | # Image height and width after padding is removed 78 | unpad_h = current_dim - pad_y 79 | unpad_w = current_dim - pad_x 80 | # Rescale bounding boxes to dimension of original image 81 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w 82 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h 83 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w 84 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h 85 | 86 | return boxes 87 | 88 | 89 | def ap_per_class(tp, conf, pred_cls, target_cls): 90 | """ Compute the average precision, given the recall and precision curves. 91 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 92 | # Arguments 93 | tp: True positives (list). 94 | conf: Objectness value from 0-1 (list). 95 | pred_cls: Predicted object classes (list). 96 | target_cls: True object classes (list). 97 | # Returns 98 | The average precision as computed in py-faster-rcnn. 
99 | """ 100 | 101 | # Sort by objectness 102 | i = np.argsort(-conf) 103 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 104 | 105 | # Find unique classes 106 | unique_classes = np.unique(target_cls) 107 | 108 | # Create Precision-Recall curve and compute AP for each class 109 | ap, p, r = [], [], [] 110 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"): 111 | i = pred_cls == c 112 | n_gt = (target_cls == c).sum() # Number of ground truth objects 113 | n_p = i.sum() # Number of predicted objects 114 | 115 | if n_p == 0 and n_gt == 0: 116 | continue 117 | elif n_p == 0 or n_gt == 0: 118 | ap.append(0) 119 | r.append(0) 120 | p.append(0) 121 | else: 122 | # Accumulate FPs and TPs 123 | fpc = (1 - tp[i]).cumsum() 124 | tpc = (tp[i]).cumsum() 125 | 126 | # Recall 127 | recall_curve = tpc / (n_gt + 1e-16) 128 | r.append(recall_curve[-1]) 129 | 130 | # Precision 131 | precision_curve = tpc / (tpc + fpc) 132 | p.append(precision_curve[-1]) 133 | 134 | # AP from recall-precision curve 135 | ap.append(compute_ap(recall_curve, precision_curve)) 136 | 137 | # Compute F1 score (harmonic mean of precision and recall) 138 | p, r, ap = np.array(p), np.array(r), np.array(ap) 139 | f1 = 2 * p * r / (p + r + 1e-16) 140 | 141 | return p, r, ap, f1, unique_classes.astype("int32") 142 | 143 | 144 | def compute_ap(recall, precision): 145 | """ Compute the average precision, given the recall and precision curves. 146 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 147 | # Arguments 148 | recall: The recall curve (list). 149 | precision: The precision curve (list). 150 | # Returns 151 | The average precision as computed in py-faster-rcnn. 152 | """ 153 | # correct AP calculation 154 | # first append sentinel values at the end 155 | mrec = np.concatenate(([0.0], recall, [1.0])) 156 | mpre = np.concatenate(([0.0], precision, [0.0])) 157 | 158 | # compute the precision envelope 159 | for i in range(mpre.size - 1, 0, -1): 160 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 161 | 162 | # to calculate area under PR curve, look for points 163 | # where X axis (recall) changes value 164 | i = np.where(mrec[1:] != mrec[:-1])[0] 165 | 166 | # and sum (\Delta recall) * prec 167 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 168 | return ap 169 | 170 | 171 | def get_batch_statistics_rotated_bbox(outputs, targets, iou_threshold): 172 | """ Compute true positives, predicted scores and predicted labels per sample """ 173 | batch_metrics = [] 174 | for sample_i in range(len(outputs)): 175 | 176 | if outputs[sample_i] is None: 177 | continue 178 | 179 | output = outputs[sample_i] 180 | pred_boxes = output[:, :6] 181 | pred_scores = output[:, 6] 182 | pred_labels = output[:, -1] 183 | 184 | true_positives = np.zeros(pred_boxes.shape[0]) 185 | 186 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 187 | if len(annotations) > 0: 188 | target_labels = annotations[:, 0] 189 | detected_boxes = [] 190 | target_boxes = annotations[:, 1:] 191 | 192 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 193 | 194 | # If targets are found break 195 | if len(detected_boxes) == len(annotations): 196 | break 197 | 198 | # Ignore if label is not one of the target labels 199 | if pred_label not in target_labels: 200 | continue 201 | 202 | iou, box_index = iou_rotated_single_vs_multi_boxes_cpu(pred_box, target_boxes).max(dim=0) 203 | 204 | if iou >= iou_threshold and box_index not in detected_boxes: 205 | true_positives[pred_i] = 1 206 | detected_boxes += [box_index] 207 | 
batch_metrics.append([true_positives, pred_scores, pred_labels]) 208 | 209 | return batch_metrics 210 | 211 | 212 | def iou_rotated_single_vs_multi_boxes_cpu(single_box, multi_boxes): 213 | """ 214 | :param pred_box: Numpy array 215 | :param target_boxes: Numpy array 216 | :return: 217 | """ 218 | 219 | s_x, s_y, s_w, s_l, s_im, s_re = single_box 220 | s_area = s_w * s_l 221 | s_yaw = np.arctan2(s_im, s_re) 222 | s_conners = get_corners(s_x, s_y, s_w, s_l, s_yaw) 223 | s_polygon = cvt_box_2_polygon(s_conners) 224 | 225 | m_x, m_y, m_w, m_l, m_im, m_re = multi_boxes.transpose(1, 0) 226 | targets_areas = m_w * m_l 227 | m_yaw = np.arctan2(m_im, m_re) 228 | m_boxes_conners = get_corners_vectorize(m_x, m_y, m_w, m_l, m_yaw) 229 | m_boxes_polygons = [cvt_box_2_polygon(box_) for box_ in m_boxes_conners] 230 | 231 | ious = [] 232 | for m_idx in range(multi_boxes.shape[0]): 233 | intersection = s_polygon.intersection(m_boxes_polygons[m_idx]).area 234 | iou_ = intersection / (s_area + targets_areas[m_idx] - intersection + 1e-16) 235 | ious.append(iou_) 236 | 237 | return torch.tensor(ious, dtype=torch.float) 238 | 239 | 240 | def get_corners_vectorize(x, y, w, l, yaw): 241 | """bev image coordinates format - vectorization 242 | 243 | :param x, y, w, l, yaw: [num_boxes,] 244 | :return: num_boxes x (x,y) of 4 conners 245 | """ 246 | bbox2 = np.zeros((x.shape[0], 4, 2), dtype=np.float32) 247 | cos_yaw = np.cos(yaw) 248 | sin_yaw = np.sin(yaw) 249 | 250 | # front left 251 | bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 252 | bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 253 | 254 | # rear left 255 | bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 256 | bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 257 | 258 | # rear right 259 | bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 260 | bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 261 | 262 | # front right 263 | bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 264 | bbox2[:, 3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 265 | 266 | return bbox2 267 | 268 | 269 | def nms_cpu(boxes, confs, nms_thresh=0.5): 270 | """ 271 | :param boxes: [num, 6] 272 | :param confs: [num, num_classes] 273 | :param nms_thresh: 274 | :param min_mode: 275 | :return: 276 | """ 277 | # order of reduce confidence (high --> low) 278 | order = confs.argsort()[::-1] 279 | 280 | x, y, w, l, im, re = boxes.transpose(1, 0) 281 | yaw = np.arctan2(im, re) 282 | boxes_conners = get_corners_vectorize(x, y, w, l, yaw) 283 | boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] # 4 vertices of the box 284 | boxes_areas = w * l 285 | 286 | keep = [] 287 | while order.size > 0: 288 | idx_self = order[0] 289 | idx_other = order[1:] 290 | keep.append(idx_self) 291 | over = compute_iou_nms(idx_self, idx_other, boxes_polygons, boxes_areas) 292 | inds = np.where(over <= nms_thresh)[0] 293 | order = order[inds + 1] 294 | 295 | return np.array(keep) 296 | 297 | 298 | def post_processing(outputs, conf_thresh=0.95, nms_thresh=0.4): 299 | """ 300 | Removes detections with lower object confidence score than 'conf_thres' and performs 301 | Non-Maximum Suppression to further filter detections. 
302 | Returns detections with shape: 303 | (x, y, w, l, im, re, object_conf, class_score, class_pred) 304 | """ 305 | if type(outputs).__name__ != 'ndarray': 306 | outputs = outputs.numpy() 307 | # outputs shape: (batch_size, 22743, 10) 308 | batch_size = outputs.shape[0] 309 | # box_array: [batch, num, 6] 310 | box_array = outputs[:, :, :6] 311 | 312 | # confs: [batch, num, num_classes] 313 | confs = outputs[:, :, 6:7] * outputs[:, :, 7:] 314 | obj_confs = outputs[:, :, 6] 315 | 316 | # [batch, num, num_classes] --> [batch, num] 317 | max_conf = np.max(confs, axis=2) 318 | max_id = np.argmax(confs, axis=2) 319 | 320 | bboxes_batch = [None for _ in range(batch_size)] 321 | 322 | for i in range(batch_size): 323 | argwhere = max_conf[i] > conf_thresh 324 | l_box_array = box_array[i, argwhere, :] 325 | l_obj_confs = obj_confs[i, argwhere, :] 326 | l_max_conf = max_conf[i, argwhere] 327 | l_max_id = max_id[i, argwhere] 328 | 329 | keep = nms_cpu(l_box_array, l_max_conf, nms_thresh=nms_thresh) 330 | 331 | if (keep.size > 0): 332 | l_box_array = l_box_array[keep, :] 333 | l_obj_confs = l_obj_confs[keep].reshape(-1, 1) 334 | l_max_conf = l_max_conf[keep].reshape(-1, 1) 335 | l_max_id = l_max_id[keep].reshape(-1, 1) 336 | bboxes_batch[i] = np.concatenate((l_box_array, l_obj_confs, l_max_conf, l_max_id), axis=-1) 337 | return bboxes_batch 338 | 339 | 340 | def post_processing_v2(prediction, conf_thresh=0.95, nms_thresh=0.4): 341 | """ 342 | Removes detections with lower object confidence score than 'conf_thres' and performs 343 | Non-Maximum Suppression to further filter detections. 344 | Returns detections with shape: 345 | (x, y, w, l, im, re, object_conf, class_score, class_pred) 346 | """ 347 | output = [None for _ in range(len(prediction))] 348 | for image_i, image_pred in enumerate(prediction): 349 | # Filter out confidence scores below threshold 350 | image_pred = image_pred[image_pred[:, 6] >= conf_thresh] 351 | # If none are remaining => process next image 352 | if not image_pred.size(0): 353 | continue 354 | # Object confidence times class confidence 355 | score = image_pred[:, 6] * image_pred[:, 7:].max(dim=1)[0] 356 | # Sort by it 357 | image_pred = image_pred[(-score).argsort()] 358 | class_confs, class_preds = image_pred[:, 7:].max(dim=1, keepdim=True) 359 | detections = torch.cat((image_pred[:, :7].float(), class_confs.float(), class_preds.float()), dim=1) 360 | # Perform non-maximum suppression 361 | keep_boxes = [] 362 | while detections.size(0): 363 | # large_overlap = rotated_bbox_iou(detections[0, :6].unsqueeze(0), detections[:, :6], 1.0, False) > nms_thres # not working 364 | large_overlap = iou_rotated_single_vs_multi_boxes_cpu(detections[0, :6], detections[:, :6]) > nms_thresh 365 | label_match = detections[0, -1] == detections[:, -1] 366 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 367 | invalid = large_overlap & label_match 368 | weights = detections[invalid, 6:7] 369 | # Merge overlapping bboxes by order of confidence 370 | detections[0, :6] = (weights * detections[invalid, :6]).sum(0) / weights.sum() 371 | keep_boxes += [detections[0]] 372 | detections = detections[~invalid] 373 | if len(keep_boxes) > 0: 374 | output[image_i] = torch.stack(keep_boxes) 375 | 376 | return output 377 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/iou_rotated_boxes_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 
-*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.20 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: This script for iou calculation of rotated boxes (on GPU) 9 | 10 | """ 11 | 12 | from __future__ import division 13 | import sys 14 | 15 | import torch 16 | from shapely.geometry import Polygon 17 | from scipy.spatial import ConvexHull 18 | 19 | sys.path.append('../') 20 | 21 | from utils.cal_intersection_rotated_boxes import intersection_area, PolyArea2D 22 | 23 | 24 | def cvt_box_2_polygon(box): 25 | """ 26 | :param array: an array of shape [num_conners, 2] 27 | :return: a shapely.geometry.Polygon object 28 | """ 29 | # use .buffer(0) to fix a line polygon 30 | # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera 31 | return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) 32 | 33 | 34 | def get_corners_vectorize(x, y, w, l, yaw): 35 | """bev image coordinates format - vectorization 36 | 37 | :param x, y, w, l, yaw: [num_boxes,] 38 | :return: num_boxes x (x,y) of 4 conners 39 | """ 40 | device = x.device 41 | bbox2 = torch.zeros((x.size(0), 4, 2), device=device, dtype=torch.float) 42 | cos_yaw = torch.cos(yaw) 43 | sin_yaw = torch.sin(yaw) 44 | 45 | # front left 46 | bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 47 | bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 48 | 49 | # rear left 50 | bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 51 | bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 52 | 53 | # rear right 54 | bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 55 | bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 56 | 57 | # front right 58 | bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 59 | bbox2[:, 3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 60 | 61 | return bbox2 62 | 63 | 64 | def get_polygons_areas_fix_xy(boxes, fix_xy=100.): 65 | """ 66 | Args: 67 | box: (num_boxes, 4) --> w, l, im, re 68 | """ 69 | device = boxes.device 70 | n_boxes = boxes.size(0) 71 | x = torch.full(size=(n_boxes,), fill_value=fix_xy, device=device, dtype=torch.float) 72 | y = torch.full(size=(n_boxes,), fill_value=fix_xy, device=device, dtype=torch.float) 73 | w, l, im, re = boxes.t() 74 | yaw = torch.atan2(im, re) 75 | boxes_conners = get_corners_vectorize(x, y, w, l, yaw) 76 | boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] 77 | boxes_areas = w * l 78 | 79 | return boxes_polygons, boxes_areas 80 | 81 | 82 | def iou_rotated_boxes_targets_vs_anchors(anchors_polygons, anchors_areas, targets_polygons, targets_areas): 83 | device = anchors_areas.device 84 | num_anchors = len(anchors_areas) 85 | num_targets_boxes = len(targets_areas) 86 | 87 | ious = torch.zeros(size=(num_anchors, num_targets_boxes), device=device, dtype=torch.float) 88 | 89 | for a_idx in range(num_anchors): 90 | for tg_idx in range(num_targets_boxes): 91 | intersection = anchors_polygons[a_idx].intersection(targets_polygons[tg_idx]).area 92 | iou = intersection / (anchors_areas[a_idx] + targets_areas[tg_idx] - intersection + 1e-16) 93 | ious[a_idx, tg_idx] = iou 94 | 95 | return ious 96 | 97 | 98 | def iou_pred_vs_target_boxes(pred_boxes, target_boxes, GIoU=False, DIoU=False, CIoU=False): 99 | assert pred_boxes.size() == target_boxes.size(), "Unmatch size of pred_boxes and 
target_boxes" 100 | device = pred_boxes.device 101 | n_boxes = pred_boxes.size(0) 102 | 103 | t_x, t_y, t_w, t_l, t_im, t_re = target_boxes.t() 104 | t_yaw = torch.atan2(t_im, t_re) 105 | t_conners = get_corners_vectorize(t_x, t_y, t_w, t_l, t_yaw) 106 | t_areas = t_w * t_l 107 | 108 | p_x, p_y, p_w, p_l, p_im, p_re = pred_boxes.t() 109 | p_yaw = torch.atan2(p_im, p_re) 110 | p_conners = get_corners_vectorize(p_x, p_y, p_w, p_l, p_yaw) 111 | p_areas = p_w * p_l 112 | 113 | ious = [] 114 | giou_loss = torch.tensor([0.], device=device, dtype=torch.float) 115 | # Thinking to apply vectorization this step 116 | for box_idx in range(n_boxes): 117 | p_cons, t_cons = p_conners[box_idx], t_conners[box_idx] 118 | if not GIoU: 119 | p_poly, t_poly = cvt_box_2_polygon(p_cons), cvt_box_2_polygon(t_cons) 120 | intersection = p_poly.intersection(t_poly).area 121 | else: 122 | intersection = intersection_area(p_cons, t_cons) 123 | 124 | p_area, t_area = p_areas[box_idx], t_areas[box_idx] 125 | union = p_area + t_area - intersection 126 | iou = intersection / (union + 1e-16) 127 | 128 | if GIoU: 129 | convex_conners = torch.cat((p_cons, t_cons), dim=0) 130 | hull = ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output 131 | convex_conners = convex_conners[hull.vertices] 132 | convex_area = PolyArea2D(convex_conners) 133 | giou_loss += 1. - (iou - (convex_area - union) / (convex_area + 1e-16)) 134 | else: 135 | giou_loss += 1. - iou 136 | 137 | if DIoU or CIoU: 138 | raise NotImplementedError 139 | 140 | ious.append(iou) 141 | 142 | return torch.tensor(ious, device=device, dtype=torch.float), giou_loss 143 | 144 | 145 | if __name__ == "__main__": 146 | import cv2 147 | import numpy as np 148 | 149 | 150 | def get_corners_torch(x, y, w, l, yaw): 151 | device = x.device 152 | bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) 153 | cos_yaw = torch.cos(yaw) 154 | sin_yaw = torch.sin(yaw) 155 | # front left 156 | bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 157 | bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 158 | 159 | # rear left 160 | bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 161 | bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 162 | 163 | # rear right 164 | bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 165 | bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 166 | 167 | # front right 168 | bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 169 | bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 170 | 171 | return bev_corners 172 | 173 | 174 | # Show convex in an image 175 | 176 | img_size = 300 177 | img = np.zeros((img_size, img_size, 3)) 178 | img = cv2.resize(img, (img_size, img_size)) 179 | 180 | box1 = torch.tensor([100, 100, 60, 10, 0.5], dtype=torch.float).cuda() 181 | box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() 182 | 183 | box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) 184 | box1_polygon = cvt_box_2_polygon(box1_conners) 185 | box1_area = box1_polygon.area 186 | 187 | box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) 188 | box2_polygon = cvt_box_2_polygon(box2_conners) 189 | box2_area = box2_polygon.area 190 | 191 | intersection = box2_polygon.intersection(box1_polygon).area 192 | union = box1_area + box2_area - intersection 193 | iou = intersection / (union + 1e-16) 194 | 195 | convex_conners = torch.cat((box1_conners, box2_conners), dim=0) 196 | hull = 
ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output 197 | convex_conners = convex_conners[hull.vertices] 198 | convex_polygon = cvt_box_2_polygon(convex_conners) 199 | convex_area = convex_polygon.area 200 | giou_loss = 1. - (iou - (convex_area - union) / (convex_area + 1e-16)) 201 | 202 | print( 203 | 'box1_area: {:.2f}, box2_area: {:.2f}, intersection: {:.2f}, iou: {:.4f}, convex_area: {:.4f}, giou_loss: {}'.format( 204 | box1_area, box2_area, intersection, iou, convex_area, giou_loss)) 205 | 206 | print('intersection_area: {}'.format(intersection_area(box1_conners, box2_conners))) 207 | print('convex_area using PolyArea2D: {}'.format(PolyArea2D(convex_conners))) 208 | 209 | img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) 210 | img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) 211 | img = cv2.polylines(img, [convex_conners.cpu().numpy().astype(np.int)], True, (0, 0, 255), 2) 212 | 213 | while True: 214 | cv2.imshow('img', img) 215 | if cv2.waitKey(0) & 0xff == 27: 216 | break 217 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.05 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: some utilities of torch (conversion) 9 | ----------------------------------------------------------------------------------- 10 | # Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 11 | """ 12 | 13 | import torch 14 | 15 | __all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu'] 16 | 17 | 18 | def convert2cpu(gpu_matrix): 19 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 20 | 21 | 22 | def convert2cpu_long(gpu_matrix): 23 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 24 | 25 | 26 | def to_cpu(tensor): 27 | return tensor.detach().cpu() 28 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/models/fpn_resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | # --------------------------------------------------------------------------------- 3 | # -*- coding: utf-8 -*- 4 | ----------------------------------------------------------------------------------- 5 | # Copyright (c) Microsoft 6 | # Licensed under the MIT License. 
7 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 8 | # Modified by Xingyi Zhou 9 | # Refer from: https://github.com/xingyizhou/CenterNet 10 | 11 | # Modifier: Nguyen Mau Dung (2020.08.09) 12 | # ------------------------------------------------------------------------------ 13 | """ 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.utils.model_zoo as model_zoo 24 | import torch.nn.functional as F 25 | 26 | BN_MOMENTUM = 0.1 27 | 28 | model_urls = { 29 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 30 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 31 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 32 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 33 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 34 | } 35 | 36 | 37 | def conv3x3(in_planes, out_planes, stride=1): 38 | """3x3 convolution with padding""" 39 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | residual = self.downsample(x) 67 | 68 | out += residual 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 82 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 83 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 84 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) 85 | self.relu = nn.ReLU(inplace=True) 86 | self.downsample = downsample 87 | self.stride = stride 88 | 89 | def forward(self, x): 90 | residual = x 91 | 92 | out = self.conv1(x) 93 | out = self.bn1(out) 94 | out = self.relu(out) 95 | 96 | out = self.conv2(out) 97 | out = self.bn2(out) 98 | out = self.relu(out) 99 | 100 | out = self.conv3(out) 101 | out = self.bn3(out) 102 | 103 | if self.downsample is not None: 104 | residual = self.downsample(x) 105 | 106 | out += residual 107 | out = self.relu(out) 108 | 109 | return out 110 | 111 | 112 | class PoseResNet(nn.Module): 113 | 114 | def __init__(self, block, layers, heads, head_conv, **kwargs): 115 | self.inplanes = 64 116 | self.deconv_with_bias = False 117 | self.heads = heads 118 | 119 | super(PoseResNet, self).__init__() 120 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 
bias=False) 121 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 122 | self.relu = nn.ReLU(inplace=True) 123 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 124 | self.layer1 = self._make_layer(block, 64, layers[0]) 125 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 126 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 127 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 128 | 129 | self.conv_up_level1 = nn.Conv2d(768, 256, kernel_size=1, stride=1, padding=0) 130 | self.conv_up_level2 = nn.Conv2d(384, 128, kernel_size=1, stride=1, padding=0) 131 | self.conv_up_level3 = nn.Conv2d(192, 64, kernel_size=1, stride=1, padding=0) 132 | 133 | fpn_channels = [256, 128, 64] 134 | for fpn_idx, fpn_c in enumerate(fpn_channels): 135 | for head in sorted(self.heads): 136 | num_output = self.heads[head] 137 | if head_conv > 0: 138 | fc = nn.Sequential( 139 | nn.Conv2d(fpn_c, head_conv, kernel_size=3, padding=1, bias=True), 140 | nn.ReLU(inplace=True), 141 | nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0)) 142 | else: 143 | fc = nn.Conv2d(in_channels=fpn_c, out_channels=num_output, kernel_size=1, stride=1, padding=0) 144 | 145 | self.__setattr__('fpn{}_{}'.format(fpn_idx, head), fc) 146 | 147 | def _make_layer(self, block, planes, blocks, stride=1): 148 | downsample = None 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 152 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 153 | ) 154 | 155 | layers = [] 156 | layers.append(block(self.inplanes, planes, stride, downsample)) 157 | self.inplanes = planes * block.expansion 158 | for i in range(1, blocks): 159 | layers.append(block(self.inplanes, planes)) 160 | 161 | return nn.Sequential(*layers) 162 | 163 | def forward(self, x): 164 | _, _, input_h, input_w = x.size() 165 | hm_h, hm_w = input_h // 4, input_w // 4 166 | x = self.conv1(x) 167 | x = self.bn1(x) 168 | x = self.relu(x) 169 | x = self.maxpool(x) 170 | 171 | out_layer1 = self.layer1(x) 172 | out_layer2 = self.layer2(out_layer1) 173 | 174 | out_layer3 = self.layer3(out_layer2) 175 | 176 | out_layer4 = self.layer4(out_layer3) 177 | 178 | # up_level1: torch.Size([b, 512, 14, 14]) 179 | up_level1 = F.interpolate(out_layer4, scale_factor=2, mode='bilinear', align_corners=True) 180 | 181 | concat_level1 = torch.cat((up_level1, out_layer3), dim=1) 182 | # up_level2: torch.Size([b, 256, 28, 28]) 183 | up_level2 = F.interpolate(self.conv_up_level1(concat_level1), scale_factor=2, mode='bilinear', 184 | align_corners=True) 185 | 186 | concat_level2 = torch.cat((up_level2, out_layer2), dim=1) 187 | # up_level3: torch.Size([b, 128, 56, 56]), 188 | up_level3 = F.interpolate(self.conv_up_level2(concat_level2), scale_factor=2, mode='bilinear', 189 | align_corners=True) 190 | # up_level4: torch.Size([b, 64, 56, 56]) 191 | up_level4 = self.conv_up_level3(torch.cat((up_level3, out_layer1), dim=1)) 192 | 193 | ret = {} 194 | for head in self.heads: 195 | temp_outs = [] 196 | for fpn_idx, fdn_input in enumerate([up_level2, up_level3, up_level4]): 197 | fpn_out = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))(fdn_input) 198 | _, _, fpn_out_h, fpn_out_w = fpn_out.size() 199 | # Make sure the added features having same size of heatmap output 200 | if (fpn_out_w != hm_w) or (fpn_out_h != hm_h): 201 | fpn_out = F.interpolate(fpn_out, size=(hm_h, hm_w)) 202 | 
temp_outs.append(fpn_out) 203 | # Take the softmax in the keypoint feature pyramid network 204 | final_out = self.apply_kfpn(temp_outs) 205 | 206 | ret[head] = final_out 207 | 208 | return ret 209 | 210 | def apply_kfpn(self, outs): 211 | outs = torch.cat([out.unsqueeze(-1) for out in outs], dim=-1) 212 | softmax_outs = F.softmax(outs, dim=-1) 213 | ret_outs = (outs * softmax_outs).sum(dim=-1) 214 | return ret_outs 215 | 216 | def init_weights(self, num_layers, pretrained=True): 217 | if pretrained: 218 | # TODO: Check initial weights for head later 219 | for fpn_idx in [0, 1, 2]: # 3 FPN layers 220 | for head in self.heads: 221 | final_layer = self.__getattr__('fpn{}_{}'.format(fpn_idx, head)) 222 | for i, m in enumerate(final_layer.modules()): 223 | if isinstance(m, nn.Conv2d): 224 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 225 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 226 | # print('=> init {}.bias as 0'.format(name)) 227 | if m.weight.shape[0] == self.heads[head]: 228 | if 'hm' in head: 229 | nn.init.constant_(m.bias, -2.19) 230 | else: 231 | nn.init.normal_(m.weight, std=0.001) 232 | nn.init.constant_(m.bias, 0) 233 | # pretrained_state_dict = torch.load(pretrained) 234 | url = model_urls['resnet{}'.format(num_layers)] 235 | pretrained_state_dict = model_zoo.load_url(url) 236 | print('=> loading pretrained model {}'.format(url)) 237 | self.load_state_dict(pretrained_state_dict, strict=False) 238 | 239 | 240 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 241 | 34: (BasicBlock, [3, 4, 6, 3]), 242 | 50: (Bottleneck, [3, 4, 6, 3]), 243 | 101: (Bottleneck, [3, 4, 23, 3]), 244 | 152: (Bottleneck, [3, 8, 36, 3])} 245 | 246 | 247 | def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained): 248 | block_class, layers = resnet_spec[num_layers] 249 | 250 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 251 | model.init_weights(num_layers, pretrained=imagenet_pretrained) 252 | return model 253 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | # --------------------------------------------------------------------------------- 3 | # -*- coding: utf-8 -*- 4 | ----------------------------------------------------------------------------------- 5 | # Copyright (c) Microsoft 6 | # Licensed under the MIT License. 
7 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 8 | # Modified by Xingyi Zhou 9 | # Refer from: https://github.com/xingyizhou/CenterNet 10 | 11 | # Modifier: Nguyen Mau Dung (2020.08.09) 12 | # ------------------------------------------------------------------------------ 13 | """ 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.utils.model_zoo as model_zoo 24 | 25 | BN_MOMENTUM = 0.1 26 | 27 | model_urls = { 28 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 29 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 30 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 31 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 32 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 33 | } 34 | 35 | 36 | def conv3x3(in_planes, out_planes, stride=1): 37 | """3x3 convolution with padding""" 38 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 39 | padding=1, bias=False) 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | residual = self.downsample(x) 67 | 68 | out += residual 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 82 | padding=1, bias=False) 83 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 84 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 85 | bias=False) 86 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 87 | momentum=BN_MOMENTUM) 88 | self.relu = nn.ReLU(inplace=True) 89 | self.downsample = downsample 90 | self.stride = stride 91 | 92 | def forward(self, x): 93 | residual = x 94 | 95 | out = self.conv1(x) 96 | out = self.bn1(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv2(out) 100 | out = self.bn2(out) 101 | out = self.relu(out) 102 | 103 | out = self.conv3(out) 104 | out = self.bn3(out) 105 | 106 | if self.downsample is not None: 107 | residual = self.downsample(x) 108 | 109 | out += residual 110 | out = self.relu(out) 111 | 112 | return out 113 | 114 | 115 | class PoseResNet(nn.Module): 116 | 117 | def __init__(self, block, layers, heads, head_conv, **kwargs): 118 | self.inplanes = 64 119 | self.deconv_with_bias = False 120 | self.heads = heads 121 | 122 | super(PoseResNet, self).__init__() 123 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 124 | bias=False) 125 | 
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 126 | self.relu = nn.ReLU(inplace=True) 127 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 128 | self.layer1 = self._make_layer(block, 64, layers[0]) 129 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 130 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 131 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 132 | 133 | # used for deconv layers 134 | self.deconv_layers = self._make_deconv_layer( 135 | 3, 136 | [256, 256, 256], 137 | [4, 4, 4], 138 | ) 139 | # self.final_layer = [] 140 | 141 | for head in sorted(self.heads): 142 | num_output = self.heads[head] 143 | if head_conv > 0: 144 | fc = nn.Sequential( 145 | nn.Conv2d(256, head_conv, 146 | kernel_size=3, padding=1, bias=True), 147 | nn.ReLU(inplace=True), 148 | nn.Conv2d(head_conv, num_output, 149 | kernel_size=1, stride=1, padding=0)) 150 | else: 151 | fc = nn.Conv2d( 152 | in_channels=256, 153 | out_channels=num_output, 154 | kernel_size=1, 155 | stride=1, 156 | padding=0 157 | ) 158 | self.__setattr__(head, fc) 159 | 160 | # self.final_layer = nn.ModuleList(self.final_layer) 161 | 162 | def _make_layer(self, block, planes, blocks, stride=1): 163 | downsample = None 164 | if stride != 1 or self.inplanes != planes * block.expansion: 165 | downsample = nn.Sequential( 166 | nn.Conv2d(self.inplanes, planes * block.expansion, 167 | kernel_size=1, stride=stride, bias=False), 168 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 169 | ) 170 | 171 | layers = [] 172 | layers.append(block(self.inplanes, planes, stride, downsample)) 173 | self.inplanes = planes * block.expansion 174 | for i in range(1, blocks): 175 | layers.append(block(self.inplanes, planes)) 176 | 177 | return nn.Sequential(*layers) 178 | 179 | def _get_deconv_cfg(self, deconv_kernel, index): 180 | if deconv_kernel == 4: 181 | padding = 1 182 | output_padding = 0 183 | elif deconv_kernel == 3: 184 | padding = 1 185 | output_padding = 1 186 | elif deconv_kernel == 2: 187 | padding = 0 188 | output_padding = 0 189 | 190 | return deconv_kernel, padding, output_padding 191 | 192 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 193 | assert num_layers == len(num_filters), \ 194 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 195 | assert num_layers == len(num_kernels), \ 196 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 197 | 198 | layers = [] 199 | for i in range(num_layers): 200 | kernel, padding, output_padding = \ 201 | self._get_deconv_cfg(num_kernels[i], i) 202 | 203 | planes = num_filters[i] 204 | layers.append( 205 | nn.ConvTranspose2d( 206 | in_channels=self.inplanes, 207 | out_channels=planes, 208 | kernel_size=kernel, 209 | stride=2, 210 | padding=padding, 211 | output_padding=output_padding, 212 | bias=self.deconv_with_bias)) 213 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 214 | layers.append(nn.ReLU(inplace=True)) 215 | self.inplanes = planes 216 | 217 | return nn.Sequential(*layers) 218 | 219 | def forward(self, x): 220 | x = self.conv1(x) 221 | x = self.bn1(x) 222 | x = self.relu(x) 223 | x = self.maxpool(x) 224 | 225 | x = self.layer1(x) 226 | x = self.layer2(x) 227 | x = self.layer3(x) 228 | x = self.layer4(x) 229 | 230 | x = self.deconv_layers(x) 231 | ret = {} 232 | for head in self.heads: 233 | ret[head] = self.__getattr__(head)(x) 234 | return ret 235 | 236 | def init_weights(self, num_layers, pretrained=True): 237 | if pretrained: 238 | # 
print('=> init resnet deconv weights from normal distribution') 239 | for _, m in self.deconv_layers.named_modules(): 240 | if isinstance(m, nn.ConvTranspose2d): 241 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 242 | # print('=> init {}.bias as 0'.format(name)) 243 | nn.init.normal_(m.weight, std=0.001) 244 | if self.deconv_with_bias: 245 | nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.BatchNorm2d): 247 | # print('=> init {}.weight as 1'.format(name)) 248 | # print('=> init {}.bias as 0'.format(name)) 249 | nn.init.constant_(m.weight, 1) 250 | nn.init.constant_(m.bias, 0) 251 | # print('=> init final conv weights from normal distribution') 252 | for head in self.heads: 253 | final_layer = self.__getattr__(head) 254 | for i, m in enumerate(final_layer.modules()): 255 | if isinstance(m, nn.Conv2d): 256 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 257 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 258 | # print('=> init {}.bias as 0'.format(name)) 259 | if m.weight.shape[0] == self.heads[head]: 260 | if 'hm' in head: 261 | nn.init.constant_(m.bias, -2.19) 262 | else: 263 | nn.init.normal_(m.weight, std=0.001) 264 | nn.init.constant_(m.bias, 0) 265 | # pretrained_state_dict = torch.load(pretrained) 266 | url = model_urls['resnet{}'.format(num_layers)] 267 | pretrained_state_dict = model_zoo.load_url(url) 268 | print('=> loading pretrained model {}'.format(url)) 269 | self.load_state_dict(pretrained_state_dict, strict=False) 270 | 271 | 272 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 273 | 34: (BasicBlock, [3, 4, 6, 3]), 274 | 50: (Bottleneck, [3, 4, 6, 3]), 275 | 101: (Bottleneck, [3, 4, 23, 3]), 276 | 152: (Bottleneck, [3, 8, 36, 3])} 277 | 278 | 279 | def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained): 280 | block_class, layers = resnet_spec[num_layers] 281 | 282 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 283 | model.init_weights(num_layers, pretrained=imagenet_pretrained) 284 | return model 285 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/utils/evaluation_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.08.17 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: The utils for evaluation 9 | # Refer from: https://github.com/xingyizhou/CenterNet 10 | """ 11 | 12 | from __future__ import division 13 | import sys 14 | 15 | import torch 16 | import numpy as np 17 | import torch.nn.functional as F 18 | import cv2 19 | 20 | def _nms(heat, kernel=3): 21 | pad = (kernel - 1) // 2 22 | hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 23 | keep = (hmax == heat).float() 24 | 25 | return heat * keep 26 | 27 | 28 | def _gather_feat(feat, ind, mask=None): 29 | dim = feat.size(2) 30 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 31 | feat = feat.gather(1, ind) 32 | if mask is not None: 33 | mask = mask.unsqueeze(2).expand_as(feat) 34 | feat = feat[mask] 35 | feat = feat.view(-1, dim) 36 | return feat 37 | 38 | 39 | def _transpose_and_gather_feat(feat, ind): 40 | feat = feat.permute(0, 2, 3, 1).contiguous() 41 | feat = feat.view(feat.size(0), -1, feat.size(3)) 42 | feat = 
_gather_feat(feat, ind) 43 | return feat 44 | 45 | 46 | def _topk(scores, K=40): 47 | batch, cat, height, width = scores.size() 48 | 49 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 50 | 51 | topk_inds = topk_inds % (height * width) 52 | topk_ys = (topk_inds // width).int().float() 53 | topk_xs = (topk_inds % width).int().float() 54 | 55 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 56 | topk_clses = (topk_ind // K).int() 57 | topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 58 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 59 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 60 | 61 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 62 | 63 | 64 | def _topk_channel(scores, K=40): 65 | batch, cat, height, width = scores.size() 66 | 67 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 68 | 69 | topk_inds = topk_inds % (height * width) 70 | topk_ys = (topk_inds / width).int().float() 71 | topk_xs = (topk_inds % width).int().float() 72 | 73 | return topk_scores, topk_inds, topk_ys, topk_xs 74 | 75 | 76 | def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40): 77 | batch_size, num_classes, height, width = hm_cen.size() 78 | 79 | hm_cen = _nms(hm_cen) 80 | scores, inds, clses, ys, xs = _topk(hm_cen, K=K) 81 | if cen_offset is not None: 82 | cen_offset = _transpose_and_gather_feat(cen_offset, inds) 83 | cen_offset = cen_offset.view(batch_size, K, 2) 84 | xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1] 85 | ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2] 86 | else: 87 | xs = xs.view(batch_size, K, 1) + 0.5 88 | ys = ys.view(batch_size, K, 1) + 0.5 89 | 90 | direction = _transpose_and_gather_feat(direction, inds) 91 | direction = direction.view(batch_size, K, 2) 92 | z_coor = _transpose_and_gather_feat(z_coor, inds) 93 | z_coor = z_coor.view(batch_size, K, 1) 94 | dim = _transpose_and_gather_feat(dim, inds) 95 | dim = dim.view(batch_size, K, 3) 96 | clses = clses.view(batch_size, K, 1).float() 97 | scores = scores.view(batch_size, K, 1) 98 | 99 | # (scores x 1, ys x 1, xs x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) 100 | # (scores-0:1, ys-1:2, xs-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) 101 | # detections: [batch_size, K, 10] 102 | detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2) 103 | 104 | return detections 105 | 106 | 107 | def get_yaw(direction): 108 | return np.arctan2(direction[:, 0:1], direction[:, 1:2]) 109 | 110 | 111 | def post_processing(detections, configs): 112 | """ 113 | :param detections: [batch_size, K, 10] 114 | # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) 115 | # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) 116 | :return: 117 | """ 118 | ret = [] 119 | for i in range(detections.shape[0]): 120 | top_preds = {} 121 | classes = detections[i, :, -1] 122 | for j in range(configs.num_classes): 123 | inds = (classes == j) 124 | # x, y, z, h, w, l, yaw 125 | top_preds[j] = np.concatenate([ 126 | detections[i, inds, 0:1], 127 | detections[i, inds, 1:2] * configs.down_ratio, 128 | detections[i, inds, 2:3] * configs.down_ratio, 129 | detections[i, inds, 3:4], 130 | detections[i, inds, 4:5], 131 | detections[i, inds, 5:6] / (configs.lim_y[1]-configs.lim_y[0]) * configs.bev_width, 132 | detections[i, inds, 6:7] / (configs.lim_x[1]-configs.lim_x[0]) * configs.bev_height, 133 | 
get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1) 134 | # Filter by conf_thresh 135 | if len(top_preds[j]) > 0: 136 | keep_inds = (top_preds[j][:, 0] > configs.conf_thresh) 137 | top_preds[j] = top_preds[j][keep_inds] 138 | ret.append(top_preds) 139 | 140 | return ret 141 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.08.09 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: some utilities of torch (conversion) 9 | ----------------------------------------------------------------------------------- 10 | """ 11 | 12 | import torch 13 | import torch.distributed as dist 14 | 15 | __all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu', 'reduce_tensor', 'to_python_float', '_sigmoid'] 16 | 17 | 18 | def convert2cpu(gpu_matrix): 19 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 20 | 21 | 22 | def convert2cpu_long(gpu_matrix): 23 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 24 | 25 | 26 | def to_cpu(tensor): 27 | return tensor.detach().cpu() 28 | 29 | 30 | def reduce_tensor(tensor, world_size): 31 | rt = tensor.clone() 32 | dist.all_reduce(rt, op=dist.reduce_op.SUM) 33 | rt /= world_size 34 | return rt 35 | 36 | 37 | def to_python_float(t): 38 | if hasattr(t, 'item'): 39 | return t.item() 40 | else: 41 | return t[0] 42 | 43 | 44 | def _sigmoid(x): 45 | return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) 46 | -------------------------------------------------------------------------------- /tools/waymo_reader/README.md: -------------------------------------------------------------------------------- 1 | # Simple Waymo Open Dataset Reader 2 | 3 | This is a simple file reader for the [Waymo Open Dataset](https://waymo.com/open/) which does not depend on TensorFlow and Bazel. The main goal is to be able to quickly integrate Waymo’s dataset with other deep learning frameworks without having to pull tons of dependencies. It does not aim to replace the [whole framework](https://github.com/waymo-research/waymo-open-dataset), especially the evaluation metrics that they provide. 4 | 5 | ## Installation 6 | 7 | Use the provided `setup.py`: 8 | 9 | ``` 10 | python setup.py install 11 | ``` 12 | 13 | ## Usage 14 | 15 | Please refer to the examples in `examples/` for how to use the file reader. Refer to [https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb](https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb) for more details on Waymo’s dataset. 16 | 17 | ## License 18 | 19 | This code is released under the Apache License, version 2.0. This projects incorporate some parts of the [Waymo Open Dataset code](https://github.com/waymo-research/waymo-open-dataset/blob/master/README.md) (the files `simple_waymo_open_dataset_reader/*.proto`) and is licensed to you under their original license terms. See `LICENSE` file for details. 
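
For quick orientation, here is a minimal usage sketch based on the `WaymoDataFileReader` class in `simple_waymo_open_dataset_reader/__init__.py`. The segment filename is a placeholder and the frame field name follows the Waymo Open Dataset `Frame` proto; see `examples/` for complete scripts.

```
from simple_waymo_open_dataset_reader import WaymoDataFileReader

# Open a .tfrecord segment file (placeholder path)
reader = WaymoDataFileReader("segment-XXXX.tfrecord")

# Table of byte offsets, one entry per frame record
table = reader.get_record_table()
print("Number of frames:", len(table))

# Iterate over the frames; each record is a dataset_pb2.Frame protobuf
for frame in reader:
    print(frame.timestamp_micros)

# Reset to the first frame
reader.seek(0)
```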
20 | 21 | -------------------------------------------------------------------------------- /tools/waymo_reader/build/lib/simple_waymo_open_dataset_reader/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import struct 17 | from . import dataset_pb2 18 | 19 | class WaymoDataFileReader: 20 | def __init__(self, filename): 21 | self.file = open(filename, "rb") 22 | 23 | def get_record_table(self): 24 | """ Generate and return a table of the offset of all frame records in the file. 25 | 26 | This is particularly useful to determine the number of frames in the file 27 | and access random frames rather than read the file sequentially. 28 | """ 29 | 30 | self.file.seek(0,0) 31 | 32 | table = [] 33 | 34 | while self.file: 35 | offset = self.file.tell() 36 | 37 | try: 38 | self.read_record(header_only=True) 39 | table.append(offset) 40 | except StopIteration: 41 | break 42 | 43 | self.file.seek(0,0) 44 | 45 | return table 46 | 47 | def seek(self, offset): 48 | """ Seek to a specific frame record by offset. 49 | 50 | The offset of each frame in the file can be obtained with the function reader.get_record_table() 51 | """ 52 | 53 | self.file.seek(offset,0) 54 | 55 | def read_record(self, header_only = False): 56 | """ Read the current frame record in the file. 57 | 58 | If repeatedly called, it will return sequential records until the end of file. When the end is reached, it will raise a StopIteration exception. 59 | To reset to the first frame, call reader.seek(0) 60 | """ 61 | 62 | # TODO: Check CRCs. 63 | 64 | header = self.file.read(12) 65 | 66 | if header == b'': 67 | raise StopIteration() 68 | 69 | length, lengthcrc = struct.unpack("QI", header) 70 | 71 | 72 | if header_only: 73 | # Skip length+4 bytes ahead 74 | self.file.seek(length+4,1) 75 | return None 76 | else: 77 | data = self.file.read(length) 78 | datacrc = struct.unpack("I",self.file.read(4)) 79 | 80 | frame = dataset_pb2.Frame() 81 | frame.ParseFromString(data) 82 | return frame 83 | 84 | def __iter__(self): 85 | """ Simple iterator through the file. Note that the iterator will iterate from the current position, does not support concurrent iterators and will not reset back to the beginning when the end is reached. 
To reset to the first frame, call reader.seek(0) 86 | """ 87 | return self 88 | 89 | def __next__(self): 90 | return self.read_record() 91 | 92 | 93 | -------------------------------------------------------------------------------- /tools/waymo_reader/build/lib/simple_waymo_open_dataset_reader/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import numpy as np 17 | from simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 18 | import zlib 19 | import math 20 | import io 21 | 22 | 23 | def get_box_transformation_matrix(box): 24 | """Create a transformation matrix for a given label box pose.""" 25 | 26 | tx,ty,tz = box.center_x,box.center_y,box.center_z 27 | c = math.cos(box.heading) 28 | s = math.sin(box.heading) 29 | 30 | sl, sh, sw = box.length, box.height, box.width 31 | 32 | return np.array([ 33 | [ sl*c,-sw*s, 0,tx], 34 | [ sl*s, sw*c, 0,ty], 35 | [ 0, 0, sh,tz], 36 | [ 0, 0, 0, 1]]) 37 | 38 | def get_3d_box_projected_corners(vehicle_to_image, label): 39 | """Get the 2D coordinates of the 8 corners of a label's 3D bounding box. 40 | 41 | vehicle_to_image: Transformation matrix from the vehicle frame to the image frame. 42 | label: The object label 43 | """ 44 | 45 | box = label.box 46 | 47 | # Get the vehicle pose 48 | box_to_vehicle = get_box_transformation_matrix(box) 49 | 50 | # Calculate the projection from the box space to the image space. 51 | box_to_image = np.matmul(vehicle_to_image, box_to_vehicle) 52 | 53 | 54 | # Loop through the 8 corners constituting the 3D box 55 | # and project them onto the image 56 | vertices = np.empty([2,2,2,2]) 57 | for k in [0, 1]: 58 | for l in [0, 1]: 59 | for m in [0, 1]: 60 | # 3D point in the box space 61 | v = np.array([(k-0.5), (l-0.5), (m-0.5), 1.]) 62 | 63 | # Project the point onto the image 64 | v = np.matmul(box_to_image, v) 65 | 66 | # If any of the corner is behind the camera, ignore this object. 67 | if v[2] < 0: 68 | return None 69 | 70 | vertices[k,l,m,:] = [v[0]/v[2], v[1]/v[2]] 71 | 72 | vertices = vertices.astype(np.int32) 73 | 74 | return vertices 75 | 76 | def compute_2d_bounding_box(img_or_shape,points): 77 | """Compute the 2D bounding box for a set of 2D points. 78 | 79 | img_or_shape: Either an image or the shape of an image. 80 | img_or_shape is used to clamp the bounding box coordinates. 
81 | 82 | points: The set of 2D points to use 83 | """ 84 | 85 | if isinstance(img_or_shape,tuple): 86 | shape = img_or_shape 87 | else: 88 | shape = img_or_shape.shape 89 | 90 | # Compute the 2D bounding box and draw a rectangle 91 | x1 = np.amin(points[...,0]) 92 | x2 = np.amax(points[...,0]) 93 | y1 = np.amin(points[...,1]) 94 | y2 = np.amax(points[...,1]) 95 | 96 | x1 = min(max(0,x1),shape[1]) 97 | x2 = min(max(0,x2),shape[1]) 98 | y1 = min(max(0,y1),shape[0]) 99 | y2 = min(max(0,y2),shape[0]) 100 | 101 | return (x1,y1,x2,y2) 102 | 103 | def draw_3d_box(img, vehicle_to_image, label, colour=(255,128,128), draw_2d_bounding_box=False): 104 | """Draw a 3D bounding from a given 3D label on a given "img". "vehicle_to_image" must be a projection matrix from the vehicle reference frame to the image space. 105 | 106 | draw_2d_bounding_box: If set a 2D bounding box encompassing the 3D box will be drawn 107 | """ 108 | import cv2 109 | 110 | vertices = get_3d_box_projected_corners(vehicle_to_image, label) 111 | 112 | if vertices is None: 113 | # The box is not visible in this image 114 | return 115 | 116 | if draw_2d_bounding_box: 117 | x1,y1,x2,y2 = compute_2d_bounding_box(img.shape, vertices) 118 | 119 | if (x1 != x2 and y1 != y2): 120 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 1) 121 | else: 122 | # Draw the edges of the 3D bounding box 123 | for k in [0, 1]: 124 | for l in [0, 1]: 125 | for idx1,idx2 in [((0,k,l),(1,k,l)), ((k,0,l),(k,1,l)), ((k,l,0),(k,l,1))]: 126 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=1) 127 | # Draw a cross on the front face to identify front & back. 128 | for idx1,idx2 in [((1,0,0),(1,1,1)), ((1,1,0),(1,0,1))]: 129 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=1) 130 | 131 | def draw_2d_box(img, label, colour=(255,128,128)): 132 | """Draw a 2D bounding from a given 2D label on a given "img". 133 | """ 134 | import cv2 135 | 136 | box = label.box 137 | 138 | # Extract the 2D coordinates 139 | # It seems that "length" is the actual width and "width" is the actual height of the bounding box. Most peculiar. 140 | x1 = int(box.center_x - box.length/2) 141 | x2 = int(box.center_x + box.length/2) 142 | y1 = int(box.center_y - box.width/2) 143 | y2 = int(box.center_y + box.width/2) 144 | 145 | # Draw the rectangle 146 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 1) 147 | 148 | 149 | def decode_image(camera): 150 | """ Decode the JPEG image. """ 151 | 152 | from PIL import Image 153 | return np.array(Image.open(io.BytesIO(camera.image))) 154 | 155 | def get_image_transform(camera_calibration): 156 | """ For a given camera calibration, compute the transformation matrix 157 | from the vehicle reference frame to the image space. 158 | """ 159 | 160 | # TODO: Handle the camera distortions 161 | extrinsic = np.array(camera_calibration.extrinsic.transform).reshape(4,4) 162 | intrinsic = camera_calibration.intrinsic 163 | 164 | # Camera model: 165 | # | fx 0 cx 0 | 166 | # | 0 fy cy 0 | 167 | # | 0 0 1 0 | 168 | camera_model = np.array([ 169 | [intrinsic[0], 0, intrinsic[2], 0], 170 | [0, intrinsic[1], intrinsic[3], 0], 171 | [0, 0, 1, 0]]) 172 | 173 | # Swap the axes around 174 | axes_transformation = np.array([ 175 | [0,-1,0,0], 176 | [0,0,-1,0], 177 | [1,0,0,0], 178 | [0,0,0,1]]) 179 | 180 | # Compute the projection matrix from the vehicle space to image space. 
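    # The chain below is: vehicle frame -> camera frame via np.linalg.inv(extrinsic)
    # (the extrinsic is the camera-to-vehicle transform), then the axes swap from the
    # Waymo camera convention (x forward, y left, z up) into the optical convention
    # (x right, y down, z forward), then the pinhole projection via camera_model.
    # A homogeneous vehicle point p_v is thus mapped, up to the perspective divide
    # by depth, as camera_model @ axes_transformation @ inv(extrinsic) @ p_v.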
181 | vehicle_to_image = np.matmul(camera_model, np.matmul(axes_transformation, np.linalg.inv(extrinsic))) 182 | return vehicle_to_image 183 | 184 | def get_rotation_matrix(roll, pitch, yaw): 185 | """ Convert Euler angles to a rotation matrix""" 186 | 187 | cos_roll = np.cos(roll) 188 | sin_roll = np.sin(roll) 189 | cos_yaw = np.cos(yaw) 190 | sin_yaw = np.sin(yaw) 191 | cos_pitch = np.cos(pitch) 192 | sin_pitch = np.sin(pitch) 193 | 194 | ones = np.ones_like(yaw) 195 | zeros = np.zeros_like(yaw) 196 | 197 | r_roll = np.stack([ 198 | [ones, zeros, zeros], 199 | [zeros, cos_roll, -sin_roll], 200 | [zeros, sin_roll, cos_roll]]) 201 | 202 | r_pitch = np.stack([ 203 | [ cos_pitch, zeros, sin_pitch], 204 | [ zeros, ones, zeros], 205 | [-sin_pitch, zeros, cos_pitch]]) 206 | 207 | r_yaw = np.stack([ 208 | [cos_yaw, -sin_yaw, zeros], 209 | [sin_yaw, cos_yaw, zeros], 210 | [zeros, zeros, ones]]) 211 | 212 | pose = np.einsum('ijhw,jkhw,klhw->ilhw',r_yaw,r_pitch,r_roll) 213 | pose = pose.transpose(2,3,0,1) 214 | return pose 215 | 216 | def parse_range_image_and_camera_projection(laser, second_response=False): 217 | """ Parse the range image for a given laser. 218 | 219 | second_response: If true, return the second strongest response instead of the primary response. 220 | The second_response might be useful to detect the edge of objects 221 | """ 222 | 223 | range_image_pose = None 224 | camera_projection = None 225 | 226 | if not second_response: 227 | # Return the strongest response if available 228 | if len(laser.ri_return1.range_image_compressed) > 0: 229 | ri = dataset_pb2.MatrixFloat() 230 | ri.ParseFromString( 231 | zlib.decompress(laser.ri_return1.range_image_compressed)) 232 | ri = np.array(ri.data).reshape(ri.shape.dims) 233 | 234 | if laser.name == dataset_pb2.LaserName.TOP: 235 | range_image_top_pose = dataset_pb2.MatrixFloat() 236 | range_image_top_pose.ParseFromString( 237 | zlib.decompress(laser.ri_return1.range_image_pose_compressed)) 238 | range_image_pose = np.array(range_image_top_pose.data).reshape(range_image_top_pose.shape.dims) 239 | 240 | camera_projection = dataset_pb2.MatrixInt32() 241 | camera_projection.ParseFromString( 242 | zlib.decompress(laser.ri_return1.camera_projection_compressed)) 243 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 244 | 245 | else: 246 | # Return the second strongest response if available 247 | 248 | if len(laser.ri_return2.range_image_compressed) > 0: 249 | ri = dataset_pb2.MatrixFloat() 250 | ri.ParseFromString( 251 | zlib.decompress(laser.ri_return2.range_image_compressed)) 252 | ri = np.array(ri.data).reshape(ri.shape.dims) 253 | 254 | camera_projection = dataset_pb2.MatrixInt32() 255 | camera_projection.ParseFromString( 256 | zlib.decompress(laser.ri_return2.camera_projection_compressed)) 257 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 258 | 259 | return ri, camera_projection, range_image_pose 260 | 261 | def compute_beam_inclinations(calibration, height): 262 | """ Compute the inclination angle for each beam in a range image. 
""" 263 | 264 | if len(calibration.beam_inclinations) > 0: 265 | return np.array(calibration.beam_inclinations) 266 | else: 267 | inclination_min = calibration.beam_inclination_min 268 | inclination_max = calibration.beam_inclination_max 269 | 270 | return np.linspace(inclination_min, inclination_max, height) 271 | 272 | def compute_range_image_polar(range_image, extrinsic, inclination): 273 | """ Convert a range image to polar coordinates. """ 274 | 275 | height = range_image.shape[0] 276 | width = range_image.shape[1] 277 | 278 | az_correction = math.atan2(extrinsic[1,0], extrinsic[0,0]) 279 | azimuth = np.linspace(np.pi,-np.pi,width) - az_correction 280 | 281 | azimuth_tiled = np.broadcast_to(azimuth[np.newaxis,:], (height,width)) 282 | inclination_tiled = np.broadcast_to(inclination[:,np.newaxis],(height,width)) 283 | 284 | return np.stack((azimuth_tiled,inclination_tiled,range_image)) 285 | 286 | def compute_range_image_cartesian(range_image_polar, extrinsic, pixel_pose, frame_pose): 287 | """ Convert polar coordinates to cartesian coordinates. """ 288 | 289 | azimuth = range_image_polar[0] 290 | inclination = range_image_polar[1] 291 | range_image_range = range_image_polar[2] 292 | 293 | cos_azimuth = np.cos(azimuth) 294 | sin_azimuth = np.sin(azimuth) 295 | cos_incl = np.cos(inclination) 296 | sin_incl = np.sin(inclination) 297 | 298 | x = cos_azimuth * cos_incl * range_image_range 299 | y = sin_azimuth * cos_incl * range_image_range 300 | z = sin_incl * range_image_range 301 | 302 | range_image_points = np.stack([x,y,z,np.ones_like(z)]) 303 | 304 | range_image_points = np.einsum('ij,jkl->ikl', extrinsic,range_image_points) 305 | 306 | # TODO: Use the pixel_pose matrix. It seems that the bottom part of the pixel pose 307 | # matrix is missing. Not sure if this is a bug in the dataset. 308 | 309 | #if pixel_pose is not None: 310 | # range_image_points = np.einsum('hwij,jhw->ihw', pixel_pose, range_image_points) 311 | # frame_pos_inv = np.linalg.inv(frame_pose) 312 | # range_image_points = np.einsum('ij,jhw->ihw',frame_pos_inv,range_image_points) 313 | 314 | 315 | return range_image_points 316 | 317 | 318 | def project_to_pointcloud(frame, ri, camera_projection, range_image_pose, calibration): 319 | """ Create a pointcloud in vehicle space from LIDAR range image. """ 320 | beam_inclinations = compute_beam_inclinations(calibration, ri.shape[0]) 321 | beam_inclinations = np.flip(beam_inclinations) 322 | 323 | extrinsic = np.array(calibration.extrinsic.transform).reshape(4,4) 324 | frame_pose = np.array(frame.pose.transform).reshape(4,4) 325 | 326 | ri_polar = compute_range_image_polar(ri[:,:,0], extrinsic, beam_inclinations) 327 | 328 | if range_image_pose is None: 329 | pixel_pose = None 330 | else: 331 | pixel_pose = get_rotation_matrix(range_image_pose[:,:,0], range_image_pose[:,:,1], range_image_pose[:,:,2]) 332 | translation = range_image_pose[:,:,3:] 333 | pixel_pose = np.block([ 334 | [pixel_pose, translation[:,:,:,np.newaxis]], 335 | [np.zeros_like(translation)[:,:,np.newaxis],np.ones_like(translation[:,:,0])[:,:,np.newaxis,np.newaxis]]]) 336 | 337 | 338 | ri_cartesian = compute_range_image_cartesian(ri_polar, extrinsic, pixel_pose, frame_pose) 339 | ri_cartesian = ri_cartesian.transpose(1,2,0) 340 | 341 | mask = ri[:,:,0] > 0 342 | 343 | return ri_cartesian[mask,:3], ri[mask] 344 | 345 | 346 | def get(object_list, name): 347 | """ Search for an object by name in an object list. 
""" 348 | 349 | object_list = [obj for obj in object_list if obj.name == name] 350 | return object_list[0] 351 | 352 | -------------------------------------------------------------------------------- /tools/waymo_reader/dist/simple_waymo_open_dataset_reader-0.0.0-py3.8.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/tools/waymo_reader/dist/simple_waymo_open_dataset_reader-0.0.0-py3.8.egg -------------------------------------------------------------------------------- /tools/waymo_reader/generate_proto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | protoc -I=. --python_out=. simple_waymo_open_dataset_reader/label.proto 4 | protoc -I=. --python_out=. simple_waymo_open_dataset_reader/dataset.proto 5 | 6 | -------------------------------------------------------------------------------- /tools/waymo_reader/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="simple_waymo_open_dataset_reader", 5 | packages=['simple_waymo_open_dataset_reader'], 6 | install_requires=['protobuf']) 7 | 8 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: simple-waymo-open-dataset-reader 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.py 3 | simple_waymo_open_dataset_reader/__init__.py 4 | simple_waymo_open_dataset_reader/dataset_pb2.py 5 | simple_waymo_open_dataset_reader/label_pb2.py 6 | simple_waymo_open_dataset_reader/utils.py 7 | simple_waymo_open_dataset_reader.egg-info/PKG-INFO 8 | simple_waymo_open_dataset_reader.egg-info/SOURCES.txt 9 | simple_waymo_open_dataset_reader.egg-info/dependency_links.txt 10 | simple_waymo_open_dataset_reader.egg-info/requires.txt 11 | simple_waymo_open_dataset_reader.egg-info/top_level.txt -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | protobuf 2 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | simple_waymo_open_dataset_reader 2 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import struct 17 | from . import dataset_pb2 18 | 19 | class WaymoDataFileReader: 20 | def __init__(self, filename): 21 | self.file = open(filename, "rb") 22 | 23 | def get_record_table(self): 24 | """ Generate and return a table of the offset of all frame records in the file. 25 | 26 | This is particularly useful to determine the number of frames in the file 27 | and access random frames rather than read the file sequentially. 28 | """ 29 | 30 | self.file.seek(0,0) 31 | 32 | table = [] 33 | 34 | while self.file: 35 | offset = self.file.tell() 36 | 37 | try: 38 | self.read_record(header_only=True) 39 | table.append(offset) 40 | except StopIteration: 41 | break 42 | 43 | self.file.seek(0,0) 44 | 45 | return table 46 | 47 | def seek(self, offset): 48 | """ Seek to a specific frame record by offset. 49 | 50 | The offset of each frame in the file can be obtained with the function reader.get_record_table() 51 | """ 52 | 53 | self.file.seek(offset,0) 54 | 55 | def read_record(self, header_only = False): 56 | """ Read the current frame record in the file. 57 | 58 | If repeatedly called, it will return sequential records until the end of file. When the end is reached, it will raise a StopIteration exception. 59 | To reset to the first frame, call reader.seek(0) 60 | """ 61 | 62 | # TODO: Check CRCs. 63 | 64 | header = self.file.read(12) 65 | 66 | if header == b'': 67 | raise StopIteration() 68 | 69 | length, lengthcrc = struct.unpack("QI", header) 70 | 71 | 72 | if header_only: 73 | # Skip length+4 bytes ahead 74 | self.file.seek(length+4,1) 75 | return None 76 | else: 77 | data = self.file.read(length) 78 | datacrc = struct.unpack("I",self.file.read(4)) 79 | 80 | frame = dataset_pb2.Frame() 81 | frame.ParseFromString(data) 82 | return frame 83 | 84 | def __iter__(self): 85 | """ Simple iterator through the file. Note that the iterator will iterate from the current position, does not support concurrent iterators and will not reset back to the beginning when the end is reached. To reset to the first frame, call reader.seek(0) 86 | """ 87 | return self 88 | 89 | def __next__(self): 90 | return self.read_record() 91 | 92 | 93 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/dataset.proto: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The Waymo Open Dataset Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | syntax = "proto2"; 17 | 18 | package waymo.open_dataset; 19 | 20 | import "simple_waymo_open_dataset_reader/label.proto"; 21 | 22 | message MatrixShape { 23 | // Dimensions for the Matrix messages defined below. Must not be empty. 24 | // 25 | // The order of entries in 'dims' matters, as it indicates the layout of the 26 | // values in the tensor in-memory representation. 27 | // 28 | // The first entry in 'dims' is the outermost dimension used to lay out the 29 | // values; the last entry is the innermost dimension. This matches the 30 | // in-memory layout of row-major matrices. 31 | repeated int32 dims = 1; 32 | } 33 | 34 | // Row-major matrix. 35 | // Requires: data.size() = product(shape.dims()). 36 | message MatrixFloat { 37 | repeated float data = 1 [packed = true]; 38 | optional MatrixShape shape = 2; 39 | } 40 | 41 | // Row-major matrix. 42 | // Requires: data.size() = product(shape.dims()). 43 | message MatrixInt32 { 44 | repeated int32 data = 1 [packed = true]; 45 | optional MatrixShape shape = 2; 46 | } 47 | 48 | message CameraName { 49 | enum Name { 50 | UNKNOWN = 0; 51 | FRONT = 1; 52 | FRONT_LEFT = 2; 53 | FRONT_RIGHT = 3; 54 | SIDE_LEFT = 4; 55 | SIDE_RIGHT = 5; 56 | } 57 | } 58 | 59 | // 'Laser' is used interchangeably with 'Lidar' in this file. 60 | message LaserName { 61 | enum Name { 62 | UNKNOWN = 0; 63 | TOP = 1; 64 | FRONT = 2; 65 | SIDE_LEFT = 3; 66 | SIDE_RIGHT = 4; 67 | REAR = 5; 68 | } 69 | } 70 | 71 | // 4x4 row major transform matrix that tranforms 3d points from one frame to 72 | // another. 73 | message Transform { 74 | repeated double transform = 1; 75 | } 76 | 77 | message Velocity { 78 | // Velocity in m/s. 79 | optional float v_x = 1; 80 | optional float v_y = 2; 81 | optional float v_z = 3; 82 | 83 | // Angular velocity in rad/s. 84 | optional double w_x = 4; 85 | optional double w_y = 5; 86 | optional double w_z = 6; 87 | } 88 | 89 | message CameraCalibration { 90 | optional CameraName.Name name = 1; 91 | // 1d Array of [f_u, f_v, c_u, c_v, k{1, 2}, p{1, 2}, k{3}]. 92 | // Note that this intrinsic corresponds to the images after scaling. 93 | // Camera model: pinhole camera. 94 | // Lens distortion: 95 | // Radial distortion coefficients: k1, k2, k3. 96 | // Tangential distortion coefficients: p1, p2. 97 | // k_{1, 2, 3}, p_{1, 2} follows the same definition as OpenCV. 98 | // https://en.wikipedia.org/wiki/Distortion_(optics) 99 | // https://docs.opencv.org/2.4/doc/tutorials/calib3d/camera_calibration/camera_calibration.html 100 | repeated double intrinsic = 2; 101 | // Vehicle frame to camera frame. 102 | optional Transform extrinsic = 3; 103 | // Camera image size. 
104 | optional int32 width = 4; 105 | optional int32 height = 5; 106 | 107 | enum RollingShutterReadOutDirection { 108 | UNKNOWN = 0; 109 | TOP_TO_BOTTOM = 1; 110 | LEFT_TO_RIGHT = 2; 111 | BOTTOM_TO_TOP = 3; 112 | RIGHT_TO_LEFT = 4; 113 | GLOBAL_SHUTTER = 5; 114 | } 115 | optional RollingShutterReadOutDirection rolling_shutter_direction = 6; 116 | } 117 | 118 | message LaserCalibration { 119 | optional LaserName.Name name = 1; 120 | // If non-empty, the beam pitch (in radians) is non-uniform. When constructing 121 | // a range image, this mapping is used to map from beam pitch to range image 122 | // row. If this is empty, we assume a uniform distribution. 123 | repeated double beam_inclinations = 2; 124 | // beam_inclination_{min,max} (in radians) are used to determine the mapping. 125 | optional double beam_inclination_min = 3; 126 | optional double beam_inclination_max = 4; 127 | // Lidar frame to vehicle frame. 128 | optional Transform extrinsic = 5; 129 | } 130 | 131 | message Context { 132 | // A unique name that identifies the frame sequence. 133 | optional string name = 1; 134 | repeated CameraCalibration camera_calibrations = 2; 135 | repeated LaserCalibration laser_calibrations = 3; 136 | // Some stats for the run segment used. 137 | message Stats { 138 | message ObjectCount { 139 | optional Label.Type type = 1; 140 | // The number of unique objects with the type in the segment. 141 | optional int32 count = 2; 142 | } 143 | repeated ObjectCount laser_object_counts = 1; 144 | repeated ObjectCount camera_object_counts = 5; 145 | // Day, Dawn/Dusk, or Night, determined from sun elevation. 146 | optional string time_of_day = 2; 147 | // Human readable location (e.g. CHD, SF) of the run segment. 148 | optional string location = 3; 149 | // Currently either Sunny or Rain. 150 | optional string weather = 4; 151 | } 152 | optional Stats stats = 4; 153 | } 154 | 155 | // Range image is a 2d tensor. The first dim (row) represents pitch. The second 156 | // dim represents yaw. 157 | // There are two types of range images: 158 | // 1. Raw range image: Raw range image with a non-empty 159 | // 'range_image_pose_compressed' which tells the vehicle pose of each 160 | // range image cell. 161 | // 2. Virtual range image: Range image with an empty 162 | // 'range_image_pose_compressed'. This range image is constructed by 163 | // transforming all lidar points into a fixed vehicle frame (usually the 164 | // vehicle frame of the middle scan). 165 | // NOTE: 'range_image_pose_compressed' is only populated for the first range 166 | // image return. The second return has the exact the same range image pose as 167 | // the first one. 168 | message RangeImage { 169 | // Zlib compressed [H, W, 4] serialized version of MatrixFloat. 170 | // To decompress: 171 | // string val = ZlibDecompress(range_image_compressed); 172 | // MatrixFloat range_image; 173 | // range_image.ParseFromString(val); 174 | // Inner dimensions are: 175 | // * channel 0: range 176 | // * channel 1: intensity 177 | // * channel 2: elongation 178 | // * channel 3: is in any no label zone. 179 | optional bytes range_image_compressed = 2; 180 | 181 | // Lidar point to camera image projections. A point can be projected to 182 | // multiple camera images. We pick the first two at the following order: 183 | // [FRONT, FRONT_LEFT, FRONT_RIGHT, SIDE_LEFT, SIDE_RIGHT]. 184 | // 185 | // Zlib compressed [H, W, 6] serialized version of MatrixInt32. 
186 | // To decompress: 187 | // string val = ZlibDecompress(camera_projection_compressed); 188 | // MatrixInt32 camera_projection; 189 | // camera_projection.ParseFromString(val); 190 | // Inner dimensions are: 191 | // * channel 0: CameraName.Name of 1st projection. Set to UNKNOWN if no 192 | // projection. 193 | // * channel 1: x (axis along image width) 194 | // * channel 2: y (axis along image height) 195 | // * channel 3: CameraName.Name of 2nd projection. Set to UNKNOWN if no 196 | // projection. 197 | // * channel 4: x (axis along image width) 198 | // * channel 5: y (axis along image height) 199 | // Note: pixel 0 corresponds to the left edge of the first pixel in the image. 200 | optional bytes camera_projection_compressed = 3; 201 | 202 | // Zlib compressed [H, W, 6] serialized version of MatrixFloat. 203 | // To decompress: 204 | // string val = ZlibDecompress(range_image_pose_compressed); 205 | // MatrixFloat range_image_pose; 206 | // range_image_pose.ParseFromString(val); 207 | // Inner dimensions are [roll, pitch, yaw, x, y, z] represents a transform 208 | // from vehicle frame to global frame for every range image pixel. 209 | // This is ONLY populated for the first return. The second return is assumed 210 | // to have exactly the same range_image_pose_compressed. 211 | // 212 | // The roll, pitch and yaw are specified as 3-2-1 Euler angle rotations, 213 | // meaning that rotating from the navigation to vehicle frame consists of a 214 | // yaw, then pitch and finally roll rotation about the z, y and x axes 215 | // respectively. All rotations use the right hand rule and are positive 216 | // in the counter clockwise direction. 217 | optional bytes range_image_pose_compressed = 4; 218 | 219 | // Deprecated, do not use. 220 | optional MatrixFloat range_image = 1 [deprecated = true]; 221 | } 222 | 223 | // All timestamps in this proto are represented as seconds since Unix epoch. 224 | message CameraImage { 225 | optional CameraName.Name name = 1; 226 | // JPEG image. 227 | optional bytes image = 2; 228 | // SDC pose. 229 | optional Transform pose = 3; 230 | // SDC velocity at 'pose_timestamp' below. The velocity value is represented 231 | // at vehicle frame. 232 | // With this velocity, the pose can be extrapolated. 233 | // r(t+dt) = r(t) + dr/dt * dt where dr/dt = v_{x,y,z}. 234 | // R(t+dt) = R(t) + R(t)*SkewSymmetric(w_{x,y,z})*dt 235 | // r(t) = (x(t), y(t), z(t)) is vehicle location at t in the global frame. 236 | // R(t) = Rotation Matrix (3x3) from the body frame to the global frame at t. 237 | // SkewSymmetric(x,y,z) is defined as the cross-product matrix in the 238 | // following: 239 | // https://en.wikipedia.org/wiki/Cross_product#Conversion_to_matrix_multiplication 240 | optional Velocity velocity = 4; 241 | // Timestamp of the `pose` above. 242 | optional double pose_timestamp = 5; 243 | 244 | // Rolling shutter params. 245 | 246 | // Shutter duration in seconds. Time taken for one shutter. 247 | optional double shutter = 6; 248 | // Time when the sensor was triggered and when readout finished. 249 | // The difference between trigger time and readout done time includes 250 | // the exposure time and the actual sensor readout time. 251 | optional double camera_trigger_time = 7; 252 | optional double camera_readout_done_time = 8; 253 | } 254 | 255 | // The camera labels associated with a given camera image. This message 256 | // indicates the ground truth information for the camera image 257 | // recorded by the given camera. 
If there are no labeled objects in the image, 258 | // then the labels field is empty. 259 | message CameraLabels { 260 | optional CameraName.Name name = 1; 261 | repeated Label labels = 2; 262 | } 263 | 264 | message Laser { 265 | optional LaserName.Name name = 1; 266 | optional RangeImage ri_return1 = 2; 267 | optional RangeImage ri_return2 = 3; 268 | } 269 | 270 | message Frame { 271 | // This context is the same for all frames belong to the same driving run 272 | // segment. Use context.name to identify frames belong to the same driving 273 | // segment. We do not store all frames from one driving segment in one proto 274 | // to avoid huge protos. 275 | optional Context context = 1; 276 | 277 | // Frame start time, which is the timestamp of the first top lidar spin 278 | // within this frame. 279 | optional int64 timestamp_micros = 2; 280 | // The vehicle pose. 281 | optional Transform pose = 3; 282 | repeated CameraImage images = 4; 283 | repeated Laser lasers = 5; 284 | repeated Label laser_labels = 6; 285 | // Lidar labels (laser_labels) projected to camera images. A projected 286 | // label is the smallest image axis aligned rectangle that can cover all 287 | // projected points from the 3d lidar label. The projected label is ignored if 288 | // the projection is fully outside a camera image. The projected label is 289 | // clamped to the camera image if it is partially outside. 290 | repeated CameraLabels projected_lidar_labels = 9; 291 | // NOTE: if a camera identified by CameraLabels.name has an entry in this 292 | // field, then it has been labeled, even though it is possible that there are 293 | // no labeled objects in the corresponding image, which is identified by a 294 | // zero sized CameraLabels.labels. 295 | repeated CameraLabels camera_labels = 8; 296 | // No label zones in the *global* frame. 297 | repeated Polygon2dProto no_label_zones = 7; 298 | } 299 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/label.proto: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The Waymo Open Dataset Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | syntax = "proto2"; 17 | 18 | package waymo.open_dataset; 19 | 20 | message Label { 21 | // Upright box, zero pitch and roll. 22 | message Box { 23 | // Box coordinates in vehicle frame. 24 | optional double center_x = 1; 25 | optional double center_y = 2; 26 | optional double center_z = 3; 27 | 28 | // Dimensions of the box. length: dim x. width: dim y. height: dim z. 29 | optional double length = 5; 30 | optional double width = 4; 31 | optional double height = 6; 32 | 33 | // The heading of the bounding box (in radians). The heading is the angle 34 | // required to rotate +x to the surface normal of the SDC front face. 
35 | optional double heading = 7; 36 | 37 | enum Type { 38 | TYPE_UNKNOWN = 0; 39 | // 7-DOF 3D (a.k.a upright 3D box). 40 | TYPE_3D = 1; 41 | // 5-DOF 2D. Mostly used for laser top down representation. 42 | TYPE_2D = 2; 43 | // Axis aligned 2D. Mostly used for image. 44 | TYPE_AA_2D = 3; 45 | } 46 | } 47 | 48 | optional Box box = 1; 49 | 50 | message Metadata { 51 | optional double speed_x = 1; 52 | optional double speed_y = 2; 53 | optional double accel_x = 3; 54 | optional double accel_y = 4; 55 | } 56 | optional Metadata metadata = 2; 57 | 58 | enum Type { 59 | TYPE_UNKNOWN = 0; 60 | TYPE_VEHICLE = 1; 61 | TYPE_PEDESTRIAN = 2; 62 | TYPE_SIGN = 3; 63 | TYPE_CYCLIST = 4; 64 | } 65 | optional Type type = 3; 66 | // Object ID. 67 | optional string id = 4; 68 | 69 | // The difficulty level of this label. The higher the level, the harder it is. 70 | enum DifficultyLevel { 71 | UNKNOWN = 0; 72 | LEVEL_1 = 1; 73 | LEVEL_2 = 2; 74 | } 75 | 76 | // Difficulty level for detection problem. 77 | optional DifficultyLevel detection_difficulty_level = 5; 78 | // Difficulty level for tracking problem. 79 | optional DifficultyLevel tracking_difficulty_level = 6; 80 | } 81 | 82 | // Non-self-intersecting 2d polygons. This polygon is not necessarily convex. 83 | message Polygon2dProto { 84 | repeated double x = 1; 85 | repeated double y = 2; 86 | 87 | // A globally unique ID. 88 | optional string id = 3; 89 | } 90 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import numpy as np 17 | import zlib 18 | import math 19 | import io 20 | 21 | # add project directory to python path to enable relative imports 22 | import os 23 | import sys 24 | PACKAGE_PARENT = '..' 
25 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 26 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 27 | 28 | # from simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 29 | from tools.waymo_reader.simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 30 | 31 | 32 | 33 | def get_box_transformation_matrix(box): 34 | """Create a transformation matrix for a given label box pose.""" 35 | 36 | tx,ty,tz = box.center_x,box.center_y,box.center_z 37 | c = math.cos(box.heading) 38 | s = math.sin(box.heading) 39 | 40 | sl, sh, sw = box.length, box.height, box.width 41 | 42 | return np.array([ 43 | [ sl*c,-sw*s, 0,tx], 44 | [ sl*s, sw*c, 0,ty], 45 | [ 0, 0, sh,tz], 46 | [ 0, 0, 0, 1]]) 47 | 48 | def get_3d_box_projected_corners(vehicle_to_image, label): 49 | """Get the 2D coordinates of the 8 corners of a label's 3D bounding box. 50 | 51 | vehicle_to_image: Transformation matrix from the vehicle frame to the image frame. 52 | label: The object label 53 | """ 54 | 55 | box = label.box 56 | 57 | # Get the vehicle pose 58 | box_to_vehicle = get_box_transformation_matrix(box) 59 | 60 | # Calculate the projection from the box space to the image space. 61 | box_to_image = np.matmul(vehicle_to_image, box_to_vehicle) 62 | 63 | 64 | # Loop through the 8 corners constituting the 3D box 65 | # and project them onto the image 66 | vertices = np.empty([2,2,2,2]) 67 | for k in [0, 1]: 68 | for l in [0, 1]: 69 | for m in [0, 1]: 70 | # 3D point in the box space 71 | v = np.array([(k-0.5), (l-0.5), (m-0.5), 1.]) 72 | 73 | # Project the point onto the image 74 | v = np.matmul(box_to_image, v) 75 | 76 | # If any of the corner is behind the camera, ignore this object. 77 | if v[2] < 0: 78 | return None 79 | 80 | vertices[k,l,m,:] = [v[0]/v[2], v[1]/v[2]] 81 | 82 | vertices = vertices.astype(np.int32) 83 | 84 | return vertices 85 | 86 | def compute_2d_bounding_box(img_or_shape,points): 87 | """Compute the 2D bounding box for a set of 2D points. 88 | 89 | img_or_shape: Either an image or the shape of an image. 90 | img_or_shape is used to clamp the bounding box coordinates. 91 | 92 | points: The set of 2D points to use 93 | """ 94 | 95 | if isinstance(img_or_shape,tuple): 96 | shape = img_or_shape 97 | else: 98 | shape = img_or_shape.shape 99 | 100 | # Compute the 2D bounding box and draw a rectangle 101 | x1 = np.amin(points[...,0]) 102 | x2 = np.amax(points[...,0]) 103 | y1 = np.amin(points[...,1]) 104 | y2 = np.amax(points[...,1]) 105 | 106 | x1 = min(max(0,x1),shape[1]) 107 | x2 = min(max(0,x2),shape[1]) 108 | y1 = min(max(0,y1),shape[0]) 109 | y2 = min(max(0,y2),shape[0]) 110 | 111 | return (x1,y1,x2,y2) 112 | 113 | def draw_3d_box(img, vehicle_to_image, label, colour=(255,128,128), draw_2d_bounding_box=False): 114 | """Draw a 3D bounding from a given 3D label on a given "img". "vehicle_to_image" must be a projection matrix from the vehicle reference frame to the image space. 
115 | 116 | draw_2d_bounding_box: If set a 2D bounding box encompassing the 3D box will be drawn 117 | """ 118 | import cv2 119 | 120 | vertices = get_3d_box_projected_corners(vehicle_to_image, label) 121 | 122 | if vertices is None: 123 | # The box is not visible in this image 124 | return 125 | 126 | if draw_2d_bounding_box: 127 | x1,y1,x2,y2 = compute_2d_bounding_box(img.shape, vertices) 128 | 129 | if (x1 != x2 and y1 != y2): 130 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 2) 131 | else: 132 | # Draw the edges of the 3D bounding box 133 | for k in [0, 1]: 134 | for l in [0, 1]: 135 | for idx1,idx2 in [((0,k,l),(1,k,l)), ((k,0,l),(k,1,l)), ((k,l,0),(k,l,1))]: 136 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=2) 137 | # Draw a cross on the front face to identify front & back. 138 | for idx1,idx2 in [((1,0,0),(1,1,1)), ((1,1,0),(1,0,1))]: 139 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=2) 140 | 141 | def draw_2d_box(img, label, colour=(255,128,128)): 142 | """Draw a 2D bounding from a given 2D label on a given "img". 143 | """ 144 | import cv2 145 | 146 | box = label.box 147 | 148 | # Extract the 2D coordinates 149 | # It seems that "length" is the actual width and "width" is the actual height of the bounding box. Most peculiar. 150 | x1 = int(box.center_x - box.length/2) 151 | x2 = int(box.center_x + box.length/2) 152 | y1 = int(box.center_y - box.width/2) 153 | y2 = int(box.center_y + box.width/2) 154 | 155 | # Draw the rectangle 156 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 1) 157 | 158 | 159 | def decode_image(camera): 160 | """ Decode the JPEG image. """ 161 | 162 | from PIL import Image 163 | return np.array(Image.open(io.BytesIO(camera.image))) 164 | 165 | def get_image_transform(camera_calibration): 166 | """ For a given camera calibration, compute the transformation matrix 167 | from the vehicle reference frame to the image space. 168 | """ 169 | 170 | # TODO: Handle the camera distortions 171 | extrinsic = np.array(camera_calibration.extrinsic.transform).reshape(4,4) 172 | intrinsic = camera_calibration.intrinsic 173 | 174 | # Camera model: 175 | # | fx 0 cx 0 | 176 | # | 0 fy cy 0 | 177 | # | 0 0 1 0 | 178 | camera_model = np.array([ 179 | [intrinsic[0], 0, intrinsic[2], 0], 180 | [0, intrinsic[1], intrinsic[3], 0], 181 | [0, 0, 1, 0]]) 182 | 183 | # Swap the axes around 184 | axes_transformation = np.array([ 185 | [0,-1,0,0], 186 | [0,0,-1,0], 187 | [1,0,0,0], 188 | [0,0,0,1]]) 189 | 190 | # Compute the projection matrix from the vehicle space to image space. 191 | vehicle_to_image = np.matmul(camera_model, np.matmul(axes_transformation, np.linalg.inv(extrinsic))) 192 | return vehicle_to_image 193 | 194 | def parse_range_image_and_camera_projection(laser, second_response=False): 195 | """ Parse the range image for a given laser. 196 | 197 | second_response: If true, return the second strongest response instead of the primary response. 
198 | The second_response might be useful to detect the edge of objects 199 | """ 200 | 201 | range_image_pose = None 202 | camera_projection = None 203 | 204 | if not second_response: 205 | # Return the strongest response if available 206 | if len(laser.ri_return1.range_image_compressed) > 0: 207 | ri = dataset_pb2.MatrixFloat() 208 | ri.ParseFromString( 209 | zlib.decompress(laser.ri_return1.range_image_compressed)) 210 | ri = np.array(ri.data).reshape(ri.shape.dims) 211 | 212 | if laser.name == dataset_pb2.LaserName.TOP: 213 | range_image_top_pose = dataset_pb2.MatrixFloat() 214 | range_image_top_pose.ParseFromString( 215 | zlib.decompress(laser.ri_return1.range_image_pose_compressed)) 216 | range_image_pose = np.array(range_image_top_pose.data).reshape(range_image_top_pose.shape.dims) 217 | 218 | camera_projection = dataset_pb2.MatrixInt32() 219 | camera_projection.ParseFromString( 220 | zlib.decompress(laser.ri_return1.camera_projection_compressed)) 221 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 222 | 223 | else: 224 | # Return the second strongest response if available 225 | 226 | if len(laser.ri_return2.range_image_compressed) > 0: 227 | ri = dataset_pb2.MatrixFloat() 228 | ri.ParseFromString( 229 | zlib.decompress(laser.ri_return2.range_image_compressed)) 230 | ri = np.array(ri.data).reshape(ri.shape.dims) 231 | 232 | camera_projection = dataset_pb2.MatrixInt32() 233 | camera_projection.ParseFromString( 234 | zlib.decompress(laser.ri_return2.camera_projection_compressed)) 235 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 236 | 237 | return ri, camera_projection, range_image_pose 238 | 239 | 240 | def get(object_list, name): 241 | """ Search for an object by name in an object list. """ 242 | 243 | object_list = [obj for obj in object_list if obj.name == name] 244 | return object_list[0] 245 | 246 | -------------------------------------------------------------------------------- /writeup.md: -------------------------------------------------------------------------------- 1 | # Writeup: Track 3D-Objects Over Time 2 | 3 | Please use this starter template to answer the following questions: 4 | 5 | ### 1. Write a short recap of the four tracking steps and what you implemented there (filter, track management, association, camera fusion). Which results did you achieve? Which part of the project was most difficult for you to complete, and why? 6 | 7 | 8 | ### 2. Do you see any benefits in camera-lidar fusion compared to lidar-only tracking (in theory and in your concrete results)? 9 | 10 | 11 | ### 3. Which challenges will a sensor fusion system face in real-life scenarios? Did you see any of these challenges in the project? 12 | 13 | 14 | ### 4. Can you think of ways to improve your tracking results in the future? 15 | 16 | --------------------------------------------------------------------------------
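A minimal usage sketch for the bundled `simple_waymo_open_dataset_reader` package follows, showing how the pieces above fit together: `WaymoDataFileReader` for sequential and random frame access, and the helpers in `utils.py` for looking up sensors by name, decompressing range images, decoding camera JPEGs, and projecting lidar labels into an image. It assumes the project root is on the Python path, that OpenCV and Pillow are installed, and that `data/segment-XXXX.tfrecord` is a placeholder for a real Waymo segment file; it only exercises functions defined in the files shown above and is a sketch, not part of the starter code.

```python
from tools.waymo_reader.simple_waymo_open_dataset_reader import (
    WaymoDataFileReader, dataset_pb2, utils)

# Placeholder path to a Waymo Open Dataset segment (.tfrecord) file.
reader = WaymoDataFileReader("data/segment-XXXX.tfrecord")

# Build the record table once: it returns the byte offset of every frame
# and leaves the file pointer back at the beginning.
record_table = reader.get_record_table()
print(f"{len(record_table)} frames in file")

# Sequential iteration from the current file position.
for frame in reader:
    # Look up the TOP lidar, its calibration and the FRONT camera by name.
    laser = utils.get(frame.lasers, dataset_pb2.LaserName.TOP)
    calibration = utils.get(frame.context.laser_calibrations,
                            dataset_pb2.LaserName.TOP)
    camera = utils.get(frame.images, dataset_pb2.CameraName.FRONT)

    # Decompress the first-return range image ([H, W, 4]: range, intensity,
    # elongation, no-label-zone flag) plus camera projection and, for the
    # TOP lidar only, the per-pixel pose.
    ri, camera_projection, range_image_pose = \
        utils.parse_range_image_and_camera_projection(laser)

    # Decode the front camera JPEG and draw the lidar 3D labels into it.
    img = utils.decode_image(camera)
    camera_calib = utils.get(frame.context.camera_calibrations,
                             dataset_pb2.CameraName.FRONT)
    vehicle_to_image = utils.get_image_transform(camera_calib)
    for label in frame.laser_labels:
        utils.draw_3d_box(img, vehicle_to_image, label)

    break  # only the first frame in this sketch

# Random access: jump back to any frame via its offset from the record table.
reader.seek(record_table[0])
first_frame = reader.read_record()
```

Because `get_record_table()` rewinds the file after scanning, iteration afterwards starts at the first frame; combining it with `seek()` gives random access without reading the whole file sequentially.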