├── .github └── workflows │ └── manual.yml ├── CODEOWNERS ├── LICENSE.md ├── README.md ├── img ├── img_title_1.jpeg ├── img_title_2.png └── img_title_2_new.png ├── loop_over_dataset.py ├── misc ├── evaluation.py ├── helpers.py ├── objdet_tools.py └── params.py ├── my_tracking_result.mp4 ├── my_tracking_results.rar ├── requirements.txt ├── student ├── association.py ├── filter.py ├── measurements.py ├── objdet_detect.py ├── objdet_eval.py ├── objdet_pcl.py └── trackmanagement.py ├── tools ├── objdet_models │ ├── darknet │ │ ├── config │ │ │ └── complex_yolov4.cfg │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── darknet2pytorch.py │ │ │ ├── darknet_utils.py │ │ │ └── yolo_layer.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── cal_intersection_rotated_boxes.py │ │ │ ├── evaluation_utils.py │ │ │ ├── iou_rotated_boxes_utils.py │ │ │ └── torch_utils.py │ └── resnet │ │ ├── models │ │ ├── fpn_resnet.py │ │ └── resnet.py │ │ └── utils │ │ ├── evaluation_utils.py │ │ └── torch_utils.py └── waymo_reader │ ├── LICENSE │ ├── README.md │ ├── build │ └── lib │ │ └── simple_waymo_open_dataset_reader │ │ ├── __init__.py │ │ ├── dataset_pb2.py │ │ ├── label_pb2.py │ │ └── utils.py │ ├── dist │ └── simple_waymo_open_dataset_reader-0.0.0-py3.8.egg │ ├── generate_proto.sh │ ├── setup.py │ ├── simple_waymo_open_dataset_reader.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt │ └── simple_waymo_open_dataset_reader │ ├── __init__.py │ ├── dataset.proto │ ├── dataset_pb2.py │ ├── label.proto │ ├── label_pb2.py │ └── utils.py └── writeup.md /.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # Workflow to ensure whenever a Github PR is submitted, 2 | # a JIRA ticket gets created automatically. 3 | name: Manual Workflow 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on pull request events but only for the master branch 8 | pull_request_target: 9 | types: [assigned, opened, reopened] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | jobs: 15 | test-transition-issue: 16 | name: Convert Github Issue to Jira Issue 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@master 21 | 22 | - name: Login 23 | uses: atlassian/gajira-login@master 24 | env: 25 | JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} 26 | JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} 27 | JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} 28 | 29 | - name: Create NEW JIRA ticket 30 | id: create 31 | uses: atlassian/gajira-create@master 32 | with: 33 | project: CONUPDATE 34 | issuetype: Task 35 | summary: | 36 | Github PR - nd013 Self-Driving Car Engineer C2 Fusion Starter | Repo: ${{ github.repository }} | PR# ${{github.event.number}} 37 | description: | 38 | Repo link: https://github.com/${{ github.repository }} 39 | PR no. ${{ github.event.pull_request.number }} 40 | PR title: ${{ github.event.pull_request.title }} 41 | PR description: ${{ github.event.pull_request.description }} 42 | In addition, please resolve other issues, if any. 
43 | fields: '{"components": [{"name":"nd013 - Self Driving Car Engineer ND"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}' 44 | 45 | - name: Log created issue 46 | run: echo "Issue ${{ steps.create.outputs.issue }} was created" 47 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @udacity/active-public-content -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | Copyright © 2012 - 2021, Udacity, Inc. 3 | 4 | Udacity hereby grants you a license in and to the Educational Content, including but not limited to homework assignments, programming assignments, code samples, and other educational materials and tools (as further described in the Udacity Terms of Use), subject to, as modified herein, the terms and conditions of the Creative Commons Attribution-NonCommercial- NoDerivs 3.0 License located at http://creativecommons.org/licenses/by-nc-nd/4.0 and successor locations for such license (the "CC License") provided that, in each case, the Educational Content is specifically marked as being subject to the CC License. 5 | Udacity expressly defines the following as falling outside the definition of "non-commercial": 6 | (a) the sale or rental of (i) any part of the Educational Content, (ii) any derivative works based at least in part on the Educational Content, or (iii) any collective work that includes any part of the Educational Content; 7 | (b) the sale of access or a link to any part of the Educational Content without first obtaining informed consent from the buyer (that the buyer is aware that the Educational Content, or such part thereof, is available at the Website free of charge); 8 | (c) providing training, support, or editorial services that use or reference the Educational Content in exchange for a fee; 9 | (d) the sale of advertisements, sponsorships, or promotions placed on the Educational Content, or any part thereof, or the sale of advertisements, sponsorships, or promotions on any website or blog containing any part of the Educational Material, including without limitation any "pop-up advertisements"; 10 | (e) the use of Educational Content by a college, university, school, or other educational institution for instruction where tuition is charged; and 11 | (f) the use of Educational Content by a for-profit corporation or non-profit entity for internal professional development or training. 12 | 13 | 14 | 15 | THE SERVICES AND ONLINE COURSES (INCLUDING ANY CONTENT) ARE PROVIDED "AS IS" AND "AS AVAILABLE" WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. YOU ASSUME TOTAL RESPONSIBILITY AND THE ENTIRE RISK FOR YOUR USE OF THE SERVICES, ONLINE COURSES, AND CONTENT. 
WITHOUT LIMITING THE FOREGOING, WE DO NOT WARRANT THAT (A) THE SERVICES, WEBSITES, CONTENT, OR THE ONLINE COURSES WILL MEET YOUR REQUIREMENTS OR EXPECTATIONS OR ACHIEVE THE INTENDED PURPOSES, (B) THE WEBSITES OR THE ONLINE COURSES WILL NOT EXPERIENCE OUTAGES OR OTHERWISE BE UNINTERRUPTED, TIMELY, SECURE OR ERROR-FREE, (C) THE INFORMATION OR CONTENT OBTAINED THROUGH THE SERVICES, SUCH AS CHAT ROOM SERVICES, WILL BE ACCURATE, COMPLETE, CURRENT, ERROR- FREE, COMPLETELY SECURE OR RELIABLE, OR (D) THAT DEFECTS IN OR ON THE SERVICES OR CONTENT WILL BE CORRECTED. YOU ASSUME ALL RISK OF PERSONAL INJURY, INCLUDING DEATH AND DAMAGE TO PERSONAL PROPERTY, SUSTAINED FROM USE OF SERVICES. 16 | -------------------------------------------------------------------------------- /img/img_title_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/img/img_title_1.jpeg -------------------------------------------------------------------------------- /img/img_title_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/img/img_title_2.png -------------------------------------------------------------------------------- /img/img_title_2_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/img/img_title_2_new.png -------------------------------------------------------------------------------- /loop_over_dataset.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Loop over all frames in a Waymo Open Dataset file, 6 | # detect and track objects and visualize results 7 | # 8 | # You should have received a copy of the Udacity license together with this program. 
9 | # 10 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 11 | # ---------------------------------------------------------------------- 12 | # 13 | 14 | ################## 15 | ## Imports 16 | 17 | ## general package imports 18 | import os 19 | import sys 20 | import numpy as np 21 | import math 22 | import cv2 23 | import matplotlib.pyplot as plt 24 | import copy 25 | 26 | ## Add current working directory to path 27 | sys.path.append(os.getcwd()) 28 | 29 | ## Waymo open dataset reader 30 | from tools.waymo_reader.simple_waymo_open_dataset_reader import utils as waymo_utils 31 | from tools.waymo_reader.simple_waymo_open_dataset_reader import WaymoDataFileReader, dataset_pb2, label_pb2 32 | 33 | ## 3d object detection 34 | import student.objdet_pcl as pcl 35 | import student.objdet_detect as det 36 | import student.objdet_eval as eval 37 | 38 | import misc.objdet_tools as tools 39 | from misc.helpers import save_object_to_file, load_object_from_file, make_exec_list 40 | 41 | ## Tracking 42 | from student.filter import Filter 43 | from student.trackmanagement import Trackmanagement 44 | from student.association import Association 45 | from student.measurements import Sensor, Measurement 46 | from misc.evaluation import plot_tracks, plot_rmse, make_movie 47 | import misc.params as params 48 | 49 | ################## 50 | ## Set parameters and perform initializations 51 | 52 | ## Select Waymo Open Dataset file and frame numbers 53 | data_filename = 'training_segment-1005081002024129653_5313_150_5333_150_with_camera_labels.tfrecord' # Sequence 1 54 | # data_filename = 'training_segment-10072231702153043603_5725_000_5745_000_with_camera_labels.tfrecord' # Sequence 2 55 | # data_filename = 'training_segment-10963653239323173269_1924_000_1944_000_with_camera_labels.tfrecord' # Sequence 3 56 | show_only_frames = [0, 200] # show only frames in interval for debugging 57 | 58 | data_fullpath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'dataset', data_filename) # adjustable path in case this script is called from another working directory 59 | model= "fpn-resnet" # options are 'darknet', 'resnet' 60 | model_res = "resnet" 61 | sequence = "1" 62 | results_fullpath = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/' + model + '/results_sequence_' + sequence + '_' + model_res) 63 | datafile = WaymoDataFileReader(data_fullpath) 64 | datafile_iter = iter(datafile) # initialize dataset iterator 65 | 66 | ## Initialize object detection 67 | configs_det = det.load_configs(model_name='fpn_resnet') # options are 'darknet', 'fpn_resnet' 68 | model_det = det.create_model(configs_det) 69 | 70 | configs_det.use_labels_as_objects = False # True = use groundtruth labels as objects, False = use model-based detection 71 | 72 | ## Uncomment this setting to restrict the y-range in the final project 73 | # configs_det.lim_y = [-25, 25] 74 | 75 | ## Initialize tracking 76 | KF = Filter() # set up Kalman filter 77 | association = Association() # init data association 78 | manager = Trackmanagement() # init track manager 79 | lidar = None # init lidar sensor object 80 | camera = None # init camera sensor object 81 | np.random.seed(10) # make random values predictable 82 | 83 | ## Selective execution and visualization 84 | exec_detection = ['bev_from_pcl', 'detect_objects', 'validate_object_labels', 'measure_detection_performance'] # options are 'bev_from_pcl', 'detect_objects', 'validate_object_labels', 'measure_detection_performance'; options not in the list 
will be loaded from file 85 | exec_tracking = [] # options are 'perform_tracking' 86 | exec_visualization = [] # options are 'show_range_image', 'show_bev', 'show_pcl', 'show_labels_in_image', 'show_objects_and_labels_in_bev', 'show_objects_in_bev_labels_in_camera', 'show_tracks', 'show_detection_performance', 'make_tracking_movie' 87 | exec_list = make_exec_list(exec_detection, exec_tracking, exec_visualization) 88 | vis_pause_time = 0 # set pause time between frames in ms (0 = stop between frames until key is pressed) 89 | 90 | 91 | ################## 92 | ## Perform detection & tracking over all selected frames 93 | 94 | cnt_frame = 0 95 | all_labels = [] 96 | det_performance_all = [] 97 | np.random.seed(0) # make random values predictable 98 | if 'show_tracks' in exec_list: 99 | fig, (ax2, ax) = plt.subplots(1,2) # init track plot 100 | 101 | while True: 102 | try: 103 | ## Get next frame from Waymo dataset 104 | frame = next(datafile_iter) 105 | if cnt_frame < show_only_frames[0]: 106 | cnt_frame = cnt_frame + 1 107 | continue 108 | elif cnt_frame > show_only_frames[1]: 109 | print('reached end of selected frames') 110 | break 111 | 112 | print('------------------------------') 113 | print('processing frame #' + str(cnt_frame)) 114 | 115 | ################################# 116 | ## Perform 3D object detection 117 | 118 | ## Extract calibration data and front camera image from frame 119 | lidar_name = dataset_pb2.LaserName.TOP 120 | camera_name = dataset_pb2.CameraName.FRONT 121 | lidar_calibration = waymo_utils.get(frame.context.laser_calibrations, lidar_name) 122 | camera_calibration = waymo_utils.get(frame.context.camera_calibrations, camera_name) 123 | if 'load_image' in exec_list: 124 | image = tools.extract_front_camera_image(frame) 125 | 126 | ## Compute lidar point-cloud from range image 127 | if 'pcl_from_rangeimage' in exec_list: 128 | print('computing point-cloud from lidar range image') 129 | lidar_pcl = tools.pcl_from_range_image(frame, lidar_name) 130 | else: 131 | print('loading lidar point-cloud from result file') 132 | lidar_pcl = load_object_from_file(results_fullpath, data_filename, 'lidar_pcl', cnt_frame) 133 | 134 | ## Compute lidar birds-eye view (bev) 135 | if 'bev_from_pcl' in exec_list: 136 | print('computing birds-eye view from lidar pointcloud') 137 | lidar_bev = pcl.bev_from_pcl(lidar_pcl, configs_det) 138 | else: 139 | print('loading birds-eve view from result file') 140 | lidar_bev = load_object_from_file(results_fullpath, data_filename, 'lidar_bev', cnt_frame) 141 | 142 | ## 3D object detection 143 | if (configs_det.use_labels_as_objects==True): 144 | print('using groundtruth labels as objects') 145 | detections = tools.convert_labels_into_objects(frame.laser_labels, configs_det) 146 | else: 147 | if 'detect_objects' in exec_list: 148 | print('detecting objects in lidar pointcloud') 149 | detections = det.detect_objects(lidar_bev, model_det, configs_det) 150 | else: 151 | print('loading detected objects from result file') 152 | # load different data for final project vs. 
mid-term project 153 | if 'perform_tracking' in exec_list: 154 | detections = load_object_from_file(results_fullpath, data_filename, 'detections', cnt_frame) 155 | else: 156 | detections = load_object_from_file(results_fullpath, data_filename, 'detections_' + configs_det.arch + '_' + str(configs_det.conf_thresh), cnt_frame) 157 | 158 | ## Validate object labels 159 | if 'validate_object_labels' in exec_list: 160 | print("validating object labels") 161 | valid_label_flags = tools.validate_object_labels(frame.laser_labels, lidar_pcl, configs_det, 0 if configs_det.use_labels_as_objects==True else 10) 162 | else: 163 | print('loading object labels and validation from result file') 164 | valid_label_flags = load_object_from_file(results_fullpath, data_filename, 'valid_labels', cnt_frame) 165 | 166 | ## Performance evaluation for object detection 167 | if 'measure_detection_performance' in exec_list: 168 | print('measuring detection performance') 169 | det_performance = eval.measure_detection_performance(detections, frame.laser_labels, valid_label_flags, configs_det.min_iou) 170 | else: 171 | print('loading detection performance measures from file') 172 | # load different data for final project vs. mid-term project 173 | if 'perform_tracking' in exec_list: 174 | det_performance = load_object_from_file(results_fullpath, data_filename, 'det_performance', cnt_frame) 175 | else: 176 | det_performance = load_object_from_file(results_fullpath, data_filename, 'det_performance_' + configs_det.arch + '_' + str(configs_det.conf_thresh), cnt_frame) 177 | 178 | det_performance_all.append(det_performance) # store all evaluation results in a list for performance assessment at the end 179 | 180 | 181 | ## Visualization for object detection 182 | if 'show_range_image' in exec_list: 183 | img_range = pcl.show_range_image(frame, lidar_name) 184 | img_range = img_range.astype(np.uint8) 185 | cv2.imshow('range_image', img_range) 186 | cv2.waitKey(vis_pause_time) 187 | 188 | if 'show_pcl' in exec_list: 189 | pcl.show_pcl(lidar_pcl) 190 | 191 | if 'show_bev' in exec_list: 192 | tools.show_bev(lidar_bev, configs_det) 193 | cv2.waitKey(vis_pause_time) 194 | 195 | if 'show_labels_in_image' in exec_list: 196 | img_labels = tools.project_labels_into_camera(camera_calibration, image, frame.laser_labels, valid_label_flags, 0.5) 197 | cv2.imshow('img_labels', img_labels) 198 | cv2.waitKey(vis_pause_time) 199 | 200 | if 'show_objects_and_labels_in_bev' in exec_list: 201 | tools.show_objects_labels_in_bev(detections, frame.laser_labels, lidar_bev, configs_det) 202 | cv2.waitKey(vis_pause_time) 203 | 204 | if 'show_objects_in_bev_labels_in_camera' in exec_list: 205 | tools.show_objects_in_bev_labels_in_camera(detections, lidar_bev, image, frame.laser_labels, valid_label_flags, camera_calibration, configs_det) 206 | cv2.waitKey(vis_pause_time) 207 | 208 | 209 | ################################# 210 | ## Perform tracking 211 | if 'perform_tracking' in exec_list: 212 | # set up sensor objects 213 | if lidar is None: 214 | lidar = Sensor('lidar', lidar_calibration) 215 | if camera is None: 216 | camera = Sensor('camera', camera_calibration) 217 | 218 | # preprocess lidar detections 219 | meas_list_lidar = [] 220 | for detection in detections: 221 | # check if measurement lies inside specified range 222 | if detection[1] > configs_det.lim_x[0] and detection[1] < configs_det.lim_x[1] and detection[2] > configs_det.lim_y[0] and detection[2] < configs_det.lim_y[1]: 223 | meas_list_lidar = lidar.generate_measurement(cnt_frame, 
detection[1:], meas_list_lidar) 224 | 225 | # preprocess camera detections 226 | meas_list_cam = [] 227 | for label in frame.camera_labels[0].labels: 228 | if(label.type == label_pb2.Label.Type.TYPE_VEHICLE): 229 | 230 | box = label.box 231 | # use camera labels as measurements and add some random noise 232 | z = [box.center_x, box.center_y, box.width, box.length] 233 | z[0] = z[0] + np.random.normal(0, params.sigma_cam_i) 234 | z[1] = z[1] + np.random.normal(0, params.sigma_cam_j) 235 | meas_list_cam = camera.generate_measurement(cnt_frame, z, meas_list_cam) 236 | 237 | # Kalman prediction 238 | for track in manager.track_list: 239 | print('predict track', track.id) 240 | KF.predict(track) 241 | track.set_t((cnt_frame - 1)*0.1) # save next timestamp 242 | 243 | # associate all lidar measurements to all tracks 244 | association.associate_and_update(manager, meas_list_lidar, KF) 245 | 246 | # associate all camera measurements to all tracks 247 | association.associate_and_update(manager, meas_list_cam, KF) 248 | 249 | # save results for evaluation 250 | result_dict = {} 251 | for track in manager.track_list: 252 | result_dict[track.id] = track 253 | manager.result_list.append(copy.deepcopy(result_dict)) 254 | label_list = [frame.laser_labels, valid_label_flags] 255 | all_labels.append(label_list) 256 | 257 | # visualization 258 | if 'show_tracks' in exec_list: 259 | fig, ax, ax2 = plot_tracks(fig, ax, ax2, manager.track_list, meas_list_lidar, frame.laser_labels, 260 | valid_label_flags, image, camera, configs_det) 261 | if 'make_tracking_movie' in exec_list: 262 | # save track plots to file 263 | fname = results_fullpath + '/tracking%03d.png' % cnt_frame 264 | print('Saving frame', fname) 265 | fig.savefig(fname) 266 | 267 | # increment frame counter 268 | cnt_frame = cnt_frame + 1 269 | 270 | except StopIteration: 271 | # if StopIteration is raised, break from loop 272 | print("StopIteration has been raised\n") 273 | break 274 | 275 | 276 | ################################# 277 | ## Post-processing 278 | 279 | ## Evaluate object detection performance 280 | if 'show_detection_performance' in exec_list: 281 | eval.compute_performance_stats(det_performance_all, configs_det) 282 | 283 | ## Plot RMSE for all tracks 284 | if 'show_tracks' in exec_list: 285 | plot_rmse(manager, all_labels, configs_det) 286 | 287 | ## Make movie from tracking results 288 | if 'make_tracking_movie' in exec_list: 289 | make_movie(results_fullpath) 290 | -------------------------------------------------------------------------------- /misc/evaluation.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Evaluate and plot results 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | import matplotlib 16 | matplotlib.use('wxagg') # change backend so that figure maximizing works on Mac as well 17 | import matplotlib.pyplot as plt 18 | import matplotlib.patches as patches 19 | from matplotlib.path import Path 20 | from matplotlib import colors 21 | from matplotlib.transforms import Affine2D 22 | import matplotlib.ticker as ticker 23 | import os 24 | import cv2 25 | 26 | # add project directory to python path to enable relative imports 27 | import os 28 | import sys 29 | PACKAGE_PARENT = '..' 30 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 31 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 32 | 33 | from tools.waymo_reader.simple_waymo_open_dataset_reader import label_pb2 34 | 35 | def plot_tracks(fig, ax, ax2, track_list, meas_list, lidar_labels, lidar_labels_valid, 36 | image, camera, configs_det, state=None): 37 | 38 | # plot image 39 | ax.cla() 40 | ax2.cla() 41 | ax2.imshow(image) 42 | 43 | # plot tracks, measurements and ground truth in birds-eye view 44 | for track in track_list: 45 | if state == None or track.state == state: # plot e.g. only confirmed tracks 46 | 47 | # choose color according to track state 48 | if track.state == 'confirmed': 49 | col = 'green' 50 | elif track.state == 'tentative': 51 | col = 'orange' 52 | else: 53 | col = 'red' 54 | 55 | # get current state variables 56 | w = track.width 57 | h = track.height 58 | l = track.length 59 | x = track.x[0] 60 | y = track.x[1] 61 | z = track.x[2] 62 | yaw = track.yaw 63 | 64 | # plot boxes in top view 65 | point_of_rotation = np.array([w/2, l/2]) 66 | rec = plt.Rectangle(-point_of_rotation, width=w, height=l, 67 | color=col, alpha=0.2, 68 | transform=Affine2D().rotate_around(*(0,0), -yaw)+Affine2D().translate(-y,x)+ax.transData) 69 | ax.add_patch(rec) 70 | 71 | # write track id for debugging 72 | ax.text(float(-track.x[1]), float(track.x[0]+1), str(track.id)) 73 | 74 | if track.state =='initialized': 75 | ax.scatter(float(-track.x[1]), float(track.x[0]), color=col, s=80, marker='x', label='initialized track') 76 | elif track.state =='tentative': 77 | ax.scatter(float(-track.x[1]), float(track.x[0]), color=col, s=80, marker='x', label='tentative track') 78 | elif track.state =='confirmed': 79 | ax.scatter(float(-track.x[1]), float(track.x[0]), color=col, s=80, marker='x', label='confirmed track') 80 | 81 | # project tracks in image 82 | # transform from vehicle to camera coordinates 83 | pos_veh = np.ones((4, 1)) # homogeneous coordinates 84 | pos_veh[0:3] = track.x[0:3] 85 | pos_sens = camera.veh_to_sens*pos_veh # transform from vehicle to sensor coordinates 86 | x = pos_sens[0] 87 | y = pos_sens[1] 88 | z = pos_sens[2] 89 | 90 | # compute rotation around z axis 91 | R = np.matrix([[np.cos(yaw), np.sin(yaw), 0], 92 | [-np.sin(yaw), np.cos(yaw), 0], 93 | [0, 0, 1]]) 94 | 95 | # bounding box corners 96 | x_corners = [-l/2, l/2, l/2, l/2, l/2, -l/2, -l/2, -l/2] 97 | y_corners = [-w/2, -w/2, -w/2, w/2, w/2, w/2, w/2, -w/2] 98 | z_corners = [-h/2, -h/2, h/2, h/2, -h/2, -h/2, h/2, h/2] 99 | 100 | # bounding box 101 | corners_3D = np.array([x_corners, y_corners, z_corners]) 102 | 103 | # rotate 104 | corners_3D = R*corners_3D 105 | 106 | # translate 107 | corners_3D += np.array([x, y, z]).reshape((3, 1)) 
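            # Added note: the projection below follows a pinhole model with the camera
            # frame's optical axis along x, so a 3D corner (x, y, z) in camera coordinates
            # maps to image coordinates as
            #     i = c_i - f_i * y / x,    j = c_j - f_j * z / x
            # which is exactly what the per-corner loop further down computes. Boxes with
            # any corner at x <= 0 lie behind the image plane and are therefore skipped.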
108 | # print ( 'corners_3d', corners_3D) 109 | 110 | # remove bounding boxes that include negative x, projection makes no sense 111 | if np.any(corners_3D[0,:] <= 0): 112 | continue 113 | 114 | # project to image 115 | corners_2D = np.zeros((2,8)) 116 | for k in range(8): 117 | corners_2D[0,k] = camera.c_i - camera.f_i * corners_3D[1,k] / corners_3D[0,k] 118 | corners_2D[1,k] = camera.c_j - camera.f_j * corners_3D[2,k] / corners_3D[0,k] 119 | # print ( 'corners_2d', corners_2D) 120 | 121 | # edges of bounding box in vertex index from above, e.g. index 0 stands for [-l/2, -w/2, -h/2] 122 | draw_line_indices = [0, 1, 2, 3, 4, 5, 6, 7, 0, 5, 4, 1, 2, 7, 6, 3] 123 | 124 | paths_2D = np.transpose(corners_2D[:, draw_line_indices]) 125 | # print ( 'paths_2D', paths_2D) 126 | 127 | codes = [Path.LINETO]*paths_2D.shape[0] 128 | codes[0] = Path.MOVETO 129 | path = Path(paths_2D, codes) 130 | 131 | # plot bounding box in image 132 | p = patches.PathPatch( 133 | path, fill=False, color=col, linewidth=3) 134 | ax2.add_patch(p) 135 | 136 | # plot labels 137 | for label, valid in zip(lidar_labels, lidar_labels_valid): 138 | if valid: 139 | ax.scatter(-1*label.box.center_y, label.box.center_x, color='gray', s=80, marker='+', label='ground truth') 140 | # plot measurements 141 | for meas in meas_list: 142 | ax.scatter(-1*meas.z[1], meas.z[0], color='blue', marker='.', label='measurement') 143 | 144 | # maximize window 145 | mng = plt.get_current_fig_manager() 146 | mng.frame.Maximize(True) 147 | 148 | # axis 149 | ax.set_xlabel('y [m]') 150 | ax.set_ylabel('x [m]') 151 | ax.set_aspect('equal') 152 | ax.set_ylim(configs_det.lim_x[0], configs_det.lim_x[1]) # x forward, y left in vehicle coordinates 153 | ax.set_xlim(-configs_det.lim_y[1], -configs_det.lim_y[0]) 154 | # correct x ticks (positive to the left) 155 | ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(-x) if x!=0 else '{0:g}'.format(x)) 156 | ax.xaxis.set_major_formatter(ticks_x) 157 | 158 | # remove repeated labels 159 | handles, labels = ax.get_legend_handles_labels() 160 | handle_list, label_list = [], [] 161 | for handle, label in zip(handles, labels): 162 | if label not in label_list: 163 | handle_list.append(handle) 164 | label_list.append(label) 165 | ax.legend(handle_list, label_list, loc='center left', shadow=True, fontsize='x-large', bbox_to_anchor=(0.8, 0.5)) 166 | 167 | plt.pause(0.01) 168 | 169 | return fig, ax, ax2 170 | 171 | 172 | def plot_rmse(manager, all_labels, configs_det): 173 | fig, ax = plt.subplots() 174 | plot_empty = True 175 | 176 | # loop over all tracks 177 | for track_id in range(manager.last_id+1): 178 | rmse_sum = 0 179 | cnt = 0 180 | rmse = [] 181 | time = [] 182 | 183 | # loop over timesteps 184 | for i, result_dict in enumerate(manager.result_list): 185 | label_list = all_labels[i] 186 | if track_id not in result_dict: 187 | continue 188 | track = result_dict[track_id] 189 | if track.state != 'confirmed': 190 | continue 191 | 192 | # find closest label and calculate error at this timestamp 193 | min_error = np.inf 194 | for label, valid in zip(label_list[0], label_list[1]): 195 | error = 0 196 | if valid: 197 | # check if label lies inside specified range 198 | if label.box.center_x > configs_det.lim_x[0] and label.box.center_x < configs_det.lim_x[1] and label.box.center_y > configs_det.lim_y[0] and label.box.center_y < configs_det.lim_y[1]: 199 | error += (label.box.center_x - float(track.x[0]))**2 200 | error += (label.box.center_y - float(track.x[1]))**2 201 | error += (label.box.center_z - 
float(track.x[2]))**2 202 | if error < min_error: 203 | min_error = error 204 | if min_error < np.inf: 205 | error = np.sqrt(min_error) 206 | time.append(track.t) 207 | rmse.append(error) 208 | rmse_sum += error 209 | cnt += 1 210 | 211 | # calc overall RMSE 212 | if cnt != 0: 213 | plot_empty = False 214 | rmse_sum /= cnt 215 | # plot RMSE 216 | ax.plot(time, rmse, marker='x', label='RMSE track ' + str(track_id) + '\n(mean: ' 217 | + '{:.2f}'.format(rmse_sum) + ')') 218 | 219 | # maximize window 220 | mng = plt.get_current_fig_manager() 221 | mng.frame.Maximize(True) 222 | ax.set_ylim(0,1) 223 | if plot_empty: 224 | print('No confirmed tracks found to plot RMSE!') 225 | else: 226 | plt.legend(loc='center left', shadow=True, fontsize='x-large', bbox_to_anchor=(0.9, 0.5)) 227 | plt.xlabel('time [s]') 228 | plt.ylabel('RMSE [m]') 229 | plt.show() 230 | 231 | 232 | def make_movie(path): 233 | # read track plots 234 | images = [img for img in sorted(os.listdir(path)) if img.endswith(".png")] 235 | frame = cv2.imread(os.path.join(path, images[0])) 236 | height, width, layers = frame.shape 237 | 238 | # save with 10fps to result dir 239 | video = cv2.VideoWriter(os.path.join(path, 'my_tracking_results.avi'), 0, 10, (width,height)) 240 | 241 | for image in images: 242 | fname = os.path.join(path, image) 243 | video.write(cv2.imread(fname)) 244 | os.remove(fname) # clean up 245 | 246 | cv2.destroyAllWindows() 247 | video.release() -------------------------------------------------------------------------------- /misc/helpers.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : helper functions for loop_over_dataset.py 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import os 15 | import pickle 16 | 17 | ## Saves an object to a binary file 18 | def save_object_to_file(object, file_path, base_filename, object_name, frame_id=1): 19 | object_filename = os.path.join(file_path, os.path.splitext(base_filename)[0] 20 | + "__frame-" + str(frame_id) + "__" + object_name + ".pkl") 21 | with open(object_filename, 'wb') as f: 22 | pickle.dump(object, f) 23 | 24 | ## Loads an object from a binary file 25 | def load_object_from_file(file_path, base_filename, object_name, frame_id=1): 26 | object_filename = os.path.join(file_path, os.path.splitext(base_filename)[0] 27 | + "__frame-" + str(frame_id) + "__" + object_name + ".pkl") 28 | with open(object_filename, 'rb') as f: 29 | object = pickle.load(f) 30 | return object 31 | 32 | ## Prepares an exec_list with all tasks to be executed 33 | def make_exec_list(exec_detection, exec_tracking, exec_visualization): 34 | 35 | # save all tasks in exec_list 36 | exec_list = exec_detection + exec_tracking + exec_visualization 37 | 38 | # check if we need pcl 39 | if any(i in exec_list for i in ('validate_object_labels', 'bev_from_pcl')): 40 | exec_list.append('pcl_from_rangeimage') 41 | # check if we need image 42 | if any(i in exec_list for i in ('show_tracks', 'show_labels_in_image', 'show_objects_in_bev_labels_in_camera')): 43 | exec_list.append('load_image') 44 | # movie does not work without show_tracks 45 | if 'make_tracking_movie' in exec_list: 46 | exec_list.append('show_tracks') 47 | return exec_list -------------------------------------------------------------------------------- /misc/params.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Parameter file for tracking 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # general parameters 14 | dim_state = 6 # process model dimension 15 | 16 | # Kalman filter parameters (Step 1) 17 | dt = 0.1 # time increment 18 | q=3 # process noise variable for Kalman filter Q 19 | 20 | # track management parameters (Step 2) 21 | confirmed_threshold = 0.8 # track score threshold to switch from 'tentative' to 'confirmed' 22 | delete_threshold = 0.6 # track score threshold to delete confirmed tracks 23 | window = 6 # number of frames for track score calculation 24 | max_P = 3**2 # delete track if covariance of px or py bigger than this 25 | sigma_p44 = 50 # initial setting for estimation error covariance P entry for vx 26 | sigma_p55 = 50 # initial setting for estimation error covariance P entry for vy 27 | sigma_p66 = 5 # initial setting for estimation error covariance P entry for vz 28 | weight_dim = 0.1 # sliding average parameter for dimension estimation 29 | 30 | # association parameters (Step 3) 31 | gating_threshold = 0.995 # percentage of correct measurements that shall lie inside gate 32 | 33 | # measurement parameters (Step 4) 34 | sigma_lidar_x = 0.1 # measurement noise standard deviation for lidar x position 35 | sigma_lidar_y = 0.1 # measurement noise standard deviation for lidar y position 36 | sigma_lidar_z = 0.1 # measurement noise standard deviation for lidar z position 37 | sigma_cam_i = 5 # measurement noise standard deviation for image i coordinate 38 | sigma_cam_j = 5 # measurement noise standard deviation for image j coordinate 39 | -------------------------------------------------------------------------------- /my_tracking_result.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/my_tracking_result.mp4 -------------------------------------------------------------------------------- /my_tracking_results.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/my_tracking_results.rar -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | protobuf 4 | easydict 5 | pytorch 6 | pillow 7 | matplotlib 8 | wxpython 9 | shapely 10 | tqdm 11 | open3d -------------------------------------------------------------------------------- /student/association.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Data association class with single nearest neighbor association and gating based on Mahalanobis distance 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | from scipy.stats.distributions import chi2 16 | 17 | # add project directory to python path to enable relative imports 18 | import os 19 | import sys 20 | PACKAGE_PARENT = '..' 21 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 22 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 23 | 24 | import misc.params as params 25 | 26 | class Association: 27 | '''Data association class with single nearest neighbor association and gating based on Mahalanobis distance''' 28 | def __init__(self): 29 | self.association_matrix = np.matrix([]) 30 | self.unassigned_tracks = [] 31 | self.unassigned_meas = [] 32 | 33 | def associate(self, track_list, meas_list, KF): 34 | 35 | ############ 36 | # TODO Step 3: association: 37 | # - replace association_matrix with the actual association matrix based on Mahalanobis distance (see below) for all tracks and all measurements 38 | # - update list of unassigned measurements and unassigned tracks 39 | ############ 40 | 41 | # the following only works for at most one track and one measurement 42 | self.association_matrix = np.matrix([]) # reset matrix 43 | self.unassigned_tracks = [] # reset lists 44 | self.unassigned_meas = [] 45 | 46 | 47 | if len(meas_list) > 0: 48 | self.unassigned_meas = [0] 49 | if len(track_list) > 0: 50 | self.unassigned_tracks = [0] 51 | if len(meas_list) > 0 and len(track_list) > 0: 52 | self.association_matrix = np.matrix([[0]]) 53 | 54 | N = len(track_list) 55 | M = len(meas_list) 56 | self.unassigned_tracks = list(range(N)) 57 | self.unassigned_meas = list(range(M)) 58 | 59 | self.association_matrix = np.asmatrix(np.inf * np.ones((N,M))) 60 | 61 | for i in range(N): 62 | track = track_list[i] 63 | for j in range(M): 64 | meas = meas_list[j] 65 | dist = self.MHD(track,meas,KF) 66 | if self.gating(dist, meas.sensor): 67 | self.association_matrix[i,j] = dist 68 | 69 | ############ 70 | # END student code 71 | ############ 72 | 73 | def get_closest_track_and_meas(self): 74 | ############ 75 | # TODO Step 3: find closest track and measurement: 76 | # - find minimum entry in association matrix 77 | # - delete row and column 78 | # - remove corresponding track and measurement from unassigned_tracks and unassigned_meas 79 | # - return this track and measurement 80 | ############ 81 | 82 | # the following only works for at most one track and one measurement 83 | update_track = 0 84 | update_meas = 0 85 | 86 | A = self.association_matrix 87 | if np.min(A) == np.inf: 88 | return np.nan, np.nan 89 | 90 | # get indices of minimum entry 91 | ij_min = np.unravel_index(np.argmin(A, axis=None), A.shape) 92 | ind_track = ij_min[0] 93 | ind_meas = ij_min[1] 94 | 95 | # delete row and column for next update 96 | A = np.delete(A, ind_track, 0) 97 | A = np.delete(A, ind_meas, 1) 98 | self.association_matrix = A 99 | 100 | # update this track with this measurement 101 | update_track = self.unassigned_tracks[ind_track] 102 | update_meas = self.unassigned_meas[ind_meas] 103 | 104 | # remove this track and measurement from list 105 | self.unassigned_tracks.remove(update_track) 106 | self.unassigned_meas.remove(update_meas) 107 | 108 | return update_track, update_meas 109 | 110 | ############ 111 | # END student code 112 | ############ 113 | 114 | def gating(self, 
MHD, sensor): 115 | ############ 116 | # TODO Step 3: return True if measurement lies inside gate, otherwise False 117 | ############ 118 | limit = chi2.ppf(params.gating_threshold, df = sensor.dim_meas) 119 | if MHD < limit: 120 | return True 121 | else: 122 | return False 123 | 124 | ############ 125 | # END student code 126 | ############ 127 | 128 | def MHD(self, track, meas, KF): 129 | ############ 130 | # TODO Step 3: calculate and return Mahalanobis distance 131 | ########### 132 | H = meas.sensor.get_H(track.x) 133 | S_inv = np.linalg.inv(KF.S(track,meas,H)) 134 | gamma = KF.gamma(track, meas) 135 | return gamma.T*S_inv*gamma 136 | ############ 137 | # END student code 138 | ############ 139 | 140 | def associate_and_update(self, manager, meas_list, KF): 141 | # associate measurements and tracks 142 | self.associate(manager.track_list, meas_list, KF) 143 | 144 | # update associated tracks with measurements 145 | while self.association_matrix.shape[0]>0 and self.association_matrix.shape[1]>0: 146 | 147 | # search for next association between a track and a measurement 148 | ind_track, ind_meas = self.get_closest_track_and_meas() 149 | if np.isnan(ind_track): 150 | print('---no more associations---') 151 | break 152 | track = manager.track_list[ind_track] 153 | 154 | # check visibility, only update tracks in fov 155 | if not meas_list[0].sensor.in_fov(track.x): 156 | continue 157 | 158 | # Kalman update 159 | print('update track', track.id, 'with', meas_list[ind_meas].sensor.name, 'measurement', ind_meas) 160 | KF.update(track, meas_list[ind_meas]) 161 | 162 | # update score and track state 163 | manager.handle_updated_track(track) 164 | 165 | # save updated track 166 | manager.track_list[ind_track] = track 167 | 168 | # run track management 169 | manager.manage_tracks(self.unassigned_tracks, self.unassigned_meas, meas_list) 170 | 171 | for track in manager.track_list: 172 | print('track', track.id, 'score =', track.score) 173 | -------------------------------------------------------------------------------- /student/filter.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Kalman filter class 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | 16 | # add project directory to python path to enable relative imports 17 | import os 18 | import sys 19 | PACKAGE_PARENT = '..' 
20 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 21 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 22 | import misc.params as params 23 | 24 | class Filter: 25 | '''Kalman filter class''' 26 | def __init__(self): 27 | pass 28 | 29 | def F(self): 30 | ############ 31 | # TODO Step 1: implement and return system matrix F 32 | ############ 33 | dt = params.dt 34 | return np.matrix([[1, 0, 0, dt, 0 ,0], 35 | [0, 1, 0, 0, dt, 0], 36 | [0, 0, 1, 0, 0 , dt], 37 | [0, 0, 0, 1, 0, 0], 38 | [0, 0, 0, 0, 1, 0], 39 | [0, 0, 0, 0, 0, 1]]) 40 | 41 | ############ 42 | # END student code 43 | ############ 44 | 45 | def Q(self): 46 | ############ 47 | # TODO Step 1: implement and return process noise covariance Q 48 | q = params.q 49 | dt = params.dt 50 | q1 = ((dt**3)/3) * q 51 | q2 = ((dt**2)/2) * q 52 | q3 = dt * q 53 | return np.matrix([[q1, 0, 0, q2, 0, 0], 54 | [0, q1, 0, 0, q2, 0], 55 | [0, 0, q1, 0, 0, q2], 56 | [q2, 0, 0, q3, 0, 0], 57 | [0, q2, 0, 0, q3, 0], 58 | [0, 0, q2, 0, 0, q3]]) 59 | 60 | ############ 61 | # END student code 62 | ############ 63 | 64 | def predict(self, track): 65 | ############ 66 | # TODO Step 1: predict state x and estimation error covariance P to next timestep, save x and P in track 67 | ############ 68 | F = self.F() 69 | x = track.x 70 | P = track.P 71 | x = F*track.x # state prediction 72 | P = F*track.P*F.transpose() + self.Q() # covariance prediction 73 | track.set_x(x) 74 | track.set_P(P) 75 | 76 | ############ 77 | # END student code 78 | ############ 79 | 80 | def update(self, track, meas): 81 | ############ 82 | # TODO Step 1: update state x and covariance P with associated measurement, save x and P in track 83 | ############ 84 | H = meas.sensor.get_H(track.x) # measurement matrix 85 | gamma = self.gamma(track, meas) # residual 86 | S = self.S(track, meas, H) # covariance of residual 87 | K = track.P * H.transpose()* S.I # Kalman gain 88 | x = track.x + K * gamma # state update 89 | I = np.identity(params.dim_state) 90 | P = (I - K * H) * track.P # covariance update 91 | track.set_x(x) 92 | track.set_P(P) 93 | track.update_attributes(meas) 94 | 95 | 96 | def gamma(self, track, meas): 97 | ############ 98 | # TODO Step 1: calculate and return residual gamma 99 | ############ 100 | g = meas.z - meas.sensor.get_hx(track.x) 101 | return g 102 | 103 | 104 | ############ 105 | # END student code 106 | ############ 107 | 108 | def S(self, track, meas, H): 109 | ############ 110 | # TODO Step 1: calculate and return covariance of residual S 111 | ############ 112 | s = H * track.P * H.transpose() + meas.R 113 | return s 114 | 115 | ############ 116 | # END student code 117 | ############ 118 | -------------------------------------------------------------------------------- /student/measurements.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Classes for sensor and measurement 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | 16 | # add project directory to python path to enable relative imports 17 | import os 18 | import sys 19 | PACKAGE_PARENT = '..' 20 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 21 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 22 | import misc.params as params 23 | 24 | class Sensor: 25 | '''Sensor class including measurement matrix''' 26 | def __init__(self, name, calib): 27 | self.name = name 28 | if name == 'lidar': 29 | self.dim_meas = 3 30 | self.sens_to_veh = np.matrix(np.identity((4))) # transformation sensor to vehicle coordinates equals identity matrix because lidar detections are already in vehicle coordinates 31 | self.fov = [-np.pi/2, np.pi/2] # angle of field of view in radians 32 | 33 | elif name == 'camera': 34 | self.dim_meas = 2 35 | self.sens_to_veh = np.matrix(calib.extrinsic.transform).reshape(4,4) # transformation sensor to vehicle coordinates 36 | self.f_i = calib.intrinsic[0] # focal length i-coordinate 37 | self.f_j = calib.intrinsic[1] # focal length j-coordinate 38 | self.c_i = calib.intrinsic[2] # principal point i-coordinate 39 | self.c_j = calib.intrinsic[3] # principal point j-coordinate 40 | self.fov = [-0.35, 0.35] # angle of field of view in radians, inaccurate boundary region was removed 41 | 42 | self.veh_to_sens = np.linalg.inv(self.sens_to_veh) # transformation vehicle to sensor coordinates 43 | 44 | def in_fov(self, x): 45 | # check if an object x can be seen by this sensor 46 | ############ 47 | # TODO Step 4: implement a function that returns True if x lies in the sensor's field of view, 48 | # otherwise False. 
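        # Added note on the approach used below: the state estimate x is transformed from
        # vehicle to sensor coordinates via veh_to_sens, and the azimuth angle
        # alpha = arctan(p_y / p_x) of the resulting position is compared against the
        # opening angle stored in self.fov; only objects in front of the sensor
        # (p_x > 0) are considered visible.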
49 | ############ 50 | pos_veh = np.ones((4,1)) 51 | pos_veh[0:3] = x[0:3] 52 | pos_sens = self.veh_to_sens * pos_veh 53 | if pos_sens[0] > 0: 54 | alpha = np.arctan(pos_sens[1]/pos_sens[0]) 55 | if alpha > self.fov[0] and alpha < self.fov[1]: 56 | return True 57 | else: 58 | return False 59 | ############ 60 | # END student code 61 | ############ 62 | 63 | def get_hx(self, x): 64 | # calculate nonlinear measurement expectation value h(x) 65 | if self.name == 'lidar': 66 | pos_veh = np.ones((4, 1)) # homogeneous coordinates 67 | pos_veh[0:3] = x[0:3] 68 | pos_sens = self.veh_to_sens*pos_veh # transform from vehicle to lidar coordinates 69 | return pos_sens[0:3] 70 | elif self.name == 'camera': 71 | 72 | ############ 73 | # TODO Step 4: implement nonlinear camera measurement function h: 74 | # - transform position estimate from vehicle to camera coordinates 75 | # - project from camera to image coordinates 76 | # - make sure to not divide by zero, raise an error if needed 77 | # - return h(x) 78 | ############ 79 | 80 | veh_to_cam = np.ones((4,1)) 81 | veh_to_cam[0:3] = x[0:3] 82 | cam_sens = self.veh_to_sens * veh_to_cam 83 | 84 | #project camera cord to image cord 85 | fi = self.f_i 86 | fj = self.f_j 87 | ci = self.c_i 88 | cj = self.c_j 89 | hx = np.zeros((2,1)) 90 | if cam_sens[0] == 0: 91 | raise NameError('Divided number cannot be zero') 92 | else: 93 | hx[0,0] = ci - fi * cam_sens[1]/cam_sens[0] 94 | hx[1,0] = cj - fj * cam_sens[2]/cam_sens[0] 95 | 96 | return hx 97 | 98 | ############ 99 | # END student code 100 | ############ 101 | 102 | def get_H(self, x): 103 | # calculate Jacobian H at current x from h(x) 104 | H = np.matrix(np.zeros((self.dim_meas, params.dim_state))) 105 | R = self.veh_to_sens[0:3, 0:3] # rotation 106 | T = self.veh_to_sens[0:3, 3] # translation 107 | if self.name == 'lidar': 108 | H[0:3, 0:3] = R 109 | elif self.name == 'camera': 110 | # check and print error message if dividing by zero 111 | if R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0] == 0: 112 | raise NameError('Jacobian not defined for this x!') 113 | else: 114 | H[0,0] = self.f_i * (-R[1,0] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 115 | + R[0,0] * (R[1,0]*x[0] + R[1,1]*x[1] + R[1,2]*x[2] + T[1]) \ 116 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 117 | H[1,0] = self.f_j * (-R[2,0] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 118 | + R[0,0] * (R[2,0]*x[0] + R[2,1]*x[1] + R[2,2]*x[2] + T[2]) \ 119 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 120 | H[0,1] = self.f_i * (-R[1,1] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 121 | + R[0,1] * (R[1,0]*x[0] + R[1,1]*x[1] + R[1,2]*x[2] + T[1]) \ 122 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 123 | H[1,1] = self.f_j * (-R[2,1] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 124 | + R[0,1] * (R[2,0]*x[0] + R[2,1]*x[1] + R[2,2]*x[2] + T[2]) \ 125 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 126 | H[0,2] = self.f_i * (-R[1,2] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 127 | + R[0,2] * (R[1,0]*x[0] + R[1,1]*x[1] + R[1,2]*x[2] + T[1]) \ 128 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 129 | H[1,2] = self.f_j * (-R[2,2] / (R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0]) 130 | + R[0,2] * (R[2,0]*x[0] + R[2,1]*x[1] + R[2,2]*x[2] + T[2]) \ 131 | / ((R[0,0]*x[0] + R[0,1]*x[1] + R[0,2]*x[2] + T[0])**2)) 132 | return H 133 | 134 | def generate_measurement(self, num_frame, z, meas_list): 135 | # generate new measurement from this sensor and add to measurement list 136 | 
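        # Added note: with the lidar-only guard removed (see the commented-out check below),
        # this method wraps the raw detection z in a Measurement object for whichever sensor
        # generated it; the sensor-specific measurement vector and noise covariance R are
        # then set in Measurement.__init__ based on sensor.name.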
############ 137 | # TODO Step 4: remove restriction to lidar in order to include camera as well 138 | ############ 139 | 140 | # if self.name == 'lidar': 141 | meas = Measurement(num_frame, z, self) 142 | meas_list.append(meas) 143 | return meas_list 144 | 145 | ############ 146 | # END student code 147 | ############ 148 | 149 | 150 | ################### 151 | 152 | class Measurement: 153 | '''Measurement class including measurement values, covariance, timestamp, sensor''' 154 | def __init__(self, num_frame, z, sensor): 155 | # create measurement object 156 | self.t = (num_frame - 1) * params.dt # time 157 | if sensor.name == 'lidar': 158 | sigma_lidar_x = params.sigma_lidar_x # load params 159 | sigma_lidar_y = params.sigma_lidar_y 160 | sigma_lidar_z = params.sigma_lidar_z 161 | self.z = np.zeros((sensor.dim_meas,1)) # measurement vector 162 | self.z[0] = z[0] 163 | self.z[1] = z[1] 164 | self.z[2] = z[2] 165 | self.sensor = sensor # sensor that generated this measurement 166 | self.R = np.matrix([[sigma_lidar_x**2, 0, 0], # measurement noise covariance matrix 167 | [0, sigma_lidar_y**2, 0], 168 | [0, 0, sigma_lidar_z**2]]) 169 | 170 | self.width = z[4] 171 | self.length = z[5] 172 | self.height = z[3] 173 | self.yaw = z[6] 174 | elif sensor.name == 'camera': 175 | 176 | ############ 177 | # TODO Step 4: initialize camera measurement including z, R, and sensor 178 | ############ 179 | 180 | sigma_cam_i = params.sigma_cam_i 181 | sigma_cam_j = params.sigma_cam_j 182 | 183 | self.z = np.zeros((sensor.dim_meas,1)) 184 | self.z[0] = z[0] 185 | self.z[1] = z[1] 186 | self.sensor = sensor 187 | self.R = np.matrix([[sigma_cam_i**2 , 0], 188 | [0,sigma_cam_j**2]]) 189 | 190 | self.width = z[2] 191 | self.length = z[3] 192 | 193 | ############ 194 | # END student code 195 | ############ 196 | -------------------------------------------------------------------------------- /student/objdet_detect.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Detect 3D objects in lidar point clouds using deep learning 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # general package imports 14 | import numpy as np 15 | import torch 16 | from easydict import EasyDict as edict 17 | 18 | # add project directory to python path to enable relative imports 19 | import os 20 | import sys 21 | PACKAGE_PARENT = '..' 
22 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 23 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 24 | 25 | # model-related 26 | from tools.objdet_models.resnet.models import fpn_resnet 27 | from tools.objdet_models.resnet.utils.evaluation_utils import decode, post_processing 28 | 29 | from tools.objdet_models.darknet.models.darknet2pytorch import Darknet as darknet 30 | from tools.objdet_models.darknet.utils.evaluation_utils import post_processing_v2 31 | from tools.objdet_models.resnet.utils.torch_utils import _sigmoid 32 | 33 | # load model-related parameters into an edict 34 | def load_configs_model(model_name='darknet', configs=None): 35 | 36 | # init config file, if none has been passed 37 | if configs==None: 38 | configs = edict() 39 | 40 | # get parent directory of this file to enable relative paths 41 | curr_path = os.path.dirname(os.path.realpath(__file__)) 42 | parent_path = configs.model_path = os.path.abspath(os.path.join(curr_path, os.pardir)) 43 | 44 | # set parameters according to model type 45 | if model_name == "darknet": 46 | configs.model_path = os.path.join(parent_path, 'tools', 'objdet_models', 'darknet') 47 | configs.pretrained_filename = os.path.join(configs.model_path, 'pretrained', 'complex_yolov4_mse_loss.pth') 48 | configs.arch = 'darknet' 49 | configs.batch_size = 4 50 | configs.cfgfile = os.path.join(configs.model_path, 'config', 'complex_yolov4.cfg') 51 | configs.conf_thresh = 0.5 52 | configs.distributed = False 53 | configs.img_size = 608 54 | configs.nms_thresh = 0.4 55 | configs.num_samples = None 56 | configs.num_workers = 4 57 | configs.pin_memory = True 58 | configs.use_giou_loss = False 59 | 60 | elif model_name == 'fpn_resnet': 61 | ####### ID_S3_EX1-3 START ####### 62 | ####### 63 | print("student task ID_S3_EX1-3") 64 | configs.model_path = os.path.join(parent_path, 'tools', 'objdet_models', 'resnet') 65 | configs.pretrained_filename = os.path.join(configs.model_path, 'pretrained', 'fpn_resnet_18_epoch_300.pth') 66 | configs.arch = 'fpn_resnet' 67 | configs.pin_memory = True 68 | configs.conf_thresh = 0.5 69 | configs.input_size = 608 70 | configs.hm_size = (152, 152) 71 | configs.down_ratio = 4 72 | configs.max_objects = 50 73 | configs.K = 40 74 | configs.imagenet_pretrained = False 75 | configs.head_conv = 64 76 | configs.num_classes = 3 77 | configs.num_center_offset = 2 78 | configs.num_z = 1 79 | configs.num_dim = 3 80 | configs.num_direction = 2 # sin, cos 81 | 82 | configs.heads = { 83 | 'hm_cen': configs.num_classes, 84 | 'cen_offset': configs.num_center_offset, 85 | 'direction': configs.num_direction, 86 | 'z_coor': configs.num_z, 87 | 'dim': configs.num_dim 88 | } 89 | configs.num_input_features = 4 90 | 91 | ####### 92 | ####### ID_S3_EX1-3 END ####### 93 | 94 | else: 95 | raise ValueError("Error: Invalid model name") 96 | 97 | # GPU vs. CPU 98 | configs.no_cuda = True # if true, cuda is not used 99 | configs.gpu_idx = 0 # GPU index to use. 
100 | configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) 101 | 102 | return configs 103 | 104 | 105 | # load all object-detection parameters into an edict 106 | def load_configs(model_name='fpn_resnet', configs=None): 107 | 108 | # init config file, if none has been passed 109 | if configs==None: 110 | configs = edict() 111 | 112 | # birds-eye view (bev) parameters 113 | configs.lim_x = [0, 50] # detection range in m 114 | configs.lim_y = [-25, 25] 115 | configs.lim_z = [-1, 3] 116 | configs.lim_r = [0, 1.0] # reflected lidar intensity 117 | configs.bev_width = 608 # pixel resolution of bev image 118 | configs.bev_height = 608 119 | 120 | # add model-dependent parameters 121 | configs = load_configs_model(model_name, configs) 122 | 123 | # visualization parameters 124 | configs.output_width = 608 # width of result image (height may vary) 125 | configs.obj_colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0]] # 'Pedestrian': 0, 'Car': 1, 'Cyclist': 2 126 | 127 | return configs 128 | 129 | 130 | # create model according to selected model type 131 | def create_model(configs): 132 | 133 | # check for availability of model file 134 | assert os.path.isfile(configs.pretrained_filename), "No file at {}".format(configs.pretrained_filename) 135 | 136 | # create model depending on architecture name 137 | if (configs.arch == 'darknet') and (configs.cfgfile is not None): 138 | print('using darknet') 139 | model = darknet(cfgfile=configs.cfgfile, use_giou_loss=configs.use_giou_loss) 140 | 141 | elif 'fpn_resnet' in configs.arch: 142 | print('using ResNet architecture with feature pyramid') 143 | 144 | ####### ID_S3_EX1-4 START ####### 145 | ####### 146 | print("student task ID_S3_EX1-4") 147 | num_layers = 18 148 | model = fpn_resnet.get_pose_net(num_layers = num_layers, heads = configs.heads, 149 | head_conv= configs.head_conv, 150 | imagenet_pretrained = configs.imagenet_pretrained) 151 | 152 | ####### 153 | ####### ID_S3_EX1-4 END ####### 154 | 155 | else: 156 | assert False, 'Undefined model backbone' 157 | 158 | # load model weights 159 | model.load_state_dict(torch.load(configs.pretrained_filename, map_location='cpu')) 160 | print('Loaded weights from {}\n'.format(configs.pretrained_filename)) 161 | 162 | # set model to evaluation state 163 | configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) 164 | model = model.to(device=configs.device) # load model to either cpu or gpu 165 | model.eval() 166 | 167 | return model 168 | 169 | 170 | # detect trained objects in birds-eye view 171 | def detect_objects(input_bev_maps, model, configs): 172 | 173 | # deactivate autograd engine during test to reduce memory usage and speed up computations 174 | with torch.no_grad(): 175 | 176 | # perform inference 177 | outputs = model(input_bev_maps) 178 | 179 | # decode model output into target object format 180 | if 'darknet' in configs.arch: 181 | 182 | # perform post-processing 183 | output_post = post_processing_v2(outputs, conf_thresh=configs.conf_thresh, nms_thresh=configs.nms_thresh) 184 | detections = [] 185 | for sample_i in range(len(output_post)): 186 | if output_post[sample_i] is None: 187 | continue 188 | detection = output_post[sample_i] 189 | for obj in detection: 190 | x, y, w, l, im, re, _, _, _ = obj 191 | yaw = np.arctan2(im, re) 192 | detections.append([1, x, y, 0.0, 1.50, w, l, yaw]) 193 | 194 | elif 'fpn_resnet' in configs.arch: 195 | # decode output and perform post-processing 196 | 197 | ####### ID_S3_EX1-5 START 
#######
198 | #######
199 | print("student task ID_S3_EX1-5")
200 |
201 | outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
202 | outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
203 | # detections size (batch_size, K, 10)
204 | detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'], outputs['dim'], K=configs.K)
205 | detections = detections.cpu().numpy().astype(np.float32) # move to cpu and convert to numpy before post-processing
206 | detections = post_processing(detections, configs)
207 | detections = detections[0][1] # keep only detections of class 1 ('Car') from the first sample in the batch
208 | # print(detections)
209 |
210 |
211 |
212 | #######
213 | ####### ID_S3_EX1-5 END #######
214 |
215 |
216 |
217 | ####### ID_S3_EX2 START #######
218 | #######
219 | # Extract 3d bounding boxes from model response
220 | print("student task ID_S3_EX2")
221 | objects = []
222 | ## step 1 : check whether there are any detections and loop over them
223 | for box in detections:
224 | id, bev_x, bev_y, z, h, bev_w, bev_l, yaw = box
225 | ## step 2 : convert the detection from BEV-pixel coordinates into metric vehicle coordinates
226 | x = bev_y / configs.bev_height * (configs.lim_x[1] - configs.lim_x[0])
227 | y = bev_x / configs.bev_width * (configs.lim_y[1] - configs.lim_y[0]) - (configs.lim_y[1] - configs.lim_y[0])/2.0
228 | w = bev_w / configs.bev_width * (configs.lim_y[1] - configs.lim_y[0])
229 | l = bev_l / configs.bev_height * (configs.lim_x[1] - configs.lim_x[0])
230 |
231 | ## step 3 : only keep detections that lie within the limits for x, y and z set in the configs structure
232 | if ((x >= configs.lim_x[0]) and (x <= configs.lim_x[1]) and (y >= configs.lim_y[0]) and (y <= configs.lim_y[1])
233 | and (z >= configs.lim_z[0]) and (z <= configs.lim_z[1])):
234 | ## step 4 : append the current object to the 'objects' array
235 | objects.append([1, x, y, z, h, w, l, yaw])
236 | #######
237 | ####### ID_S3_EX2 END #######
238 | return objects
239 |
240 |
-------------------------------------------------------------------------------- /student/objdet_eval.py: --------------------------------------------------------------------------------
1 | # ---------------------------------------------------------------------
2 | # Project "Track 3D-Objects Over Time"
3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja.
4 | #
5 | # Purpose of this file : Evaluate performance of object detection
6 | #
7 | # You should have received a copy of the Udacity license together with this program.
8 | #
9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013
10 | # ----------------------------------------------------------------------
11 | #
12 |
13 | # general package imports
14 | import numpy as np
15 | import matplotlib
16 | #matplotlib.use('wxagg') # change backend so that figure maximizing works on Mac as well
17 | import matplotlib.pyplot as plt
18 |
19 | import torch
20 | from shapely.geometry import Polygon
21 | from operator import itemgetter
22 |
23 | # add project directory to python path to enable relative imports
24 | import os
25 | import sys
26 | PACKAGE_PARENT = '..'
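# ----------------------------------------------------------------------
# Illustration (minimal sketch, assuming the default configs of
# lim_x = [0, 50], lim_y = [-25, 25] and a 608 x 608 BEV map): the
# BEV-pixel-to-vehicle-coordinate conversion used in detect_objects() above,
# applied to one hypothetical detection.
bev_height = bev_width = 608
lim_x, lim_y = [0, 50], [-25, 25]

bev_x, bev_y = 304.0, 304.0                                   # hypothetical BEV pixel position
x = bev_y / bev_height * (lim_x[1] - lim_x[0])                # 25.0 m in front of the sensor
y = bev_x / bev_width * (lim_y[1] - lim_y[0]) - (lim_y[1] - lim_y[0]) / 2.0   # 0.0 m lateral offset
print(x, y)                                                   # 25.0 0.0
# ----------------------------------------------------------------------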
27 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__))))
28 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
29 |
30 | # object detection tools and helper functions
31 | import misc.objdet_tools as tools
32 |
33 |
34 | # compute various performance measures to assess object detection
35 | def measure_detection_performance(detections, labels, labels_valid, min_iou=0.5):
36 |
37 | # find best detection for each valid label
38 | true_positives = 0 # no. of correctly detected objects
39 | center_devs = []
40 | ious = []
41 | for label, valid in zip(labels, labels_valid):
42 | matches_lab_det = []
43 | if valid: # exclude all labels from statistics which are not considered valid
44 |
45 | # compute intersection over union (iou) and distance between centers
46 |
47 | ####### ID_S4_EX1 START #######
48 | #######
49 | print("student task ID_S4_EX1 ")
50 |
51 | ## step 1 : extract the four corners of the current label bounding-box
52 | box = label.box
53 | box_lab = tools.compute_box_corners(box.center_x, box.center_y, box.width, box.length, box.heading)
54 |
55 |
56 | ## step 2 : loop over all detected objects
57 | for bbox in detections:
58 | ## step 3 : extract the four corners of the current detection
59 | bid, x, y, z, h, w, l, yaw = bbox
60 | box_det = tools.compute_box_corners(x, y, w, l, yaw)
61 | ## step 4 : compute the center distance between label and detection bounding-box in x, y, and z
62 | dist_x = box.center_x - x
63 | dist_y = box.center_y - y
64 | dist_z = box.center_z - z
65 | ## step 5 : compute the intersection over union (IOU) between label and detection bounding-box
66 | poly_1 = Polygon(box_lab)
67 | poly_2 = Polygon(box_det)
68 | intersection = poly_1.intersection(poly_2).area
69 | union = poly_1.union(poly_2).area
70 | iou = intersection / union
71 | ## step 6 : if IOU exceeds min_iou threshold, store [iou, dist_x, dist_y, dist_z] in matches_lab_det and increase the TP count
72 | if iou > min_iou:
73 | matches_lab_det.append([iou, dist_x, dist_y, dist_z])
74 | true_positives = true_positives + 1
75 | #######
76 | ####### ID_S4_EX1 END #######
77 |
78 | # find best match and compute metrics
79 | if matches_lab_det:
80 | best_match = max(matches_lab_det, key=itemgetter(0)) # retrieve entry with max iou in case of multiple candidates
81 | ious.append(best_match[0])
82 | center_devs.append(best_match[1:])
83 |
84 |
85 | ####### ID_S4_EX2 START #######
86 | #######
87 | print("student task ID_S4_EX2")
88 |
89 | # compute positives and negatives for precision/recall
90 |
91 | ## step 1 : compute the total number of positives present in the scene
92 | all_positives = labels_valid.sum()
93 |
94 | ## step 2 : compute the number of false negatives
95 | true_positives = len(ious)
96 | false_negatives = all_positives - true_positives
97 |
98 | ## step 3 : compute the number of false positives
99 | false_positives = len(detections) - true_positives
100 |
101 | #######
102 | ####### ID_S4_EX2 END #######
103 |
104 | pos_negs = [all_positives, true_positives, false_negatives, false_positives]
105 | det_performance = [ious, center_devs, pos_negs]
106 |
107 | return det_performance
108 |
109 |
110 | # evaluate object detection performance based on all frames
111 | def compute_performance_stats(det_performance_all):
112 |
113 | # extract elements
114 | ious = []
115 | center_devs = []
116 | pos_negs = []
117 | for item in det_performance_all:
118 | ious.append(item[0])
119 | center_devs.append(item[1])
120 |
pos_negs.append(item[2]) 121 | pos_negs_arr = np.asarray(pos_negs) 122 | ####### ID_S4_EX3 START ####### 123 | ####### 124 | print('student task ID_S4_EX3') 125 | 126 | ## step 1 : extract the total number of positives, true positives, false negatives and false positives 127 | positives = sum(pos_negs_arr[:,0]) 128 | true_positives = sum(pos_negs_arr[:,1]) 129 | false_negatives = sum(pos_negs_arr[:,2]) 130 | false_positives = sum(pos_negs_arr[:,3]) 131 | 132 | ## step 2 : compute precision 133 | precision = true_positives /float(true_positives + false_positives) 134 | 135 | ## step 3 : compute recall 136 | recall = true_positives / float(true_positives + false_negatives) 137 | ####### 138 | ####### ID_S4_EX3 END ####### 139 | print('precision = ' + str(precision) + ", recall = " + str(recall)) 140 | 141 | # serialize intersection-over-union and deviations in x,y,z 142 | ious_all = [element for tupl in ious for element in tupl] 143 | devs_x_all = [] 144 | devs_y_all = [] 145 | devs_z_all = [] 146 | for tuple in center_devs: 147 | for elem in tuple: 148 | dev_x, dev_y, dev_z = elem 149 | devs_x_all.append(dev_x) 150 | devs_y_all.append(dev_y) 151 | devs_z_all.append(dev_z) 152 | 153 | 154 | # compute statistics 155 | stdev__ious = np.std(ious_all) 156 | mean__ious = np.mean(ious_all) 157 | 158 | stdev__devx = np.std(devs_x_all) 159 | mean__devx = np.mean(devs_x_all) 160 | 161 | stdev__devy = np.std(devs_y_all) 162 | mean__devy = np.mean(devs_y_all) 163 | 164 | stdev__devz = np.std(devs_z_all) 165 | mean__devz = np.mean(devs_z_all) 166 | #std_dev_x = np.std(devs_x) 167 | 168 | # plot results 169 | data = [precision, recall, ious_all, devs_x_all, devs_y_all, devs_z_all] 170 | titles = ['detection precision', 'detection recall', 'intersection over union', 'position errors in X', 'position errors in Y', 'position error in Z'] 171 | textboxes = ['', '', '', 172 | '\n'.join((r'$\mathrm{mean}=%.4f$' % (np.mean(devs_x_all), ), r'$\mathrm{sigma}=%.4f$' % (np.std(devs_x_all), ), r'$\mathrm{n}=%.0f$' % (len(devs_x_all), ))), 173 | '\n'.join((r'$\mathrm{mean}=%.4f$' % (np.mean(devs_y_all), ), r'$\mathrm{sigma}=%.4f$' % (np.std(devs_y_all), ), r'$\mathrm{n}=%.0f$' % (len(devs_x_all), ))), 174 | '\n'.join((r'$\mathrm{mean}=%.4f$' % (np.mean(devs_z_all), ), r'$\mathrm{sigma}=%.4f$' % (np.std(devs_z_all), ), r'$\mathrm{n}=%.0f$' % (len(devs_x_all), )))] 175 | 176 | f, a = plt.subplots(2, 3) 177 | a = a.ravel() 178 | num_bins = 20 179 | props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) 180 | for idx, ax in enumerate(a): 181 | ax.hist(data[idx], num_bins) 182 | ax.set_title(titles[idx]) 183 | if textboxes[idx]: 184 | ax.text(0.05, 0.95, textboxes[idx], transform=ax.transAxes, fontsize=10, 185 | verticalalignment='top', bbox=props) 186 | plt.tight_layout() 187 | plt.show() 188 | 189 | -------------------------------------------------------------------------------- /student/objdet_pcl.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 4 | # 5 | # Purpose of this file : Process the point-cloud and prepare it for object detection 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 
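# ----------------------------------------------------------------------
# Illustration (minimal sketch with made-up numbers): the shapely-based IoU
# and the precision/recall arithmetic used in student/objdet_eval.py above.
from shapely.geometry import Polygon

box_a = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])        # two overlapping 2 m x 2 m boxes
box_b = Polygon([(1, 0), (3, 0), (3, 2), (1, 2)])
intersection = box_a.intersection(box_b).area            # 2.0
union = box_a.union(box_b).area                          # 6.0
print(intersection / union)                              # 0.333...

all_positives, true_positives = 10, 8                    # hypothetical per-frame counts
false_negatives = all_positives - true_positives         # 2
false_positives = 1                                      # hypothetical
precision = true_positives / float(true_positives + false_positives)   # ~0.889
recall = true_positives / float(true_positives + false_negatives)      # 0.8
print(precision, recall)
# ----------------------------------------------------------------------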
8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # general package imports 14 | import cv2 15 | import numpy as np 16 | import torch 17 | import zlib 18 | import open3d as o3d 19 | # add project directory to python path to enable relative imports 20 | import os 21 | import sys 22 | PACKAGE_PARENT = '..' 23 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 24 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 25 | 26 | # waymo open dataset reader 27 | from tools.waymo_reader.simple_waymo_open_dataset_reader import utils as waymo_utils 28 | from tools.waymo_reader.simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 29 | 30 | # object detection tools and helper functions 31 | import misc.objdet_tools as tools 32 | 33 | # visualize lidar point-cloud 34 | def show_pcl(pcl): 35 | 36 | ####### ID_S1_EX2 START ####### 37 | ####### 38 | print("student task ID_S1_EX2") 39 | 40 | # step 1 : initialize open3d with key callback and create window 41 | vis_3d = o3d.visualization.VisualizerWithKeyCallback() 42 | vis_3d.create_window(window_name='Point Cloud image') 43 | global idx 44 | idx= True 45 | def right_click(vis_3d): 46 | global idx 47 | print('right arrow pressed') 48 | idx= False 49 | return 50 | vis_3d.register_key_callback(262, right_click) 51 | # step 2 : create instance of open3d point-cloud class 52 | pcd = o3d.geometry.PointCloud() 53 | # step 3 : set points in pcd instance by converting the point-cloud into 3d vectors (using open3d function Vector3dVector) 54 | pcd.points = o3d.utility.Vector3dVector(pcl[:,:3]) 55 | # step 4 : for the first frame, add the pcd instance to visualization using add_geometry; for all other frames, use update_geometry instead 56 | vis_3d.add_geometry(pcd) 57 | # step 5 : visualize point cloud and keep window open until right-arrow is pressed (key-code 262) 58 | while idx: 59 | vis_3d.poll_events() 60 | vis_3d.update_renderer() 61 | ####### 62 | ####### ID_S1_EX2 END ####### 63 | 64 | 65 | # visualize range image 66 | def show_range_image(frame, lidar_name): 67 | 68 | ####### ID_S1_EX1 START ####### 69 | ####### 70 | print("student task ID_S1_EX1") 71 | 72 | # step 1 : extract lidar data and range image for the roof-mounted lidar 73 | lidar = [obj for obj in frame.lasers if obj.name == lidar_name][0] 74 | 75 | # step 2 : extract the range and the intensity channel from the range image 76 | if len(lidar.ri_return1.range_image_compressed) > 0: # use first response 77 | ri = dataset_pb2.MatrixFloat() 78 | ri.ParseFromString(zlib.decompress(lidar.ri_return1.range_image_compressed)) 79 | ri = np.array(ri.data).reshape(ri.shape.dims) 80 | # step 3 : set values <0 to zero 81 | ri[ri<0]=0.0 82 | 83 | # step 4 : map the range channel onto an 8-bit scale and make sure that the full range of values is appropriately considered 84 | ri_range = ri[:,:,0] 85 | ri_range = ri_range * 255 / (np.amax(ri_range) - np.amin(ri_range)) 86 | img_range = ri_range.astype(np.uint8) 87 | # step 5 : map the intensity channel onto an 8-bit scale and normalize with the difference between the 1- and 99-percentile to mitigate the influence of outliers 88 | ri_intensity = ri[:,:,1] 89 | ri_intensity = np.amax(ri_intensity)/2 * ri_intensity * 255 / (np.amax(ri_intensity) - np.amin(ri_intensity)) 90 | img_intensity = ri_intensity.astype(np.uint8) 91 | # step 6 : stack the 
range and intensity image vertically using np.vstack and convert the result to an unsigned 8-bit integer 92 | 93 | img_range_intensity = np.vstack((img_range, img_intensity)) 94 | img_intensity = img_intensity.astype(np.uint8) 95 | deg90 = int(img_range_intensity.shape[1] / 4) 96 | ri_center = int(img_range_intensity.shape[1]/2) 97 | img_range_intensity = img_range_intensity[:,ri_center-deg90:ri_center+deg90] 98 | 99 | ####### 100 | ####### ID_S1_EX1 END ####### 101 | 102 | return img_range_intensity 103 | 104 | 105 | # create birds-eye view of lidar data 106 | def bev_from_pcl(lidar_pcl, configs): 107 | 108 | # remove lidar points outside detection area and with too low reflectivity 109 | mask = np.where((lidar_pcl[:, 0] >= configs.lim_x[0]) & (lidar_pcl[:, 0] <= configs.lim_x[1]) & 110 | (lidar_pcl[:, 1] >= configs.lim_y[0]) & (lidar_pcl[:, 1] <= configs.lim_y[1]) & 111 | (lidar_pcl[:, 2] >= configs.lim_z[0]) & (lidar_pcl[:, 2] <= configs.lim_z[1])) 112 | lidar_pcl = lidar_pcl[mask] 113 | 114 | # shift level of ground plane to avoid flipping from 0 to 255 for neighboring pixels 115 | lidar_pcl[:, 2] = lidar_pcl[:, 2] - configs.lim_z[0] 116 | 117 | # convert sensor coordinates to bev-map coordinates (center is bottom-middle) 118 | ####### ID_S2_EX1 START ####### 119 | ####### 120 | print("student task ID_S2_EX1") 121 | 122 | ## step 1 : compute bev-map discretization by dividing x-range by the bev-image height (see configs) 123 | bev_discret = (configs.lim_x[1] - configs.lim_x[0]) / configs.bev_height 124 | ## step 2 : create a copy of the lidar pcl and transform all metrix x-coordinates into bev-image coordinates 125 | lidar_pcl_cpy = np.copy(lidar_pcl) 126 | lidar_pcl_cpy[:, 0] = np.int_(np.floor(lidar_pcl_cpy[:, 0] / bev_discret)) 127 | # step 3 : perform the same operation as in step 2 for the y-coordinates but make sure that no negative bev-coordinates occur 128 | lidar_pcl_cpy[:, 1] = np.int_(np.floor(lidar_pcl_cpy[:, 1] / bev_discret)) + ((configs.bev_width + 1) / 2) 129 | lidar_pcl_cpy[:, 1] = np.abs(lidar_pcl_cpy[:,1]) 130 | # step 4 : visualize point-cloud using the function show_pcl from a previous task 131 | show_pcl(lidar_pcl_cpy) 132 | ####### 133 | ####### ID_S2_EX1 END ####### 134 | 135 | 136 | # Compute intensity layer of the BEV map 137 | ####### ID_S2_EX2 START ####### 138 | ####### 139 | print("student task ID_S2_EX2") 140 | 141 | ## step 1 : create a numpy array filled with zeros which has the same dimensions as the BEV map 142 | intensity_map = np.zeros((configs.bev_height, configs.bev_width)) 143 | 144 | # step 2 : re-arrange elements in lidar_pcl_cpy by sorting first by x, then y, then -z (use numpy.lexsort) 145 | lidar_pcl_cpy[lidar_pcl_cpy[:,3]>1.0,3] = 1.0 146 | idx_intensity = np.lexsort((-lidar_pcl_cpy[:, 2], lidar_pcl_cpy[:, 1], lidar_pcl_cpy[:, 0])) 147 | lidar_pcl_top = lidar_pcl_cpy[idx_intensity] 148 | ## step 3 : extract all points with identical x and y such that only the top-most z-coordinate is kept (use numpy.unique) 149 | ## also, store the number of points per x,y-cell in a variable named "counts" for use in the next task 150 | lidar_pcl_int, indices, counts = np.unique(lidar_pcl_cpy[:, 0:2], axis=0, return_index=True, return_counts=True) 151 | lidar_pcl_top = lidar_pcl_cpy[indices] 152 | ## step 4 : assign the intensity value of each unique entry in lidar_top_pcl to the intensity map 153 | ## make sure that the intensity is scaled in such a way that objects of interest (e.g. 
vehicles) are clearly visible 154 | ## also, make sure that the influence of outliers is mitigated by normalizing intensity on the difference between the max. and min. value within the point cloud 155 | intensity_map[np.int_(lidar_pcl_top[:, 0]), np.int_(lidar_pcl_top[:, 1])] = lidar_pcl_top[:, 3] / (np.amax(lidar_pcl_top[:, 3])-np.amin(lidar_pcl_top[:, 3])) 156 | 157 | ## step 5 : temporarily visualize the intensity map using OpenCV to make sure that vehicles separate well from the background 158 | img_intensity = intensity_map * 256 159 | img_intensity = img_intensity.astype(np.uint8) 160 | cv2.imshow('img_intensity', img_intensity) 161 | cv2.waitKey(0) 162 | cv2.destroyAllWindows() 163 | ####### 164 | ####### ID_S2_EX2 END ####### 165 | 166 | 167 | # Compute height layer of the BEV map 168 | ####### ID_S2_EX3 START ####### 169 | ####### 170 | print("student task ID_S2_EX3") 171 | 172 | ## step 1 : create a numpy array filled with zeros which has the same dimensions as the BEV map 173 | height_map = np.zeros((configs.bev_height, configs.bev_width)) 174 | ## step 2 : assign the height value of each unique entry in lidar_top_pcl to the height map 175 | ## make sure that each entry is normalized on the difference between the upper and lower height defined in the config file 176 | ## use the lidar_pcl_top data structure from the previous task to access the pixels of the height_map 177 | height_map[np.int_(lidar_pcl_top[:, 0]), np.int_(lidar_pcl_top[:, 1])] = lidar_pcl_top[:, 2] / float(np.abs(configs.lim_z[1] - configs.lim_z[0])) 178 | ## step 3 : temporarily visualize the intensity map using OpenCV to make sure that vehicles separate well from the background 179 | img_height = height_map * 256 180 | img_height = img_height.astype(np.uint8) 181 | cv2.imshow('height_map', height_map) 182 | cv2.waitKey(0) 183 | cv2.destroyAllWindows() 184 | ####### 185 | ####### ID_S2_EX3 END ####### 186 | 187 | # Compute density layer of the BEV map 188 | density_map = np.zeros((configs.bev_height + 1, configs.bev_width + 1)) 189 | _, _, counts = np.unique(lidar_pcl_cpy[:, 0:2], axis=0, return_index=True, return_counts=True) 190 | normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64)) 191 | density_map[np.int_(lidar_pcl_top[:, 0]), np.int_(lidar_pcl_top[:, 1])] = normalizedCounts 192 | 193 | # assemble 3-channel bev-map from individual maps 194 | bev_map = np.zeros((3, configs.bev_height, configs.bev_width)) 195 | bev_map[2, :, :] = density_map[:configs.bev_height, :configs.bev_width] # r_map 196 | bev_map[1, :, :] = height_map[:configs.bev_height, :configs.bev_width] # g_map 197 | bev_map[0, :, :] = intensity_map[:configs.bev_height, :configs.bev_width] # b_map 198 | 199 | # expand dimension of bev_map before converting into a tensor 200 | s1, s2, s3 = bev_map.shape 201 | bev_maps = np.zeros((1, s1, s2, s3)) 202 | bev_maps[0] = bev_map 203 | 204 | bev_maps = torch.from_numpy(bev_maps) # create tensor from birds-eye view 205 | input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float() 206 | return input_bev_maps 207 | 208 | 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /student/trackmanagement.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------- 2 | # Project "Track 3D-Objects Over Time" 3 | # Copyright (C) 2020, Dr. Antje Muntzinger / Dr. Andreas Haja. 
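# ----------------------------------------------------------------------
# Illustration (minimal sketch with a toy 3-point cloud): the lexsort + unique
# idiom used in bev_from_pcl() above to keep only the top-most point per BEV
# cell. Note that np.unique has to operate on the *sorted* copy so that the
# "first occurrence" of each cell really is its highest point.
import numpy as np

pts = np.array([[1., 2., 0.5, 0.3],                      # columns: x-cell, y-cell, z, intensity
                [1., 2., 1.5, 0.9],
                [0., 0., 0.2, 0.1]])
order = np.lexsort((-pts[:, 2], pts[:, 1], pts[:, 0]))   # sort by x, then y, then -z
pts_sorted = pts[order]
_, first_idx, counts = np.unique(pts_sorted[:, 0:2], axis=0, return_index=True, return_counts=True)
top_pts = pts_sorted[first_idx]
print(top_pts)                                           # keeps z = 0.2 for cell (0,0) and z = 1.5 for cell (1,2)
print(counts)                                            # [1 2]
# ----------------------------------------------------------------------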
4 | # 5 | # Purpose of this file : Classes for track and track management 6 | # 7 | # You should have received a copy of the Udacity license together with this program. 8 | # 9 | # https://www.udacity.com/course/self-driving-car-engineer-nanodegree--nd013 10 | # ---------------------------------------------------------------------- 11 | # 12 | 13 | # imports 14 | import numpy as np 15 | import collections 16 | 17 | # add project directory to python path to enable relative imports 18 | import os 19 | import sys 20 | PACKAGE_PARENT = '..' 21 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 22 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 23 | import misc.params as params 24 | 25 | class Track: 26 | '''Track class with state, covariance, id, score''' 27 | def __init__(self, meas, id): 28 | print('creating track no.', id) 29 | M_rot = meas.sensor.sens_to_veh[0:3, 0:3] # rotation matrix from sensor to vehicle coordinates 30 | 31 | ############ 32 | # TODO Step 2: initialization: 33 | # - replace fixed track initialization values by initialization of x and P based on 34 | # unassigned measurement transformed from sensor to vehicle coordinates 35 | # - initialize track state and track score with appropriate values 36 | ############ 37 | 38 | # self.x = np.matrix([[49.53980697], 39 | # [ 3.41006279], 40 | # [ 0.91790581], 41 | # [ 0. ], 42 | # [ 0. ], 43 | # [ 0. ]]) 44 | pos_sens = np.ones((4, 1)) 45 | pos_sens[0:3] = meas.z[0:3] 46 | pos_veh = meas.sensor.sens_to_veh*pos_sens 47 | self.x = np.zeros((6,1)) 48 | self.x[0:3] = pos_veh[0:3] 49 | # self.P = np.matrix([[9.0e-02, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], 50 | # [0.0e+00, 9.0e-02, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00], 51 | # [0.0e+00, 0.0e+00, 6.4e-03, 0.0e+00, 0.0e+00, 0.0e+00], 52 | # [0.0e+00, 0.0e+00, 0.0e+00, 2.5e+03, 0.0e+00, 0.0e+00], 53 | # [0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 2.5e+03, 0.0e+00], 54 | # [0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 0.0e+00, 2.5e+01]]) 55 | P_pos = M_rot * meas.R * np.transpose(M_rot) 56 | P_vel = np.matrix([[params.sigma_p44**2, 0, 0], 57 | [0, params.sigma_p55**2, 0], 58 | [0, 0, params.sigma_p66**2]]) 59 | self.P = np.zeros((6, 6)) 60 | self.P[0:3, 0:3] = P_pos 61 | self.P[3:6, 3:6] = P_vel 62 | self.state = 'initialized' 63 | self.score = 1/params.window 64 | 65 | ############ 66 | # END student code 67 | ############ 68 | 69 | # other track attributes 70 | self.id = id 71 | self.width = meas.width 72 | self.length = meas.length 73 | self.height = meas.height 74 | self.yaw = np.arccos(M_rot[0,0]*np.cos(meas.yaw) + M_rot[0,1]*np.sin(meas.yaw)) # transform rotation from sensor to vehicle coordinates 75 | self.t = meas.t 76 | 77 | def set_x(self, x): 78 | self.x = x 79 | 80 | def set_P(self, P): 81 | self.P = P 82 | 83 | def set_t(self, t): 84 | self.t = t 85 | 86 | def update_attributes(self, meas): 87 | # use exponential sliding average to estimate dimensions and orientation 88 | if meas.sensor.name == 'lidar': 89 | c = params.weight_dim 90 | self.width = c*meas.width + (1 - c)*self.width 91 | self.length = c*meas.length + (1 - c)*self.length 92 | self.height = c*meas.height + (1 - c)*self.height 93 | M_rot = meas.sensor.sens_to_veh 94 | self.yaw = np.arccos(M_rot[0,0]*np.cos(meas.yaw) + M_rot[0,1]*np.sin(meas.yaw)) # transform rotation from sensor to vehicle coordinates 95 | 96 | 97 | ################### 98 | 99 | class Trackmanagement: 100 | '''Track manager with logic for initializing and deleting objects''' 
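# ----------------------------------------------------------------------
# Illustration (minimal sketch): how the Track constructor above turns an
# unassigned lidar measurement into an initial state x and covariance P.
# sens_to_veh, z, R and the sigma values below are assumed placeholder values,
# not the ones from misc/params.py.
import numpy as np

sens_to_veh = np.matrix(np.identity(4))          # assumed sensor-to-vehicle transform
z = np.matrix([[10.0], [2.0], [0.5]])            # hypothetical lidar position measurement
R = np.matrix(np.diag([0.01, 0.01, 0.01]))       # hypothetical measurement covariance
sigma_p44 = sigma_p55 = 50.0                     # assumed initial velocity std devs
sigma_p66 = 5.0

pos_sens = np.ones((4, 1))
pos_sens[0:3] = z[0:3]
pos_veh = sens_to_veh * pos_sens                 # transform position to vehicle coordinates

x = np.zeros((6, 1))
x[0:3] = pos_veh[0:3]                            # position from measurement, velocity set to 0

M_rot = sens_to_veh[0:3, 0:3]
P = np.zeros((6, 6))
P[0:3, 0:3] = M_rot * R * M_rot.T                # position covariance from measurement noise
P[3:6, 3:6] = np.diag([sigma_p44**2, sigma_p55**2, sigma_p66**2])
print(x.T)
# ----------------------------------------------------------------------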
101 | def __init__(self): 102 | self.N = 0 # current number of tracks 103 | self.track_list = [] 104 | self.last_id = -1 105 | self.result_list = [] 106 | 107 | def manage_tracks(self, unassigned_tracks, unassigned_meas, meas_list): 108 | ############ 109 | # TODO Step 2: implement track management: 110 | # - decrease the track score for unassigned tracks 111 | # - delete tracks if the score is too low or P is too big (check params.py for parameters that might be helpful, but 112 | # feel free to define your own parameters) 113 | ############ 114 | 115 | # decrease score for unassigned tracks 116 | for i in unassigned_tracks: 117 | track = self.track_list[i] 118 | # check visibility 119 | if meas_list: # if not empty 120 | if meas_list[0].sensor.in_fov(track.x): 121 | track.state = 'tentative' 122 | if track.score > params.delete_threshold + 1: 123 | track.score = params.delete_threshold + 1 124 | track.score -= 1./params.window 125 | 126 | # delete old tracks 127 | for track in self.track_list: 128 | if track.score <= params.delete_threshold: 129 | if track.P[0, 0] >= params.max_P or track.P[1, 1] >= params.max_P: 130 | self.delete_track(track) 131 | 132 | ############ 133 | # END student code 134 | ############ 135 | 136 | # initialize new track with unassigned measurement 137 | for j in unassigned_meas: 138 | if meas_list[j].sensor.name == 'lidar': # only initialize with lidar measurements 139 | self.init_track(meas_list[j]) 140 | 141 | def addTrackToList(self, track): 142 | self.track_list.append(track) 143 | self.N += 1 144 | self.last_id = track.id 145 | 146 | def init_track(self, meas): 147 | track = Track(meas, self.last_id + 1) 148 | self.addTrackToList(track) 149 | 150 | def delete_track(self, track): 151 | print('deleting track no.', track.id) 152 | self.track_list.remove(track) 153 | 154 | def handle_updated_track(self, track): 155 | ############ 156 | # TODO Step 2: implement track management for updated tracks: 157 | # - increase track score 158 | # - set track state to 'tentative' or 'confirmed' 159 | ############ 160 | 161 | track.score += 1./params.window 162 | if track.score > params.confirmed_threshold: 163 | track.state = 'confirmed' 164 | else: 165 | track.state = 'tentative' 166 | 167 | ############ 168 | # END student code 169 | ############ 170 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/tools/objdet_models/darknet/models/__init__.py -------------------------------------------------------------------------------- /tools/objdet_models/darknet/models/darknet_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 5 | """ 6 | 7 | import sys 8 | 9 | import torch 10 | 11 | sys.path.append('../') 12 | from utils.torch_utils import convert2cpu 13 | 14 | __all__ = ['parse_cfg', 'print_cfg', 'load_conv', 'load_conv_bn', 'save_conv', 'save_conv_bn', 'load_fc', 'save_fc'] 15 | 16 | 17 | def parse_cfg(cfgfile): 18 | blocks = [] 19 | fp = open(cfgfile, 'r') 20 | block = None 21 | line = fp.readline() 22 | while line != '': 23 | line = line.rstrip() 24 | if line == '' or line[0] == '#': 25 | 
line = fp.readline() 26 | continue 27 | elif line[0] == '[': 28 | if block: 29 | blocks.append(block) 30 | block = dict() 31 | block['type'] = line.lstrip('[').rstrip(']') 32 | # set default value 33 | if block['type'] == 'convolutional': 34 | block['batch_normalize'] = 0 35 | else: 36 | key, value = line.split('=') 37 | key = key.strip() 38 | if key == 'type': 39 | key = '_type' 40 | value = value.strip() 41 | block[key] = value 42 | line = fp.readline() 43 | 44 | if block: 45 | blocks.append(block) 46 | fp.close() 47 | return blocks 48 | 49 | 50 | def print_cfg(blocks): 51 | print('layer filters size input output') 52 | prev_width = 416 53 | prev_height = 416 54 | prev_filters = 3 55 | out_filters = [] 56 | out_widths = [] 57 | out_heights = [] 58 | ind = -2 59 | for block in blocks: 60 | ind = ind + 1 61 | if block['type'] == 'net': 62 | prev_width = int(block['width']) 63 | prev_height = int(block['height']) 64 | continue 65 | elif block['type'] == 'convolutional': 66 | filters = int(block['filters']) 67 | kernel_size = int(block['size']) 68 | stride = int(block['stride']) 69 | is_pad = int(block['pad']) 70 | pad = (kernel_size - 1) // 2 if is_pad else 0 71 | width = (prev_width + 2 * pad - kernel_size) // stride + 1 72 | height = (prev_height + 2 * pad - kernel_size) // stride + 1 73 | print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 74 | ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, 75 | height, filters)) 76 | prev_width = width 77 | prev_height = height 78 | prev_filters = filters 79 | out_widths.append(prev_width) 80 | out_heights.append(prev_height) 81 | out_filters.append(prev_filters) 82 | elif block['type'] == 'maxpool': 83 | pool_size = int(block['size']) 84 | stride = int(block['stride']) 85 | width = prev_width // stride 86 | height = prev_height // stride 87 | print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 88 | ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, 89 | filters)) 90 | prev_width = width 91 | prev_height = height 92 | prev_filters = filters 93 | out_widths.append(prev_width) 94 | out_heights.append(prev_height) 95 | out_filters.append(prev_filters) 96 | elif block['type'] == 'avgpool': 97 | width = 1 98 | height = 1 99 | print('%5d %-6s %3d x %3d x%4d -> %3d' % ( 100 | ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) 101 | prev_width = width 102 | prev_height = height 103 | prev_filters = filters 104 | out_widths.append(prev_width) 105 | out_heights.append(prev_height) 106 | out_filters.append(prev_filters) 107 | elif block['type'] == 'softmax': 108 | print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters)) 109 | out_widths.append(prev_width) 110 | out_heights.append(prev_height) 111 | out_filters.append(prev_filters) 112 | elif block['type'] == 'cost': 113 | print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters)) 114 | out_widths.append(prev_width) 115 | out_heights.append(prev_height) 116 | out_filters.append(prev_filters) 117 | elif block['type'] == 'reorg': 118 | stride = int(block['stride']) 119 | filters = stride * stride * prev_filters 120 | width = prev_width // stride 121 | height = prev_height // stride 122 | print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 123 | ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) 124 | prev_width = width 125 | prev_height = height 126 | prev_filters = filters 127 | out_widths.append(prev_width) 128 | 
out_heights.append(prev_height) 129 | out_filters.append(prev_filters) 130 | elif block['type'] == 'upsample': 131 | stride = int(block['stride']) 132 | filters = prev_filters 133 | width = prev_width * stride 134 | height = prev_height * stride 135 | print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 136 | ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) 137 | prev_width = width 138 | prev_height = height 139 | prev_filters = filters 140 | out_widths.append(prev_width) 141 | out_heights.append(prev_height) 142 | out_filters.append(prev_filters) 143 | elif block['type'] == 'route': 144 | layers = block['layers'].split(',') 145 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 146 | if len(layers) == 1: 147 | print('%5d %-6s %d' % (ind, 'route', layers[0])) 148 | prev_width = out_widths[layers[0]] 149 | prev_height = out_heights[layers[0]] 150 | prev_filters = out_filters[layers[0]] 151 | elif len(layers) == 2: 152 | print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) 153 | prev_width = out_widths[layers[0]] 154 | prev_height = out_heights[layers[0]] 155 | assert (prev_width == out_widths[layers[1]]) 156 | assert (prev_height == out_heights[layers[1]]) 157 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 158 | elif len(layers) == 4: 159 | print('%5d %-6s %d %d %d %d' % (ind, 'route', layers[0], layers[1], layers[2], layers[3])) 160 | prev_width = out_widths[layers[0]] 161 | prev_height = out_heights[layers[0]] 162 | assert (prev_width == out_widths[layers[1]] == out_widths[layers[2]] == out_widths[layers[3]]) 163 | assert (prev_height == out_heights[layers[1]] == out_heights[layers[2]] == out_heights[layers[3]]) 164 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + out_filters[ 165 | layers[3]] 166 | else: 167 | print("route error !!! 
{} {} {}".format(sys._getframe().f_code.co_filename, 168 | sys._getframe().f_code.co_name, sys._getframe().f_lineno)) 169 | 170 | out_widths.append(prev_width) 171 | out_heights.append(prev_height) 172 | out_filters.append(prev_filters) 173 | elif block['type'] in ['region', 'yolo']: 174 | print('%5d %-6s' % (ind, 'detection')) 175 | out_widths.append(prev_width) 176 | out_heights.append(prev_height) 177 | out_filters.append(prev_filters) 178 | elif block['type'] == 'shortcut': 179 | from_id = int(block['from']) 180 | from_id = from_id if from_id > 0 else from_id + ind 181 | print('%5d %-6s %d' % (ind, 'shortcut', from_id)) 182 | prev_width = out_widths[from_id] 183 | prev_height = out_heights[from_id] 184 | prev_filters = out_filters[from_id] 185 | out_widths.append(prev_width) 186 | out_heights.append(prev_height) 187 | out_filters.append(prev_filters) 188 | elif block['type'] == 'connected': 189 | filters = int(block['output']) 190 | print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) 191 | prev_filters = filters 192 | out_widths.append(1) 193 | out_heights.append(1) 194 | out_filters.append(prev_filters) 195 | else: 196 | print('unknown type %s' % (block['type'])) 197 | 198 | 199 | def load_conv(buf, start, conv_model): 200 | num_w = conv_model.weight.numel() 201 | num_b = conv_model.bias.numel() 202 | conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 203 | start = start + num_b 204 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape)) 205 | start = start + num_w 206 | return start 207 | 208 | 209 | def save_conv(fp, conv_model): 210 | if conv_model.bias.is_cuda: 211 | convert2cpu(conv_model.bias.data).numpy().tofile(fp) 212 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 213 | else: 214 | conv_model.bias.data.numpy().tofile(fp) 215 | conv_model.weight.data.numpy().tofile(fp) 216 | 217 | 218 | def load_conv_bn(buf, start, conv_model, bn_model): 219 | num_w = conv_model.weight.numel() 220 | num_b = bn_model.bias.numel() 221 | bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 222 | start = start + num_b 223 | bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])) 224 | start = start + num_b 225 | bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])) 226 | start = start + num_b 227 | bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])) 228 | start = start + num_b 229 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape)) 230 | start = start + num_w 231 | return start 232 | 233 | 234 | def save_conv_bn(fp, conv_model, bn_model): 235 | if bn_model.bias.is_cuda: 236 | convert2cpu(bn_model.bias.data).numpy().tofile(fp) 237 | convert2cpu(bn_model.weight.data).numpy().tofile(fp) 238 | convert2cpu(bn_model.running_mean).numpy().tofile(fp) 239 | convert2cpu(bn_model.running_var).numpy().tofile(fp) 240 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 241 | else: 242 | bn_model.bias.data.numpy().tofile(fp) 243 | bn_model.weight.data.numpy().tofile(fp) 244 | bn_model.running_mean.numpy().tofile(fp) 245 | bn_model.running_var.numpy().tofile(fp) 246 | conv_model.weight.data.numpy().tofile(fp) 247 | 248 | 249 | def load_fc(buf, start, fc_model): 250 | num_w = fc_model.weight.numel() 251 | num_b = fc_model.bias.numel() 252 | fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])) 253 | start = start + num_b 254 | 
fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])) 255 | start = start + num_w 256 | return start 257 | 258 | 259 | def save_fc(fp, fc_model): 260 | fc_model.bias.data.numpy().tofile(fp) 261 | fc_model.weight.data.numpy().tofile(fp) 262 | 263 | 264 | if __name__ == '__main__': 265 | import sys 266 | 267 | blocks = parse_cfg('cfg/yolo.cfg') 268 | if len(sys.argv) == 2: 269 | blocks = parse_cfg(sys.argv[1]) 270 | print_cfg(blocks) 271 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/models/yolo_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.05 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: This script for the yolo layer 9 | 10 | # Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 11 | # Refer: https://github.com/VCasecnikovs/Yet-Another-YOLOv4-Pytorch 12 | """ 13 | 14 | import sys 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | 20 | #sys.path.append('../') 21 | # add project directory to python path to enable relative imports 22 | import os 23 | import sys 24 | PACKAGE_PARENT = '..' 25 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 26 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 27 | 28 | from utils.torch_utils import to_cpu 29 | from utils.iou_rotated_boxes_utils import iou_pred_vs_target_boxes, iou_rotated_boxes_targets_vs_anchors, \ 30 | get_polygons_areas_fix_xy 31 | 32 | 33 | class YoloLayer(nn.Module): 34 | """Yolo layer""" 35 | 36 | def __init__(self, num_classes, anchors, stride, scale_x_y, ignore_thresh): 37 | super(YoloLayer, self).__init__() 38 | # Update the attributions when parsing the cfg during create the darknet 39 | self.num_classes = num_classes 40 | self.anchors = anchors 41 | self.num_anchors = len(anchors) 42 | self.stride = stride 43 | self.scale_x_y = scale_x_y 44 | self.ignore_thresh = ignore_thresh 45 | 46 | self.noobj_scale = 100 47 | self.obj_scale = 1 48 | self.lgiou_scale = 3.54 49 | self.leular_scale = 3.54 50 | self.lobj_scale = 64.3 51 | self.lcls_scale = 37.4 52 | 53 | self.seen = 0 54 | # Initialize dummy variables 55 | self.grid_size = 0 56 | self.img_size = 0 57 | self.metrics = {} 58 | 59 | def compute_grid_offsets(self, grid_size): 60 | self.grid_size = grid_size 61 | g = self.grid_size 62 | self.stride = self.img_size / self.grid_size 63 | # Calculate offsets for each grid 64 | self.grid_x = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).view([1, 1, g, g]) 65 | self.grid_y = torch.arange(g, device=self.device, dtype=torch.float).repeat(g, 1).t().view([1, 1, g, g]) 66 | self.scaled_anchors = torch.tensor( 67 | [(a_w / self.stride, a_h / self.stride, im, re) for a_w, a_h, im, re in self.anchors], device=self.device, 68 | dtype=torch.float) 69 | self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1)) 70 | self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1)) 71 | 72 | # Pre compute polygons and areas of anchors 73 | self.scaled_anchors_polygons, self.scaled_anchors_areas = get_polygons_areas_fix_xy(self.scaled_anchors) 74 | 75 | def build_targets(self, 
pred_boxes, pred_cls, target, anchors): 76 | """ Built yolo targets to compute loss 77 | :param out_boxes: [num_samples or batch, num_anchors, grid_size, grid_size, 6] 78 | :param pred_cls: [num_samples or batch, num_anchors, grid_size, grid_size, num_classes] 79 | :param target: [num_boxes, 8] 80 | :param anchors: [num_anchors, 4] 81 | :return: 82 | """ 83 | nB, nA, nG, _, nC = pred_cls.size() 84 | n_target_boxes = target.size(0) 85 | 86 | # Create output tensors on "device" 87 | obj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.uint8) 88 | noobj_mask = torch.full(size=(nB, nA, nG, nG), fill_value=1, device=self.device, dtype=torch.uint8) 89 | class_mask = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 90 | iou_scores = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 91 | tx = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 92 | ty = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 93 | tw = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 94 | th = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 95 | tim = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 96 | tre = torch.full(size=(nB, nA, nG, nG), fill_value=0, device=self.device, dtype=torch.float) 97 | tcls = torch.full(size=(nB, nA, nG, nG, nC), fill_value=0, device=self.device, dtype=torch.float) 98 | tconf = obj_mask.float() 99 | giou_loss = torch.tensor([0.], device=self.device, dtype=torch.float) 100 | 101 | if n_target_boxes > 0: # Make sure that there is at least 1 box 102 | b, target_labels = target[:, :2].long().t() 103 | target_boxes = torch.cat((target[:, 2:6] * nG, target[:, 6:8]), dim=-1) # scale up x, y, w, h 104 | 105 | gxy = target_boxes[:, :2] 106 | gwh = target_boxes[:, 2:4] 107 | gimre = target_boxes[:, 4:6] 108 | 109 | targets_polygons, targets_areas = get_polygons_areas_fix_xy(target_boxes[:, 2:6]) 110 | # Get anchors with best iou 111 | ious = iou_rotated_boxes_targets_vs_anchors(self.scaled_anchors_polygons, self.scaled_anchors_areas, 112 | targets_polygons, targets_areas) 113 | best_ious, best_n = ious.max(0) 114 | 115 | gx, gy = gxy.t() 116 | gw, gh = gwh.t() 117 | gim, gre = gimre.t() 118 | gi, gj = gxy.long().t() 119 | # Set masks 120 | obj_mask[b, best_n, gj, gi] = 1 121 | noobj_mask[b, best_n, gj, gi] = 0 122 | 123 | # Set noobj mask to zero where iou exceeds ignore threshold 124 | for i, anchor_ious in enumerate(ious.t()): 125 | noobj_mask[b[i], anchor_ious > self.ignore_thresh, gj[i], gi[i]] = 0 126 | 127 | # Coordinates 128 | tx[b, best_n, gj, gi] = gx - gx.floor() 129 | ty[b, best_n, gj, gi] = gy - gy.floor() 130 | # Width and height 131 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) 132 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16) 133 | # Im and real part 134 | tim[b, best_n, gj, gi] = gim 135 | tre[b, best_n, gj, gi] = gre 136 | 137 | # One-hot encoding of label 138 | tcls[b, best_n, gj, gi, target_labels] = 1 139 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() 140 | ious, giou_loss = iou_pred_vs_target_boxes(pred_boxes[b, best_n, gj, gi], target_boxes, 141 | GIoU=self.use_giou_loss) 142 | iou_scores[b, best_n, gj, gi] = ious 143 | if self.reduction == 'mean': 144 | giou_loss /= 
n_target_boxes 145 | tconf = obj_mask.float() 146 | 147 | return iou_scores, giou_loss, class_mask, obj_mask.type(torch.bool), noobj_mask.type(torch.bool), \ 148 | tx, ty, tw, th, tim, tre, tcls, tconf 149 | 150 | def forward(self, x, targets=None, img_size=608, use_giou_loss=False): 151 | """ 152 | :param x: [num_samples or batch, num_anchors * (6 + 1 + num_classes), grid_size, grid_size] 153 | :param targets: [num boxes, 8] (box_idx, class, x, y, w, l, sin(yaw), cos(yaw)) 154 | :param img_size: default 608 155 | :return: 156 | """ 157 | self.img_size = img_size 158 | self.use_giou_loss = use_giou_loss 159 | self.device = x.device 160 | num_samples, _, _, grid_size = x.size() 161 | 162 | prediction = x.view(num_samples, self.num_anchors, self.num_classes + 7, grid_size, grid_size) 163 | prediction = prediction.permute(0, 1, 3, 4, 2).contiguous() 164 | # prediction size: [num_samples, num_anchors, grid_size, grid_size, num_classes + 7] 165 | 166 | # Get outputs 167 | pred_x = torch.sigmoid(prediction[..., 0]) 168 | pred_y = torch.sigmoid(prediction[..., 1]) 169 | pred_w = prediction[..., 2] # Width 170 | pred_h = prediction[..., 3] # Height 171 | pred_im = prediction[..., 4] # angle imaginary part 172 | pred_re = prediction[..., 5] # angle real part 173 | pred_conf = torch.sigmoid(prediction[..., 6]) # Conf 174 | pred_cls = torch.sigmoid(prediction[..., 7:]) # Cls pred. 175 | 176 | # If grid size does not match current we compute new offsets 177 | if grid_size != self.grid_size: 178 | self.compute_grid_offsets(grid_size) 179 | 180 | # Add offset and scale with anchors 181 | # pred_boxes size: [num_samples, num_anchors, grid_size, grid_size, 6] 182 | pred_boxes = torch.empty(prediction[..., :6].shape, device=self.device, dtype=torch.float) 183 | pred_boxes[..., 0] = pred_x + self.grid_x 184 | pred_boxes[..., 1] = pred_y + self.grid_y 185 | pred_boxes[..., 2] = torch.exp(pred_w).clamp(max=1E3) * self.anchor_w 186 | pred_boxes[..., 3] = torch.exp(pred_h).clamp(max=1E3) * self.anchor_h 187 | pred_boxes[..., 4] = pred_im 188 | pred_boxes[..., 5] = pred_re 189 | 190 | output = torch.cat(( 191 | pred_boxes[..., :4].view(num_samples, -1, 4) * self.stride, 192 | pred_boxes[..., 4:6].view(num_samples, -1, 2), 193 | pred_conf.view(num_samples, -1, 1), 194 | pred_cls.view(num_samples, -1, self.num_classes), 195 | ), dim=-1) 196 | # output size: [num_samples, num boxes, 7 + num_classes] 197 | 198 | if targets is None: 199 | return output, 0 200 | else: 201 | self.reduction = 'mean' 202 | iou_scores, giou_loss, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tim, tre, tcls, tconf = self.build_targets( 203 | pred_boxes=pred_boxes, pred_cls=pred_cls, target=targets, anchors=self.scaled_anchors) 204 | 205 | loss_x = F.mse_loss(pred_x[obj_mask], tx[obj_mask], reduction=self.reduction) 206 | loss_y = F.mse_loss(pred_y[obj_mask], ty[obj_mask], reduction=self.reduction) 207 | loss_w = F.mse_loss(pred_w[obj_mask], tw[obj_mask], reduction=self.reduction) 208 | loss_h = F.mse_loss(pred_h[obj_mask], th[obj_mask], reduction=self.reduction) 209 | loss_im = F.mse_loss(pred_im[obj_mask], tim[obj_mask], reduction=self.reduction) 210 | loss_re = F.mse_loss(pred_re[obj_mask], tre[obj_mask], reduction=self.reduction) 211 | loss_im_re = (1. 
- torch.sqrt(pred_im[obj_mask] ** 2 + pred_re[obj_mask] ** 2)) ** 2 # as tim^2 + tre^2 = 1 212 | loss_im_re_red = loss_im_re.sum() if self.reduction == 'sum' else loss_im_re.mean() 213 | loss_eular = loss_im + loss_re + loss_im_re_red 214 | 215 | loss_conf_obj = F.binary_cross_entropy(pred_conf[obj_mask], tconf[obj_mask], reduction=self.reduction) 216 | loss_conf_noobj = F.binary_cross_entropy(pred_conf[noobj_mask], tconf[noobj_mask], reduction=self.reduction) 217 | loss_cls = F.binary_cross_entropy(pred_cls[obj_mask], tcls[obj_mask], reduction=self.reduction) 218 | 219 | if self.use_giou_loss: 220 | loss_obj = loss_conf_obj + loss_conf_noobj 221 | total_loss = giou_loss * self.lgiou_scale + loss_eular * self.leular_scale + loss_obj * self.lobj_scale + loss_cls * self.lcls_scale 222 | else: 223 | loss_obj = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj 224 | total_loss = loss_x + loss_y + loss_w + loss_h + loss_eular + loss_obj + loss_cls 225 | 226 | # Metrics (store loss values using tensorboard) 227 | cls_acc = 100 * class_mask[obj_mask].mean() 228 | conf_obj = pred_conf[obj_mask].mean() 229 | conf_noobj = pred_conf[noobj_mask].mean() 230 | conf50 = (pred_conf > 0.5).float() 231 | iou50 = (iou_scores > 0.5).float() 232 | iou75 = (iou_scores > 0.75).float() 233 | detected_mask = conf50 * class_mask * tconf 234 | precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16) 235 | recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16) 236 | recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16) 237 | 238 | self.metrics = { 239 | "loss": to_cpu(total_loss).item(), 240 | "iou_score": to_cpu(iou_scores[obj_mask].mean()).item(), 241 | 'giou_loss': to_cpu(giou_loss).item(), 242 | 'loss_x': to_cpu(loss_x).item(), 243 | 'loss_y': to_cpu(loss_y).item(), 244 | 'loss_w': to_cpu(loss_w).item(), 245 | 'loss_h': to_cpu(loss_h).item(), 246 | 'loss_eular': to_cpu(loss_eular).item(), 247 | 'loss_im': to_cpu(loss_im).item(), 248 | 'loss_re': to_cpu(loss_re).item(), 249 | "loss_obj": to_cpu(loss_obj).item(), 250 | "loss_cls": to_cpu(loss_cls).item(), 251 | "cls_acc": to_cpu(cls_acc).item(), 252 | "recall50": to_cpu(recall50).item(), 253 | "recall75": to_cpu(recall75).item(), 254 | "precision": to_cpu(precision).item(), 255 | "conf_obj": to_cpu(conf_obj).item(), 256 | "conf_noobj": to_cpu(conf_noobj).item() 257 | } 258 | 259 | return output, total_loss 260 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/tools/objdet_models/darknet/utils/__init__.py -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/cal_intersection_rotated_boxes.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.20 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: This script for intersection calculation of rotated boxes (on GPU) 9 | 10 | Refer from # 
https://stackoverflow.com/questions/44797713/calculate-the-area-of-intersection-of-two-rotated-rectangles-in-python?noredirect=1&lq=1 11 | """ 12 | 13 | import torch 14 | 15 | 16 | class Line: 17 | # ax + by + c = 0 18 | def __init__(self, p1, p2): 19 | """ 20 | 21 | Args: 22 | p1: (x, y) 23 | p2: (x, y) 24 | """ 25 | self.a = p2[1] - p1[1] 26 | self.b = p1[0] - p2[0] 27 | self.c = p2[0] * p1[1] - p2[1] * p1[0] # cross 28 | self.device = p1.device 29 | 30 | def cal_values(self, pts): 31 | return self.a * pts[:, 0] + self.b * pts[:, 1] + self.c 32 | 33 | def find_intersection(self, other): 34 | # See e.g. https://en.wikipedia.org/wiki/Line%E2%80%93line_intersection#Using_homogeneous_coordinates 35 | if not isinstance(other, Line): 36 | return NotImplemented 37 | w = self.a * other.b - self.b * other.a 38 | return torch.tensor([(self.b * other.c - self.c * other.b) / w, (self.c * other.a - self.a * other.c) / w], 39 | device=self.device) 40 | 41 | 42 | def intersection_area(rect1, rect2): 43 | """Calculate the inter 44 | 45 | Args: 46 | rect1: vertices of the rectangles (4, 2) 47 | rect2: vertices of the rectangles (4, 2) 48 | 49 | Returns: 50 | 51 | """ 52 | 53 | # Use the vertices of the first rectangle as, starting vertices of the intersection polygon. 54 | intersection = rect1 55 | 56 | # Loop over the edges of the second rectangle 57 | roll_rect2 = torch.roll(rect2, -1, dims=0) 58 | for p, q in zip(rect2, roll_rect2): 59 | if len(intersection) <= 2: 60 | break # No intersection 61 | 62 | line = Line(p, q) 63 | 64 | # Any point p with line(p) <= 0 is on the "inside" (or on the boundary), 65 | # any point p with line(p) > 0 is on the "outside". 66 | # Loop over the edges of the intersection polygon, 67 | # and determine which part is inside and which is outside. 68 | new_intersection = [] 69 | line_values = line.cal_values(intersection) 70 | roll_intersection = torch.roll(intersection, -1, dims=0) 71 | roll_line_values = torch.roll(line_values, -1, dims=0) 72 | for s, t, s_value, t_value in zip(intersection, roll_intersection, line_values, roll_line_values): 73 | if s_value <= 0: 74 | new_intersection.append(s) 75 | if s_value * t_value < 0: 76 | # Points are on opposite sides. 77 | # Add the intersection of the lines to new_intersection. 78 | intersection_point = line.find_intersection(Line(s, t)) 79 | new_intersection.append(intersection_point) 80 | 81 | if len(new_intersection) > 0: 82 | intersection = torch.stack(new_intersection) 83 | else: 84 | break 85 | 86 | # Calculate area 87 | if len(intersection) <= 2: 88 | return 0. 
89 | 90 | return PolyArea2D(intersection) 91 | 92 | 93 | def PolyArea2D(pts): 94 | roll_pts = torch.roll(pts, -1, dims=0) 95 | area = (pts[:, 0] * roll_pts[:, 1] - pts[:, 1] * roll_pts[:, 0]).sum().abs() * 0.5 96 | return area 97 | 98 | 99 | if __name__ == "__main__": 100 | import cv2 101 | import numpy as np 102 | from shapely.geometry import Polygon 103 | 104 | 105 | def cvt_box_2_polygon(box): 106 | """ 107 | :param array: an array of shape [num_conners, 2] 108 | :return: a shapely.geometry.Polygon object 109 | """ 110 | # use .buffer(0) to fix a line polygon 111 | # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera 112 | return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) 113 | 114 | 115 | def get_corners_torch(x, y, w, l, yaw): 116 | device = x.device 117 | bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) 118 | cos_yaw = torch.cos(yaw) 119 | sin_yaw = torch.sin(yaw) 120 | # front left 121 | bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 122 | bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 123 | 124 | # rear left 125 | bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 126 | bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 127 | 128 | # rear right 129 | bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 130 | bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 131 | 132 | # front right 133 | bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 134 | bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 135 | 136 | return bev_corners 137 | 138 | 139 | # Show convex in an image 140 | 141 | img_size = 300 142 | img = np.zeros((img_size, img_size, 3)) 143 | img = cv2.resize(img, (img_size, img_size)) 144 | 145 | box1 = torch.tensor([100, 100, 40, 10, np.pi / 2], dtype=torch.float).cuda() 146 | box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() 147 | 148 | box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) 149 | box1_polygon = cvt_box_2_polygon(box1_conners) 150 | box1_area = box1_polygon.area 151 | 152 | box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) 153 | box2_polygon = cvt_box_2_polygon(box2_conners) 154 | box2_area = box2_polygon.area 155 | 156 | intersection = box2_polygon.intersection(box1_polygon).area 157 | union = box1_area + box2_area - intersection 158 | iou = intersection / (union + 1e-16) 159 | 160 | print('Shapely- box1_area: {:.2f}, box2_area: {:.2f}, inter: {:.2f}, iou: {:.4f}'.format(box1_area, box2_area, 161 | intersection, iou)) 162 | 163 | print('intersection from intersection_area(): {}'.format(intersection_area(box1_conners, box2_conners))) 164 | 165 | img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) 166 | img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) 167 | 168 | while True: 169 | cv2.imshow('img', img) 170 | if cv2.waitKey(0) & 0xff == 27: 171 | break 172 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/evaluation_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import sys 3 | import tqdm 4 | 5 | import torch 6 | import numpy as np 7 | from shapely.geometry import Polygon 8 | 9 | # bev image coordinates format 10 | def get_corners(x, y, w, l, yaw): 11 | bev_corners = 
np.zeros((4, 2), dtype=np.float32) 12 | cos_yaw = np.cos(yaw) 13 | sin_yaw = np.sin(yaw) 14 | # front left 15 | bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 16 | bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 17 | 18 | # rear left 19 | bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 20 | bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 21 | 22 | # rear right 23 | bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 24 | bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 25 | 26 | # front right 27 | bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 28 | bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 29 | 30 | return bev_corners 31 | 32 | 33 | def cvt_box_2_polygon(box): 34 | """ 35 | :param box: an array of shape [4, 2] 36 | :return: a shapely.geometry.Polygon object 37 | """ 38 | # use .buffer(0) to fix a line polygon 39 | # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera 40 | return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) 41 | 42 | 43 | def compute_iou_nms(idx_self, idx_other, polygons, areas): 44 | """Calculates IoU of the given box with the array of the given boxes. 45 | box: a polygon 46 | boxes: a vector of polygons 47 | Note: the areas are passed in rather than calculated here for 48 | efficiency. Calculate once in the caller to avoid duplicate work. 49 | """ 50 | # Calculate intersection areas 51 | ious = [] 52 | box1 = polygons[idx_self] 53 | for idx in idx_other: 54 | box2 = polygons[idx] 55 | intersection = box1.intersection(box2).area 56 | iou = intersection / (areas[idx] + areas[idx_self] - intersection + 1e-12) 57 | ious.append(iou) 58 | 59 | return np.array(ious, dtype=np.float32) 60 | 61 | 62 | def load_classes(path): 63 | """ 64 | Loads class labels at 'path' 65 | """ 66 | fp = open(path, "r") 67 | names = fp.read().split("\n")[:-1] 68 | return names 69 | 70 | 71 | def rescale_boxes(boxes, current_dim, original_shape): 72 | """ Rescales bounding boxes to the original shape """ 73 | orig_h, orig_w = original_shape 74 | # The amount of padding that was added 75 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) 76 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) 77 | # Image height and width after padding is removed 78 | unpad_h = current_dim - pad_y 79 | unpad_w = current_dim - pad_x 80 | # Rescale bounding boxes to dimension of original image 81 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w 82 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h 83 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w 84 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h 85 | 86 | return boxes 87 | 88 | 89 | def ap_per_class(tp, conf, pred_cls, target_cls): 90 | """ Compute the average precision, given the recall and precision curves. 91 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 92 | # Arguments 93 | tp: True positives (list). 94 | conf: Objectness value from 0-1 (list). 95 | pred_cls: Predicted object classes (list). 96 | target_cls: True object classes (list). 97 | # Returns 98 | The average precision as computed in py-faster-rcnn. 
99 | """ 100 | 101 | # Sort by objectness 102 | i = np.argsort(-conf) 103 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 104 | 105 | # Find unique classes 106 | unique_classes = np.unique(target_cls) 107 | 108 | # Create Precision-Recall curve and compute AP for each class 109 | ap, p, r = [], [], [] 110 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"): 111 | i = pred_cls == c 112 | n_gt = (target_cls == c).sum() # Number of ground truth objects 113 | n_p = i.sum() # Number of predicted objects 114 | 115 | if n_p == 0 and n_gt == 0: 116 | continue 117 | elif n_p == 0 or n_gt == 0: 118 | ap.append(0) 119 | r.append(0) 120 | p.append(0) 121 | else: 122 | # Accumulate FPs and TPs 123 | fpc = (1 - tp[i]).cumsum() 124 | tpc = (tp[i]).cumsum() 125 | 126 | # Recall 127 | recall_curve = tpc / (n_gt + 1e-16) 128 | r.append(recall_curve[-1]) 129 | 130 | # Precision 131 | precision_curve = tpc / (tpc + fpc) 132 | p.append(precision_curve[-1]) 133 | 134 | # AP from recall-precision curve 135 | ap.append(compute_ap(recall_curve, precision_curve)) 136 | 137 | # Compute F1 score (harmonic mean of precision and recall) 138 | p, r, ap = np.array(p), np.array(r), np.array(ap) 139 | f1 = 2 * p * r / (p + r + 1e-16) 140 | 141 | return p, r, ap, f1, unique_classes.astype("int32") 142 | 143 | 144 | def compute_ap(recall, precision): 145 | """ Compute the average precision, given the recall and precision curves. 146 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 147 | # Arguments 148 | recall: The recall curve (list). 149 | precision: The precision curve (list). 150 | # Returns 151 | The average precision as computed in py-faster-rcnn. 152 | """ 153 | # correct AP calculation 154 | # first append sentinel values at the end 155 | mrec = np.concatenate(([0.0], recall, [1.0])) 156 | mpre = np.concatenate(([0.0], precision, [0.0])) 157 | 158 | # compute the precision envelope 159 | for i in range(mpre.size - 1, 0, -1): 160 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 161 | 162 | # to calculate area under PR curve, look for points 163 | # where X axis (recall) changes value 164 | i = np.where(mrec[1:] != mrec[:-1])[0] 165 | 166 | # and sum (\Delta recall) * prec 167 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 168 | return ap 169 | 170 | 171 | def get_batch_statistics_rotated_bbox(outputs, targets, iou_threshold): 172 | """ Compute true positives, predicted scores and predicted labels per sample """ 173 | batch_metrics = [] 174 | for sample_i in range(len(outputs)): 175 | 176 | if outputs[sample_i] is None: 177 | continue 178 | 179 | output = outputs[sample_i] 180 | pred_boxes = output[:, :6] 181 | pred_scores = output[:, 6] 182 | pred_labels = output[:, -1] 183 | 184 | true_positives = np.zeros(pred_boxes.shape[0]) 185 | 186 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 187 | if len(annotations) > 0: 188 | target_labels = annotations[:, 0] 189 | detected_boxes = [] 190 | target_boxes = annotations[:, 1:] 191 | 192 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 193 | 194 | # If targets are found break 195 | if len(detected_boxes) == len(annotations): 196 | break 197 | 198 | # Ignore if label is not one of the target labels 199 | if pred_label not in target_labels: 200 | continue 201 | 202 | iou, box_index = iou_rotated_single_vs_multi_boxes_cpu(pred_box, target_boxes).max(dim=0) 203 | 204 | if iou >= iou_threshold and box_index not in detected_boxes: 205 | true_positives[pred_i] = 1 206 | detected_boxes += [box_index] 207 | 
batch_metrics.append([true_positives, pred_scores, pred_labels]) 208 | 209 | return batch_metrics 210 | 211 | 212 | def iou_rotated_single_vs_multi_boxes_cpu(single_box, multi_boxes): 213 | """ 214 | :param pred_box: Numpy array 215 | :param target_boxes: Numpy array 216 | :return: 217 | """ 218 | 219 | s_x, s_y, s_w, s_l, s_im, s_re = single_box 220 | s_area = s_w * s_l 221 | s_yaw = np.arctan2(s_im, s_re) 222 | s_conners = get_corners(s_x, s_y, s_w, s_l, s_yaw) 223 | s_polygon = cvt_box_2_polygon(s_conners) 224 | 225 | m_x, m_y, m_w, m_l, m_im, m_re = multi_boxes.transpose(1, 0) 226 | targets_areas = m_w * m_l 227 | m_yaw = np.arctan2(m_im, m_re) 228 | m_boxes_conners = get_corners_vectorize(m_x, m_y, m_w, m_l, m_yaw) 229 | m_boxes_polygons = [cvt_box_2_polygon(box_) for box_ in m_boxes_conners] 230 | 231 | ious = [] 232 | for m_idx in range(multi_boxes.shape[0]): 233 | intersection = s_polygon.intersection(m_boxes_polygons[m_idx]).area 234 | iou_ = intersection / (s_area + targets_areas[m_idx] - intersection + 1e-16) 235 | ious.append(iou_) 236 | 237 | return torch.tensor(ious, dtype=torch.float) 238 | 239 | 240 | def get_corners_vectorize(x, y, w, l, yaw): 241 | """bev image coordinates format - vectorization 242 | 243 | :param x, y, w, l, yaw: [num_boxes,] 244 | :return: num_boxes x (x,y) of 4 conners 245 | """ 246 | bbox2 = np.zeros((x.shape[0], 4, 2), dtype=np.float32) 247 | cos_yaw = np.cos(yaw) 248 | sin_yaw = np.sin(yaw) 249 | 250 | # front left 251 | bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 252 | bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 253 | 254 | # rear left 255 | bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 256 | bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 257 | 258 | # rear right 259 | bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 260 | bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 261 | 262 | # front right 263 | bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 264 | bbox2[:, 3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 265 | 266 | return bbox2 267 | 268 | 269 | def nms_cpu(boxes, confs, nms_thresh=0.5): 270 | """ 271 | :param boxes: [num, 6] 272 | :param confs: [num, num_classes] 273 | :param nms_thresh: 274 | :param min_mode: 275 | :return: 276 | """ 277 | # order of reduce confidence (high --> low) 278 | order = confs.argsort()[::-1] 279 | 280 | x, y, w, l, im, re = boxes.transpose(1, 0) 281 | yaw = np.arctan2(im, re) 282 | boxes_conners = get_corners_vectorize(x, y, w, l, yaw) 283 | boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] # 4 vertices of the box 284 | boxes_areas = w * l 285 | 286 | keep = [] 287 | while order.size > 0: 288 | idx_self = order[0] 289 | idx_other = order[1:] 290 | keep.append(idx_self) 291 | over = compute_iou_nms(idx_self, idx_other, boxes_polygons, boxes_areas) 292 | inds = np.where(over <= nms_thresh)[0] 293 | order = order[inds + 1] 294 | 295 | return np.array(keep) 296 | 297 | 298 | def post_processing(outputs, conf_thresh=0.95, nms_thresh=0.4): 299 | """ 300 | Removes detections with lower object confidence score than 'conf_thres' and performs 301 | Non-Maximum Suppression to further filter detections. 
302 | Returns detections with shape: 303 | (x, y, w, l, im, re, object_conf, class_score, class_pred) 304 | """ 305 | if type(outputs).__name__ != 'ndarray': 306 | outputs = outputs.numpy() 307 | # outputs shape: (batch_size, 22743, 10) 308 | batch_size = outputs.shape[0] 309 | # box_array: [batch, num, 6] 310 | box_array = outputs[:, :, :6] 311 | 312 | # confs: [batch, num, num_classes] 313 | confs = outputs[:, :, 6:7] * outputs[:, :, 7:] 314 | obj_confs = outputs[:, :, 6] 315 | 316 | # [batch, num, num_classes] --> [batch, num] 317 | max_conf = np.max(confs, axis=2) 318 | max_id = np.argmax(confs, axis=2) 319 | 320 | bboxes_batch = [None for _ in range(batch_size)] 321 | 322 | for i in range(batch_size): 323 | argwhere = max_conf[i] > conf_thresh 324 | l_box_array = box_array[i, argwhere, :] 325 | l_obj_confs = obj_confs[i, argwhere, :] 326 | l_max_conf = max_conf[i, argwhere] 327 | l_max_id = max_id[i, argwhere] 328 | 329 | keep = nms_cpu(l_box_array, l_max_conf, nms_thresh=nms_thresh) 330 | 331 | if (keep.size > 0): 332 | l_box_array = l_box_array[keep, :] 333 | l_obj_confs = l_obj_confs[keep].reshape(-1, 1) 334 | l_max_conf = l_max_conf[keep].reshape(-1, 1) 335 | l_max_id = l_max_id[keep].reshape(-1, 1) 336 | bboxes_batch[i] = np.concatenate((l_box_array, l_obj_confs, l_max_conf, l_max_id), axis=-1) 337 | return bboxes_batch 338 | 339 | 340 | def post_processing_v2(prediction, conf_thresh=0.95, nms_thresh=0.4): 341 | """ 342 | Removes detections with lower object confidence score than 'conf_thres' and performs 343 | Non-Maximum Suppression to further filter detections. 344 | Returns detections with shape: 345 | (x, y, w, l, im, re, object_conf, class_score, class_pred) 346 | """ 347 | output = [None for _ in range(len(prediction))] 348 | for image_i, image_pred in enumerate(prediction): 349 | # Filter out confidence scores below threshold 350 | image_pred = image_pred[image_pred[:, 6] >= conf_thresh] 351 | # If none are remaining => process next image 352 | if not image_pred.size(0): 353 | continue 354 | # Object confidence times class confidence 355 | score = image_pred[:, 6] * image_pred[:, 7:].max(dim=1)[0] 356 | # Sort by it 357 | image_pred = image_pred[(-score).argsort()] 358 | class_confs, class_preds = image_pred[:, 7:].max(dim=1, keepdim=True) 359 | detections = torch.cat((image_pred[:, :7].float(), class_confs.float(), class_preds.float()), dim=1) 360 | # Perform non-maximum suppression 361 | keep_boxes = [] 362 | while detections.size(0): 363 | # large_overlap = rotated_bbox_iou(detections[0, :6].unsqueeze(0), detections[:, :6], 1.0, False) > nms_thres # not working 364 | large_overlap = iou_rotated_single_vs_multi_boxes_cpu(detections[0, :6], detections[:, :6]) > nms_thresh 365 | label_match = detections[0, -1] == detections[:, -1] 366 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 367 | invalid = large_overlap & label_match 368 | weights = detections[invalid, 6:7] 369 | # Merge overlapping bboxes by order of confidence 370 | detections[0, :6] = (weights * detections[invalid, :6]).sum(0) / weights.sum() 371 | keep_boxes += [detections[0]] 372 | detections = detections[~invalid] 373 | if len(keep_boxes) > 0: 374 | output[image_i] = torch.stack(keep_boxes) 375 | 376 | return output 377 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/iou_rotated_boxes_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 
-*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.20 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: This script for iou calculation of rotated boxes (on GPU) 9 | 10 | """ 11 | 12 | from __future__ import division 13 | import sys 14 | 15 | import torch 16 | from shapely.geometry import Polygon 17 | from scipy.spatial import ConvexHull 18 | 19 | sys.path.append('../') 20 | 21 | from utils.cal_intersection_rotated_boxes import intersection_area, PolyArea2D 22 | 23 | 24 | def cvt_box_2_polygon(box): 25 | """ 26 | :param array: an array of shape [num_conners, 2] 27 | :return: a shapely.geometry.Polygon object 28 | """ 29 | # use .buffer(0) to fix a line polygon 30 | # more infor: https://stackoverflow.com/questions/13062334/polygon-intersection-error-in-shapely-shapely-geos-topologicalerror-the-opera 31 | return Polygon([(box[i, 0], box[i, 1]) for i in range(len(box))]).buffer(0) 32 | 33 | 34 | def get_corners_vectorize(x, y, w, l, yaw): 35 | """bev image coordinates format - vectorization 36 | 37 | :param x, y, w, l, yaw: [num_boxes,] 38 | :return: num_boxes x (x,y) of 4 conners 39 | """ 40 | device = x.device 41 | bbox2 = torch.zeros((x.size(0), 4, 2), device=device, dtype=torch.float) 42 | cos_yaw = torch.cos(yaw) 43 | sin_yaw = torch.sin(yaw) 44 | 45 | # front left 46 | bbox2[:, 0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 47 | bbox2[:, 0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 48 | 49 | # rear left 50 | bbox2[:, 1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 51 | bbox2[:, 1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 52 | 53 | # rear right 54 | bbox2[:, 2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 55 | bbox2[:, 2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 56 | 57 | # front right 58 | bbox2[:, 3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 59 | bbox2[:, 3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 60 | 61 | return bbox2 62 | 63 | 64 | def get_polygons_areas_fix_xy(boxes, fix_xy=100.): 65 | """ 66 | Args: 67 | box: (num_boxes, 4) --> w, l, im, re 68 | """ 69 | device = boxes.device 70 | n_boxes = boxes.size(0) 71 | x = torch.full(size=(n_boxes,), fill_value=fix_xy, device=device, dtype=torch.float) 72 | y = torch.full(size=(n_boxes,), fill_value=fix_xy, device=device, dtype=torch.float) 73 | w, l, im, re = boxes.t() 74 | yaw = torch.atan2(im, re) 75 | boxes_conners = get_corners_vectorize(x, y, w, l, yaw) 76 | boxes_polygons = [cvt_box_2_polygon(box_) for box_ in boxes_conners] 77 | boxes_areas = w * l 78 | 79 | return boxes_polygons, boxes_areas 80 | 81 | 82 | def iou_rotated_boxes_targets_vs_anchors(anchors_polygons, anchors_areas, targets_polygons, targets_areas): 83 | device = anchors_areas.device 84 | num_anchors = len(anchors_areas) 85 | num_targets_boxes = len(targets_areas) 86 | 87 | ious = torch.zeros(size=(num_anchors, num_targets_boxes), device=device, dtype=torch.float) 88 | 89 | for a_idx in range(num_anchors): 90 | for tg_idx in range(num_targets_boxes): 91 | intersection = anchors_polygons[a_idx].intersection(targets_polygons[tg_idx]).area 92 | iou = intersection / (anchors_areas[a_idx] + targets_areas[tg_idx] - intersection + 1e-16) 93 | ious[a_idx, tg_idx] = iou 94 | 95 | return ious 96 | 97 | 98 | def iou_pred_vs_target_boxes(pred_boxes, target_boxes, GIoU=False, DIoU=False, CIoU=False): 99 | assert pred_boxes.size() == target_boxes.size(), "Unmatch size of pred_boxes and 
target_boxes" 100 | device = pred_boxes.device 101 | n_boxes = pred_boxes.size(0) 102 | 103 | t_x, t_y, t_w, t_l, t_im, t_re = target_boxes.t() 104 | t_yaw = torch.atan2(t_im, t_re) 105 | t_conners = get_corners_vectorize(t_x, t_y, t_w, t_l, t_yaw) 106 | t_areas = t_w * t_l 107 | 108 | p_x, p_y, p_w, p_l, p_im, p_re = pred_boxes.t() 109 | p_yaw = torch.atan2(p_im, p_re) 110 | p_conners = get_corners_vectorize(p_x, p_y, p_w, p_l, p_yaw) 111 | p_areas = p_w * p_l 112 | 113 | ious = [] 114 | giou_loss = torch.tensor([0.], device=device, dtype=torch.float) 115 | # Thinking to apply vectorization this step 116 | for box_idx in range(n_boxes): 117 | p_cons, t_cons = p_conners[box_idx], t_conners[box_idx] 118 | if not GIoU: 119 | p_poly, t_poly = cvt_box_2_polygon(p_cons), cvt_box_2_polygon(t_cons) 120 | intersection = p_poly.intersection(t_poly).area 121 | else: 122 | intersection = intersection_area(p_cons, t_cons) 123 | 124 | p_area, t_area = p_areas[box_idx], t_areas[box_idx] 125 | union = p_area + t_area - intersection 126 | iou = intersection / (union + 1e-16) 127 | 128 | if GIoU: 129 | convex_conners = torch.cat((p_cons, t_cons), dim=0) 130 | hull = ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output 131 | convex_conners = convex_conners[hull.vertices] 132 | convex_area = PolyArea2D(convex_conners) 133 | giou_loss += 1. - (iou - (convex_area - union) / (convex_area + 1e-16)) 134 | else: 135 | giou_loss += 1. - iou 136 | 137 | if DIoU or CIoU: 138 | raise NotImplementedError 139 | 140 | ious.append(iou) 141 | 142 | return torch.tensor(ious, device=device, dtype=torch.float), giou_loss 143 | 144 | 145 | if __name__ == "__main__": 146 | import cv2 147 | import numpy as np 148 | 149 | 150 | def get_corners_torch(x, y, w, l, yaw): 151 | device = x.device 152 | bev_corners = torch.zeros((4, 2), dtype=torch.float, device=device) 153 | cos_yaw = torch.cos(yaw) 154 | sin_yaw = torch.sin(yaw) 155 | # front left 156 | bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw 157 | bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw 158 | 159 | # rear left 160 | bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw 161 | bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw 162 | 163 | # rear right 164 | bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw 165 | bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw 166 | 167 | # front right 168 | bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw 169 | bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw 170 | 171 | return bev_corners 172 | 173 | 174 | # Show convex in an image 175 | 176 | img_size = 300 177 | img = np.zeros((img_size, img_size, 3)) 178 | img = cv2.resize(img, (img_size, img_size)) 179 | 180 | box1 = torch.tensor([100, 100, 60, 10, 0.5], dtype=torch.float).cuda() 181 | box2 = torch.tensor([100, 100, 40, 20, 0], dtype=torch.float).cuda() 182 | 183 | box1_conners = get_corners_torch(box1[0], box1[1], box1[2], box1[3], box1[4]) 184 | box1_polygon = cvt_box_2_polygon(box1_conners) 185 | box1_area = box1_polygon.area 186 | 187 | box2_conners = get_corners_torch(box2[0], box2[1], box2[2], box2[3], box2[4]) 188 | box2_polygon = cvt_box_2_polygon(box2_conners) 189 | box2_area = box2_polygon.area 190 | 191 | intersection = box2_polygon.intersection(box1_polygon).area 192 | union = box1_area + box2_area - intersection 193 | iou = intersection / (union + 1e-16) 194 | 195 | convex_conners = torch.cat((box1_conners, box2_conners), dim=0) 196 | hull = 
ConvexHull(convex_conners.clone().detach().cpu().numpy()) # done on cpu, just need indices output 197 | convex_conners = convex_conners[hull.vertices] 198 | convex_polygon = cvt_box_2_polygon(convex_conners) 199 | convex_area = convex_polygon.area 200 | giou_loss = 1. - (iou - (convex_area - union) / (convex_area + 1e-16)) 201 | 202 | print( 203 | 'box1_area: {:.2f}, box2_area: {:.2f}, intersection: {:.2f}, iou: {:.4f}, convex_area: {:.4f}, giou_loss: {}'.format( 204 | box1_area, box2_area, intersection, iou, convex_area, giou_loss)) 205 | 206 | print('intersection_area: {}'.format(intersection_area(box1_conners, box2_conners))) 207 | print('convex_area using PolyArea2D: {}'.format(PolyArea2D(convex_conners))) 208 | 209 | img = cv2.polylines(img, [box1_conners.cpu().numpy().astype(np.int)], True, (255, 0, 0), 2) 210 | img = cv2.polylines(img, [box2_conners.cpu().numpy().astype(np.int)], True, (0, 255, 0), 2) 211 | img = cv2.polylines(img, [convex_conners.cpu().numpy().astype(np.int)], True, (0, 0, 255), 2) 212 | 213 | while True: 214 | cv2.imshow('img', img) 215 | if cv2.waitKey(0) & 0xff == 27: 216 | break 217 | -------------------------------------------------------------------------------- /tools/objdet_models/darknet/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.07.05 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: some utilities of torch (conversion) 9 | ----------------------------------------------------------------------------------- 10 | # Refer: https://github.com/Tianxiaomo/pytorch-YOLOv4 11 | """ 12 | 13 | import torch 14 | 15 | __all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu'] 16 | 17 | 18 | def convert2cpu(gpu_matrix): 19 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 20 | 21 | 22 | def convert2cpu_long(gpu_matrix): 23 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 24 | 25 | 26 | def to_cpu(tensor): 27 | return tensor.detach().cpu() 28 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/models/fpn_resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | # --------------------------------------------------------------------------------- 3 | # -*- coding: utf-8 -*- 4 | ----------------------------------------------------------------------------------- 5 | # Copyright (c) Microsoft 6 | # Licensed under the MIT License. 
7 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 8 | # Modified by Xingyi Zhou 9 | # Refer from: https://github.com/xingyizhou/CenterNet 10 | 11 | # Modifier: Nguyen Mau Dung (2020.08.09) 12 | # ------------------------------------------------------------------------------ 13 | """ 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.utils.model_zoo as model_zoo 24 | import torch.nn.functional as F 25 | 26 | BN_MOMENTUM = 0.1 27 | 28 | model_urls = { 29 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 30 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 31 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 32 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 33 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 34 | } 35 | 36 | 37 | def conv3x3(in_planes, out_planes, stride=1): 38 | """3x3 convolution with padding""" 39 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | residual = self.downsample(x) 67 | 68 | out += residual 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 82 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 83 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 84 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) 85 | self.relu = nn.ReLU(inplace=True) 86 | self.downsample = downsample 87 | self.stride = stride 88 | 89 | def forward(self, x): 90 | residual = x 91 | 92 | out = self.conv1(x) 93 | out = self.bn1(out) 94 | out = self.relu(out) 95 | 96 | out = self.conv2(out) 97 | out = self.bn2(out) 98 | out = self.relu(out) 99 | 100 | out = self.conv3(out) 101 | out = self.bn3(out) 102 | 103 | if self.downsample is not None: 104 | residual = self.downsample(x) 105 | 106 | out += residual 107 | out = self.relu(out) 108 | 109 | return out 110 | 111 | 112 | class PoseResNet(nn.Module): 113 | 114 | def __init__(self, block, layers, heads, head_conv, **kwargs): 115 | self.inplanes = 64 116 | self.deconv_with_bias = False 117 | self.heads = heads 118 | 119 | super(PoseResNet, self).__init__() 120 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 
bias=False) 121 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 122 | self.relu = nn.ReLU(inplace=True) 123 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 124 | self.layer1 = self._make_layer(block, 64, layers[0]) 125 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 126 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 127 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 128 | 129 | self.conv_up_level1 = nn.Conv2d(768, 256, kernel_size=1, stride=1, padding=0) 130 | self.conv_up_level2 = nn.Conv2d(384, 128, kernel_size=1, stride=1, padding=0) 131 | self.conv_up_level3 = nn.Conv2d(192, 64, kernel_size=1, stride=1, padding=0) 132 | 133 | fpn_channels = [256, 128, 64] 134 | for fpn_idx, fpn_c in enumerate(fpn_channels): 135 | for head in sorted(self.heads): 136 | num_output = self.heads[head] 137 | if head_conv > 0: 138 | fc = nn.Sequential( 139 | nn.Conv2d(fpn_c, head_conv, kernel_size=3, padding=1, bias=True), 140 | nn.ReLU(inplace=True), 141 | nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0)) 142 | else: 143 | fc = nn.Conv2d(in_channels=fpn_c, out_channels=num_output, kernel_size=1, stride=1, padding=0) 144 | 145 | self.__setattr__('fpn{}_{}'.format(fpn_idx, head), fc) 146 | 147 | def _make_layer(self, block, planes, blocks, stride=1): 148 | downsample = None 149 | if stride != 1 or self.inplanes != planes * block.expansion: 150 | downsample = nn.Sequential( 151 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 152 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 153 | ) 154 | 155 | layers = [] 156 | layers.append(block(self.inplanes, planes, stride, downsample)) 157 | self.inplanes = planes * block.expansion 158 | for i in range(1, blocks): 159 | layers.append(block(self.inplanes, planes)) 160 | 161 | return nn.Sequential(*layers) 162 | 163 | def forward(self, x): 164 | _, _, input_h, input_w = x.size() 165 | hm_h, hm_w = input_h // 4, input_w // 4 166 | x = self.conv1(x) 167 | x = self.bn1(x) 168 | x = self.relu(x) 169 | x = self.maxpool(x) 170 | 171 | out_layer1 = self.layer1(x) 172 | out_layer2 = self.layer2(out_layer1) 173 | 174 | out_layer3 = self.layer3(out_layer2) 175 | 176 | out_layer4 = self.layer4(out_layer3) 177 | 178 | # up_level1: torch.Size([b, 512, 14, 14]) 179 | up_level1 = F.interpolate(out_layer4, scale_factor=2, mode='bilinear', align_corners=True) 180 | 181 | concat_level1 = torch.cat((up_level1, out_layer3), dim=1) 182 | # up_level2: torch.Size([b, 256, 28, 28]) 183 | up_level2 = F.interpolate(self.conv_up_level1(concat_level1), scale_factor=2, mode='bilinear', 184 | align_corners=True) 185 | 186 | concat_level2 = torch.cat((up_level2, out_layer2), dim=1) 187 | # up_level3: torch.Size([b, 128, 56, 56]), 188 | up_level3 = F.interpolate(self.conv_up_level2(concat_level2), scale_factor=2, mode='bilinear', 189 | align_corners=True) 190 | # up_level4: torch.Size([b, 64, 56, 56]) 191 | up_level4 = self.conv_up_level3(torch.cat((up_level3, out_layer1), dim=1)) 192 | 193 | ret = {} 194 | for head in self.heads: 195 | temp_outs = [] 196 | for fpn_idx, fdn_input in enumerate([up_level2, up_level3, up_level4]): 197 | fpn_out = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))(fdn_input) 198 | _, _, fpn_out_h, fpn_out_w = fpn_out.size() 199 | # Make sure the added features having same size of heatmap output 200 | if (fpn_out_w != hm_w) or (fpn_out_h != hm_h): 201 | fpn_out = F.interpolate(fpn_out, size=(hm_h, hm_w)) 202 | 
temp_outs.append(fpn_out) 203 | # Take the softmax in the keypoint feature pyramid network 204 | final_out = self.apply_kfpn(temp_outs) 205 | 206 | ret[head] = final_out 207 | 208 | return ret 209 | 210 | def apply_kfpn(self, outs): 211 | outs = torch.cat([out.unsqueeze(-1) for out in outs], dim=-1) 212 | softmax_outs = F.softmax(outs, dim=-1) 213 | ret_outs = (outs * softmax_outs).sum(dim=-1) 214 | return ret_outs 215 | 216 | def init_weights(self, num_layers, pretrained=True): 217 | if pretrained: 218 | # TODO: Check initial weights for head later 219 | for fpn_idx in [0, 1, 2]: # 3 FPN layers 220 | for head in self.heads: 221 | final_layer = self.__getattr__('fpn{}_{}'.format(fpn_idx, head)) 222 | for i, m in enumerate(final_layer.modules()): 223 | if isinstance(m, nn.Conv2d): 224 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 225 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 226 | # print('=> init {}.bias as 0'.format(name)) 227 | if m.weight.shape[0] == self.heads[head]: 228 | if 'hm' in head: 229 | nn.init.constant_(m.bias, -2.19) 230 | else: 231 | nn.init.normal_(m.weight, std=0.001) 232 | nn.init.constant_(m.bias, 0) 233 | # pretrained_state_dict = torch.load(pretrained) 234 | url = model_urls['resnet{}'.format(num_layers)] 235 | pretrained_state_dict = model_zoo.load_url(url) 236 | print('=> loading pretrained model {}'.format(url)) 237 | self.load_state_dict(pretrained_state_dict, strict=False) 238 | 239 | 240 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 241 | 34: (BasicBlock, [3, 4, 6, 3]), 242 | 50: (Bottleneck, [3, 4, 6, 3]), 243 | 101: (Bottleneck, [3, 4, 23, 3]), 244 | 152: (Bottleneck, [3, 8, 36, 3])} 245 | 246 | 247 | def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained): 248 | block_class, layers = resnet_spec[num_layers] 249 | 250 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 251 | model.init_weights(num_layers, pretrained=imagenet_pretrained) 252 | return model 253 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/models/resnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | # --------------------------------------------------------------------------------- 3 | # -*- coding: utf-8 -*- 4 | ----------------------------------------------------------------------------------- 5 | # Copyright (c) Microsoft 6 | # Licensed under the MIT License. 
7 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 8 | # Modified by Xingyi Zhou 9 | # Refer from: https://github.com/xingyizhou/CenterNet 10 | 11 | # Modifier: Nguyen Mau Dung (2020.08.09) 12 | # ------------------------------------------------------------------------------ 13 | """ 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.utils.model_zoo as model_zoo 24 | 25 | BN_MOMENTUM = 0.1 26 | 27 | model_urls = { 28 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 29 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 30 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 31 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 32 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 33 | } 34 | 35 | 36 | def conv3x3(in_planes, out_planes, stride=1): 37 | """3x3 convolution with padding""" 38 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 39 | padding=1, bias=False) 40 | 41 | 42 | class BasicBlock(nn.Module): 43 | expansion = 1 44 | 45 | def __init__(self, inplanes, planes, stride=1, downsample=None): 46 | super(BasicBlock, self).__init__() 47 | self.conv1 = conv3x3(inplanes, planes, stride) 48 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv2 = conv3x3(planes, planes) 51 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 52 | self.downsample = downsample 53 | self.stride = stride 54 | 55 | def forward(self, x): 56 | residual = x 57 | 58 | out = self.conv1(x) 59 | out = self.bn1(out) 60 | out = self.relu(out) 61 | 62 | out = self.conv2(out) 63 | out = self.bn2(out) 64 | 65 | if self.downsample is not None: 66 | residual = self.downsample(x) 67 | 68 | out += residual 69 | out = self.relu(out) 70 | 71 | return out 72 | 73 | 74 | class Bottleneck(nn.Module): 75 | expansion = 4 76 | 77 | def __init__(self, inplanes, planes, stride=1, downsample=None): 78 | super(Bottleneck, self).__init__() 79 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 80 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 81 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 82 | padding=1, bias=False) 83 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 84 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 85 | bias=False) 86 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 87 | momentum=BN_MOMENTUM) 88 | self.relu = nn.ReLU(inplace=True) 89 | self.downsample = downsample 90 | self.stride = stride 91 | 92 | def forward(self, x): 93 | residual = x 94 | 95 | out = self.conv1(x) 96 | out = self.bn1(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv2(out) 100 | out = self.bn2(out) 101 | out = self.relu(out) 102 | 103 | out = self.conv3(out) 104 | out = self.bn3(out) 105 | 106 | if self.downsample is not None: 107 | residual = self.downsample(x) 108 | 109 | out += residual 110 | out = self.relu(out) 111 | 112 | return out 113 | 114 | 115 | class PoseResNet(nn.Module): 116 | 117 | def __init__(self, block, layers, heads, head_conv, **kwargs): 118 | self.inplanes = 64 119 | self.deconv_with_bias = False 120 | self.heads = heads 121 | 122 | super(PoseResNet, self).__init__() 123 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 124 | bias=False) 125 | 
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 126 | self.relu = nn.ReLU(inplace=True) 127 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 128 | self.layer1 = self._make_layer(block, 64, layers[0]) 129 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 130 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 131 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 132 | 133 | # used for deconv layers 134 | self.deconv_layers = self._make_deconv_layer( 135 | 3, 136 | [256, 256, 256], 137 | [4, 4, 4], 138 | ) 139 | # self.final_layer = [] 140 | 141 | for head in sorted(self.heads): 142 | num_output = self.heads[head] 143 | if head_conv > 0: 144 | fc = nn.Sequential( 145 | nn.Conv2d(256, head_conv, 146 | kernel_size=3, padding=1, bias=True), 147 | nn.ReLU(inplace=True), 148 | nn.Conv2d(head_conv, num_output, 149 | kernel_size=1, stride=1, padding=0)) 150 | else: 151 | fc = nn.Conv2d( 152 | in_channels=256, 153 | out_channels=num_output, 154 | kernel_size=1, 155 | stride=1, 156 | padding=0 157 | ) 158 | self.__setattr__(head, fc) 159 | 160 | # self.final_layer = nn.ModuleList(self.final_layer) 161 | 162 | def _make_layer(self, block, planes, blocks, stride=1): 163 | downsample = None 164 | if stride != 1 or self.inplanes != planes * block.expansion: 165 | downsample = nn.Sequential( 166 | nn.Conv2d(self.inplanes, planes * block.expansion, 167 | kernel_size=1, stride=stride, bias=False), 168 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 169 | ) 170 | 171 | layers = [] 172 | layers.append(block(self.inplanes, planes, stride, downsample)) 173 | self.inplanes = planes * block.expansion 174 | for i in range(1, blocks): 175 | layers.append(block(self.inplanes, planes)) 176 | 177 | return nn.Sequential(*layers) 178 | 179 | def _get_deconv_cfg(self, deconv_kernel, index): 180 | if deconv_kernel == 4: 181 | padding = 1 182 | output_padding = 0 183 | elif deconv_kernel == 3: 184 | padding = 1 185 | output_padding = 1 186 | elif deconv_kernel == 2: 187 | padding = 0 188 | output_padding = 0 189 | 190 | return deconv_kernel, padding, output_padding 191 | 192 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 193 | assert num_layers == len(num_filters), \ 194 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 195 | assert num_layers == len(num_kernels), \ 196 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 197 | 198 | layers = [] 199 | for i in range(num_layers): 200 | kernel, padding, output_padding = \ 201 | self._get_deconv_cfg(num_kernels[i], i) 202 | 203 | planes = num_filters[i] 204 | layers.append( 205 | nn.ConvTranspose2d( 206 | in_channels=self.inplanes, 207 | out_channels=planes, 208 | kernel_size=kernel, 209 | stride=2, 210 | padding=padding, 211 | output_padding=output_padding, 212 | bias=self.deconv_with_bias)) 213 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 214 | layers.append(nn.ReLU(inplace=True)) 215 | self.inplanes = planes 216 | 217 | return nn.Sequential(*layers) 218 | 219 | def forward(self, x): 220 | x = self.conv1(x) 221 | x = self.bn1(x) 222 | x = self.relu(x) 223 | x = self.maxpool(x) 224 | 225 | x = self.layer1(x) 226 | x = self.layer2(x) 227 | x = self.layer3(x) 228 | x = self.layer4(x) 229 | 230 | x = self.deconv_layers(x) 231 | ret = {} 232 | for head in self.heads: 233 | ret[head] = self.__getattr__(head)(x) 234 | return ret 235 | 236 | def init_weights(self, num_layers, pretrained=True): 237 | if pretrained: 238 | # 
print('=> init resnet deconv weights from normal distribution') 239 | for _, m in self.deconv_layers.named_modules(): 240 | if isinstance(m, nn.ConvTranspose2d): 241 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 242 | # print('=> init {}.bias as 0'.format(name)) 243 | nn.init.normal_(m.weight, std=0.001) 244 | if self.deconv_with_bias: 245 | nn.init.constant_(m.bias, 0) 246 | elif isinstance(m, nn.BatchNorm2d): 247 | # print('=> init {}.weight as 1'.format(name)) 248 | # print('=> init {}.bias as 0'.format(name)) 249 | nn.init.constant_(m.weight, 1) 250 | nn.init.constant_(m.bias, 0) 251 | # print('=> init final conv weights from normal distribution') 252 | for head in self.heads: 253 | final_layer = self.__getattr__(head) 254 | for i, m in enumerate(final_layer.modules()): 255 | if isinstance(m, nn.Conv2d): 256 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 257 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 258 | # print('=> init {}.bias as 0'.format(name)) 259 | if m.weight.shape[0] == self.heads[head]: 260 | if 'hm' in head: 261 | nn.init.constant_(m.bias, -2.19) 262 | else: 263 | nn.init.normal_(m.weight, std=0.001) 264 | nn.init.constant_(m.bias, 0) 265 | # pretrained_state_dict = torch.load(pretrained) 266 | url = model_urls['resnet{}'.format(num_layers)] 267 | pretrained_state_dict = model_zoo.load_url(url) 268 | print('=> loading pretrained model {}'.format(url)) 269 | self.load_state_dict(pretrained_state_dict, strict=False) 270 | 271 | 272 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 273 | 34: (BasicBlock, [3, 4, 6, 3]), 274 | 50: (Bottleneck, [3, 4, 6, 3]), 275 | 101: (Bottleneck, [3, 4, 23, 3]), 276 | 152: (Bottleneck, [3, 8, 36, 3])} 277 | 278 | 279 | def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained): 280 | block_class, layers = resnet_spec[num_layers] 281 | 282 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 283 | model.init_weights(num_layers, pretrained=imagenet_pretrained) 284 | return model 285 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/utils/evaluation_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.08.17 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: The utils for evaluation 9 | # Refer from: https://github.com/xingyizhou/CenterNet 10 | """ 11 | 12 | from __future__ import division 13 | import sys 14 | 15 | import torch 16 | import numpy as np 17 | import torch.nn.functional as F 18 | import cv2 19 | 20 | def _nms(heat, kernel=3): 21 | pad = (kernel - 1) // 2 22 | hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 23 | keep = (hmax == heat).float() 24 | 25 | return heat * keep 26 | 27 | 28 | def _gather_feat(feat, ind, mask=None): 29 | dim = feat.size(2) 30 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 31 | feat = feat.gather(1, ind) 32 | if mask is not None: 33 | mask = mask.unsqueeze(2).expand_as(feat) 34 | feat = feat[mask] 35 | feat = feat.view(-1, dim) 36 | return feat 37 | 38 | 39 | def _transpose_and_gather_feat(feat, ind): 40 | feat = feat.permute(0, 2, 3, 1).contiguous() 41 | feat = feat.view(feat.size(0), -1, feat.size(3)) 42 | feat = 
_gather_feat(feat, ind) 43 | return feat 44 | 45 | 46 | def _topk(scores, K=40): 47 | batch, cat, height, width = scores.size() 48 | 49 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 50 | 51 | topk_inds = topk_inds % (height * width) 52 | topk_ys = (topk_inds // width).int().float() 53 | topk_xs = (topk_inds % width).int().float() 54 | 55 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 56 | topk_clses = (topk_ind // K).int() 57 | topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 58 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 59 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 60 | 61 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 62 | 63 | 64 | def _topk_channel(scores, K=40): 65 | batch, cat, height, width = scores.size() 66 | 67 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 68 | 69 | topk_inds = topk_inds % (height * width) 70 | topk_ys = (topk_inds / width).int().float() 71 | topk_xs = (topk_inds % width).int().float() 72 | 73 | return topk_scores, topk_inds, topk_ys, topk_xs 74 | 75 | 76 | def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40): 77 | batch_size, num_classes, height, width = hm_cen.size() 78 | 79 | hm_cen = _nms(hm_cen) 80 | scores, inds, clses, ys, xs = _topk(hm_cen, K=K) 81 | if cen_offset is not None: 82 | cen_offset = _transpose_and_gather_feat(cen_offset, inds) 83 | cen_offset = cen_offset.view(batch_size, K, 2) 84 | xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1] 85 | ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2] 86 | else: 87 | xs = xs.view(batch_size, K, 1) + 0.5 88 | ys = ys.view(batch_size, K, 1) + 0.5 89 | 90 | direction = _transpose_and_gather_feat(direction, inds) 91 | direction = direction.view(batch_size, K, 2) 92 | z_coor = _transpose_and_gather_feat(z_coor, inds) 93 | z_coor = z_coor.view(batch_size, K, 1) 94 | dim = _transpose_and_gather_feat(dim, inds) 95 | dim = dim.view(batch_size, K, 3) 96 | clses = clses.view(batch_size, K, 1).float() 97 | scores = scores.view(batch_size, K, 1) 98 | 99 | # (scores x 1, ys x 1, xs x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) 100 | # (scores-0:1, ys-1:2, xs-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) 101 | # detections: [batch_size, K, 10] 102 | detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2) 103 | 104 | return detections 105 | 106 | 107 | def get_yaw(direction): 108 | return np.arctan2(direction[:, 0:1], direction[:, 1:2]) 109 | 110 | 111 | def post_processing(detections, configs): 112 | """ 113 | :param detections: [batch_size, K, 10] 114 | # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) 115 | # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) 116 | :return: 117 | """ 118 | ret = [] 119 | for i in range(detections.shape[0]): 120 | top_preds = {} 121 | classes = detections[i, :, -1] 122 | for j in range(configs.num_classes): 123 | inds = (classes == j) 124 | # x, y, z, h, w, l, yaw 125 | top_preds[j] = np.concatenate([ 126 | detections[i, inds, 0:1], 127 | detections[i, inds, 1:2] * configs.down_ratio, 128 | detections[i, inds, 2:3] * configs.down_ratio, 129 | detections[i, inds, 3:4], 130 | detections[i, inds, 4:5], 131 | detections[i, inds, 5:6] / (configs.lim_y[1]-configs.lim_y[0]) * configs.bev_width, 132 | detections[i, inds, 6:7] / (configs.lim_x[1]-configs.lim_x[0]) * configs.bev_height, 133 | 
get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1) 134 | # Filter by conf_thresh 135 | if len(top_preds[j]) > 0: 136 | keep_inds = (top_preds[j][:, 0] > configs.conf_thresh) 137 | top_preds[j] = top_preds[j][keep_inds] 138 | ret.append(top_preds) 139 | 140 | return ret 141 | -------------------------------------------------------------------------------- /tools/objdet_models/resnet/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | # -*- coding: utf-8 -*- 3 | ----------------------------------------------------------------------------------- 4 | # Author: Nguyen Mau Dung 5 | # DoC: 2020.08.09 6 | # email: nguyenmaudung93.kstn@gmail.com 7 | ----------------------------------------------------------------------------------- 8 | # Description: some utilities of torch (conversion) 9 | ----------------------------------------------------------------------------------- 10 | """ 11 | 12 | import torch 13 | import torch.distributed as dist 14 | 15 | __all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu', 'reduce_tensor', 'to_python_float', '_sigmoid'] 16 | 17 | 18 | def convert2cpu(gpu_matrix): 19 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 20 | 21 | 22 | def convert2cpu_long(gpu_matrix): 23 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 24 | 25 | 26 | def to_cpu(tensor): 27 | return tensor.detach().cpu() 28 | 29 | 30 | def reduce_tensor(tensor, world_size): 31 | rt = tensor.clone() 32 | dist.all_reduce(rt, op=dist.reduce_op.SUM) 33 | rt /= world_size 34 | return rt 35 | 36 | 37 | def to_python_float(t): 38 | if hasattr(t, 'item'): 39 | return t.item() 40 | else: 41 | return t[0] 42 | 43 | 44 | def _sigmoid(x): 45 | return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) 46 | -------------------------------------------------------------------------------- /tools/waymo_reader/README.md: -------------------------------------------------------------------------------- 1 | # Simple Waymo Open Dataset Reader 2 | 3 | This is a simple file reader for the [Waymo Open Dataset](https://waymo.com/open/) which does not depend on TensorFlow and Bazel. The main goal is to be able to quickly integrate Waymo’s dataset with other deep learning frameworks without having to pull tons of dependencies. It does not aim to replace the [whole framework](https://github.com/waymo-research/waymo-open-dataset), especially the evaluation metrics that they provide. 4 | 5 | ## Installation 6 | 7 | Use the provided `setup.py`: 8 | 9 | ``` 10 | python setup.py install 11 | ``` 12 | 13 | ## Usage 14 | 15 | Please refer to the examples in `examples/` for how to use the file reader. Refer to [https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb](https://github.com/waymo-research/waymo-open-dataset/blob/master/tutorial/tutorial.ipynb) for more details on Waymo’s dataset. 16 | 17 | ## License 18 | 19 | This code is released under the Apache License, version 2.0. This projects incorporate some parts of the [Waymo Open Dataset code](https://github.com/waymo-research/waymo-open-dataset/blob/master/README.md) (the files `simple_waymo_open_dataset_reader/*.proto`) and is licensed to you under their original license terms. See `LICENSE` file for details. 
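
For quick orientation, here is a minimal usage sketch based on the `WaymoDataFileReader` class in `simple_waymo_open_dataset_reader/__init__.py`. The segment filename is a placeholder and the frame field name follows the Waymo Open Dataset `Frame` proto; see `examples/` for complete scripts.

```
from simple_waymo_open_dataset_reader import WaymoDataFileReader

# Open a .tfrecord segment file (placeholder path)
reader = WaymoDataFileReader("segment-XXXX.tfrecord")

# Table of byte offsets, one entry per frame record
table = reader.get_record_table()
print("Number of frames:", len(table))

# Iterate over the frames; each record is a dataset_pb2.Frame protobuf
for frame in reader:
    print(frame.timestamp_micros)

# Reset to the first frame
reader.seek(0)
```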
20 | 21 | -------------------------------------------------------------------------------- /tools/waymo_reader/build/lib/simple_waymo_open_dataset_reader/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import struct 17 | from . import dataset_pb2 18 | 19 | class WaymoDataFileReader: 20 | def __init__(self, filename): 21 | self.file = open(filename, "rb") 22 | 23 | def get_record_table(self): 24 | """ Generate and return a table of the offset of all frame records in the file. 25 | 26 | This is particularly useful to determine the number of frames in the file 27 | and access random frames rather than read the file sequentially. 28 | """ 29 | 30 | self.file.seek(0,0) 31 | 32 | table = [] 33 | 34 | while self.file: 35 | offset = self.file.tell() 36 | 37 | try: 38 | self.read_record(header_only=True) 39 | table.append(offset) 40 | except StopIteration: 41 | break 42 | 43 | self.file.seek(0,0) 44 | 45 | return table 46 | 47 | def seek(self, offset): 48 | """ Seek to a specific frame record by offset. 49 | 50 | The offset of each frame in the file can be obtained with the function reader.get_record_table() 51 | """ 52 | 53 | self.file.seek(offset,0) 54 | 55 | def read_record(self, header_only = False): 56 | """ Read the current frame record in the file. 57 | 58 | If repeatedly called, it will return sequential records until the end of file. When the end is reached, it will raise a StopIteration exception. 59 | To reset to the first frame, call reader.seek(0) 60 | """ 61 | 62 | # TODO: Check CRCs. 63 | 64 | header = self.file.read(12) 65 | 66 | if header == b'': 67 | raise StopIteration() 68 | 69 | length, lengthcrc = struct.unpack("QI", header) 70 | 71 | 72 | if header_only: 73 | # Skip length+4 bytes ahead 74 | self.file.seek(length+4,1) 75 | return None 76 | else: 77 | data = self.file.read(length) 78 | datacrc = struct.unpack("I",self.file.read(4)) 79 | 80 | frame = dataset_pb2.Frame() 81 | frame.ParseFromString(data) 82 | return frame 83 | 84 | def __iter__(self): 85 | """ Simple iterator through the file. Note that the iterator will iterate from the current position, does not support concurrent iterators and will not reset back to the beginning when the end is reached. 
To reset to the first frame, call reader.seek(0) 86 | """ 87 | return self 88 | 89 | def __next__(self): 90 | return self.read_record() 91 | 92 | 93 | -------------------------------------------------------------------------------- /tools/waymo_reader/build/lib/simple_waymo_open_dataset_reader/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import numpy as np 17 | from simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 18 | import zlib 19 | import math 20 | import io 21 | 22 | 23 | def get_box_transformation_matrix(box): 24 | """Create a transformation matrix for a given label box pose.""" 25 | 26 | tx,ty,tz = box.center_x,box.center_y,box.center_z 27 | c = math.cos(box.heading) 28 | s = math.sin(box.heading) 29 | 30 | sl, sh, sw = box.length, box.height, box.width 31 | 32 | return np.array([ 33 | [ sl*c,-sw*s, 0,tx], 34 | [ sl*s, sw*c, 0,ty], 35 | [ 0, 0, sh,tz], 36 | [ 0, 0, 0, 1]]) 37 | 38 | def get_3d_box_projected_corners(vehicle_to_image, label): 39 | """Get the 2D coordinates of the 8 corners of a label's 3D bounding box. 40 | 41 | vehicle_to_image: Transformation matrix from the vehicle frame to the image frame. 42 | label: The object label 43 | """ 44 | 45 | box = label.box 46 | 47 | # Get the vehicle pose 48 | box_to_vehicle = get_box_transformation_matrix(box) 49 | 50 | # Calculate the projection from the box space to the image space. 51 | box_to_image = np.matmul(vehicle_to_image, box_to_vehicle) 52 | 53 | 54 | # Loop through the 8 corners constituting the 3D box 55 | # and project them onto the image 56 | vertices = np.empty([2,2,2,2]) 57 | for k in [0, 1]: 58 | for l in [0, 1]: 59 | for m in [0, 1]: 60 | # 3D point in the box space 61 | v = np.array([(k-0.5), (l-0.5), (m-0.5), 1.]) 62 | 63 | # Project the point onto the image 64 | v = np.matmul(box_to_image, v) 65 | 66 | # If any of the corner is behind the camera, ignore this object. 67 | if v[2] < 0: 68 | return None 69 | 70 | vertices[k,l,m,:] = [v[0]/v[2], v[1]/v[2]] 71 | 72 | vertices = vertices.astype(np.int32) 73 | 74 | return vertices 75 | 76 | def compute_2d_bounding_box(img_or_shape,points): 77 | """Compute the 2D bounding box for a set of 2D points. 78 | 79 | img_or_shape: Either an image or the shape of an image. 80 | img_or_shape is used to clamp the bounding box coordinates. 
81 | 82 | points: The set of 2D points to use 83 | """ 84 | 85 | if isinstance(img_or_shape,tuple): 86 | shape = img_or_shape 87 | else: 88 | shape = img_or_shape.shape 89 | 90 | # Compute the 2D bounding box and draw a rectangle 91 | x1 = np.amin(points[...,0]) 92 | x2 = np.amax(points[...,0]) 93 | y1 = np.amin(points[...,1]) 94 | y2 = np.amax(points[...,1]) 95 | 96 | x1 = min(max(0,x1),shape[1]) 97 | x2 = min(max(0,x2),shape[1]) 98 | y1 = min(max(0,y1),shape[0]) 99 | y2 = min(max(0,y2),shape[0]) 100 | 101 | return (x1,y1,x2,y2) 102 | 103 | def draw_3d_box(img, vehicle_to_image, label, colour=(255,128,128), draw_2d_bounding_box=False): 104 | """Draw a 3D bounding from a given 3D label on a given "img". "vehicle_to_image" must be a projection matrix from the vehicle reference frame to the image space. 105 | 106 | draw_2d_bounding_box: If set a 2D bounding box encompassing the 3D box will be drawn 107 | """ 108 | import cv2 109 | 110 | vertices = get_3d_box_projected_corners(vehicle_to_image, label) 111 | 112 | if vertices is None: 113 | # The box is not visible in this image 114 | return 115 | 116 | if draw_2d_bounding_box: 117 | x1,y1,x2,y2 = compute_2d_bounding_box(img.shape, vertices) 118 | 119 | if (x1 != x2 and y1 != y2): 120 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 1) 121 | else: 122 | # Draw the edges of the 3D bounding box 123 | for k in [0, 1]: 124 | for l in [0, 1]: 125 | for idx1,idx2 in [((0,k,l),(1,k,l)), ((k,0,l),(k,1,l)), ((k,l,0),(k,l,1))]: 126 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=1) 127 | # Draw a cross on the front face to identify front & back. 128 | for idx1,idx2 in [((1,0,0),(1,1,1)), ((1,1,0),(1,0,1))]: 129 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=1) 130 | 131 | def draw_2d_box(img, label, colour=(255,128,128)): 132 | """Draw a 2D bounding from a given 2D label on a given "img". 133 | """ 134 | import cv2 135 | 136 | box = label.box 137 | 138 | # Extract the 2D coordinates 139 | # It seems that "length" is the actual width and "width" is the actual height of the bounding box. Most peculiar. 140 | x1 = int(box.center_x - box.length/2) 141 | x2 = int(box.center_x + box.length/2) 142 | y1 = int(box.center_y - box.width/2) 143 | y2 = int(box.center_y + box.width/2) 144 | 145 | # Draw the rectangle 146 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 1) 147 | 148 | 149 | def decode_image(camera): 150 | """ Decode the JPEG image. """ 151 | 152 | from PIL import Image 153 | return np.array(Image.open(io.BytesIO(camera.image))) 154 | 155 | def get_image_transform(camera_calibration): 156 | """ For a given camera calibration, compute the transformation matrix 157 | from the vehicle reference frame to the image space. 158 | """ 159 | 160 | # TODO: Handle the camera distortions 161 | extrinsic = np.array(camera_calibration.extrinsic.transform).reshape(4,4) 162 | intrinsic = camera_calibration.intrinsic 163 | 164 | # Camera model: 165 | # | fx 0 cx 0 | 166 | # | 0 fy cy 0 | 167 | # | 0 0 1 0 | 168 | camera_model = np.array([ 169 | [intrinsic[0], 0, intrinsic[2], 0], 170 | [0, intrinsic[1], intrinsic[3], 0], 171 | [0, 0, 1, 0]]) 172 | 173 | # Swap the axes around 174 | axes_transformation = np.array([ 175 | [0,-1,0,0], 176 | [0,0,-1,0], 177 | [1,0,0,0], 178 | [0,0,0,1]]) 179 | 180 | # Compute the projection matrix from the vehicle space to image space. 
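    # The chain below is: vehicle frame -> camera frame via np.linalg.inv(extrinsic)
    # (the extrinsic is the camera-to-vehicle transform), then the axes swap from the
    # Waymo camera convention (x forward, y left, z up) into the optical convention
    # (x right, y down, z forward), then the pinhole projection via camera_model.
    # A homogeneous vehicle point p_v is thus mapped, up to the perspective divide
    # by depth, as camera_model @ axes_transformation @ inv(extrinsic) @ p_v.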
181 | vehicle_to_image = np.matmul(camera_model, np.matmul(axes_transformation, np.linalg.inv(extrinsic))) 182 | return vehicle_to_image 183 | 184 | def get_rotation_matrix(roll, pitch, yaw): 185 | """ Convert Euler angles to a rotation matrix""" 186 | 187 | cos_roll = np.cos(roll) 188 | sin_roll = np.sin(roll) 189 | cos_yaw = np.cos(yaw) 190 | sin_yaw = np.sin(yaw) 191 | cos_pitch = np.cos(pitch) 192 | sin_pitch = np.sin(pitch) 193 | 194 | ones = np.ones_like(yaw) 195 | zeros = np.zeros_like(yaw) 196 | 197 | r_roll = np.stack([ 198 | [ones, zeros, zeros], 199 | [zeros, cos_roll, -sin_roll], 200 | [zeros, sin_roll, cos_roll]]) 201 | 202 | r_pitch = np.stack([ 203 | [ cos_pitch, zeros, sin_pitch], 204 | [ zeros, ones, zeros], 205 | [-sin_pitch, zeros, cos_pitch]]) 206 | 207 | r_yaw = np.stack([ 208 | [cos_yaw, -sin_yaw, zeros], 209 | [sin_yaw, cos_yaw, zeros], 210 | [zeros, zeros, ones]]) 211 | 212 | pose = np.einsum('ijhw,jkhw,klhw->ilhw',r_yaw,r_pitch,r_roll) 213 | pose = pose.transpose(2,3,0,1) 214 | return pose 215 | 216 | def parse_range_image_and_camera_projection(laser, second_response=False): 217 | """ Parse the range image for a given laser. 218 | 219 | second_response: If true, return the second strongest response instead of the primary response. 220 | The second_response might be useful to detect the edge of objects 221 | """ 222 | 223 | range_image_pose = None 224 | camera_projection = None 225 | 226 | if not second_response: 227 | # Return the strongest response if available 228 | if len(laser.ri_return1.range_image_compressed) > 0: 229 | ri = dataset_pb2.MatrixFloat() 230 | ri.ParseFromString( 231 | zlib.decompress(laser.ri_return1.range_image_compressed)) 232 | ri = np.array(ri.data).reshape(ri.shape.dims) 233 | 234 | if laser.name == dataset_pb2.LaserName.TOP: 235 | range_image_top_pose = dataset_pb2.MatrixFloat() 236 | range_image_top_pose.ParseFromString( 237 | zlib.decompress(laser.ri_return1.range_image_pose_compressed)) 238 | range_image_pose = np.array(range_image_top_pose.data).reshape(range_image_top_pose.shape.dims) 239 | 240 | camera_projection = dataset_pb2.MatrixInt32() 241 | camera_projection.ParseFromString( 242 | zlib.decompress(laser.ri_return1.camera_projection_compressed)) 243 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 244 | 245 | else: 246 | # Return the second strongest response if available 247 | 248 | if len(laser.ri_return2.range_image_compressed) > 0: 249 | ri = dataset_pb2.MatrixFloat() 250 | ri.ParseFromString( 251 | zlib.decompress(laser.ri_return2.range_image_compressed)) 252 | ri = np.array(ri.data).reshape(ri.shape.dims) 253 | 254 | camera_projection = dataset_pb2.MatrixInt32() 255 | camera_projection.ParseFromString( 256 | zlib.decompress(laser.ri_return2.camera_projection_compressed)) 257 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 258 | 259 | return ri, camera_projection, range_image_pose 260 | 261 | def compute_beam_inclinations(calibration, height): 262 | """ Compute the inclination angle for each beam in a range image. 
""" 263 | 264 | if len(calibration.beam_inclinations) > 0: 265 | return np.array(calibration.beam_inclinations) 266 | else: 267 | inclination_min = calibration.beam_inclination_min 268 | inclination_max = calibration.beam_inclination_max 269 | 270 | return np.linspace(inclination_min, inclination_max, height) 271 | 272 | def compute_range_image_polar(range_image, extrinsic, inclination): 273 | """ Convert a range image to polar coordinates. """ 274 | 275 | height = range_image.shape[0] 276 | width = range_image.shape[1] 277 | 278 | az_correction = math.atan2(extrinsic[1,0], extrinsic[0,0]) 279 | azimuth = np.linspace(np.pi,-np.pi,width) - az_correction 280 | 281 | azimuth_tiled = np.broadcast_to(azimuth[np.newaxis,:], (height,width)) 282 | inclination_tiled = np.broadcast_to(inclination[:,np.newaxis],(height,width)) 283 | 284 | return np.stack((azimuth_tiled,inclination_tiled,range_image)) 285 | 286 | def compute_range_image_cartesian(range_image_polar, extrinsic, pixel_pose, frame_pose): 287 | """ Convert polar coordinates to cartesian coordinates. """ 288 | 289 | azimuth = range_image_polar[0] 290 | inclination = range_image_polar[1] 291 | range_image_range = range_image_polar[2] 292 | 293 | cos_azimuth = np.cos(azimuth) 294 | sin_azimuth = np.sin(azimuth) 295 | cos_incl = np.cos(inclination) 296 | sin_incl = np.sin(inclination) 297 | 298 | x = cos_azimuth * cos_incl * range_image_range 299 | y = sin_azimuth * cos_incl * range_image_range 300 | z = sin_incl * range_image_range 301 | 302 | range_image_points = np.stack([x,y,z,np.ones_like(z)]) 303 | 304 | range_image_points = np.einsum('ij,jkl->ikl', extrinsic,range_image_points) 305 | 306 | # TODO: Use the pixel_pose matrix. It seems that the bottom part of the pixel pose 307 | # matrix is missing. Not sure if this is a bug in the dataset. 308 | 309 | #if pixel_pose is not None: 310 | # range_image_points = np.einsum('hwij,jhw->ihw', pixel_pose, range_image_points) 311 | # frame_pos_inv = np.linalg.inv(frame_pose) 312 | # range_image_points = np.einsum('ij,jhw->ihw',frame_pos_inv,range_image_points) 313 | 314 | 315 | return range_image_points 316 | 317 | 318 | def project_to_pointcloud(frame, ri, camera_projection, range_image_pose, calibration): 319 | """ Create a pointcloud in vehicle space from LIDAR range image. """ 320 | beam_inclinations = compute_beam_inclinations(calibration, ri.shape[0]) 321 | beam_inclinations = np.flip(beam_inclinations) 322 | 323 | extrinsic = np.array(calibration.extrinsic.transform).reshape(4,4) 324 | frame_pose = np.array(frame.pose.transform).reshape(4,4) 325 | 326 | ri_polar = compute_range_image_polar(ri[:,:,0], extrinsic, beam_inclinations) 327 | 328 | if range_image_pose is None: 329 | pixel_pose = None 330 | else: 331 | pixel_pose = get_rotation_matrix(range_image_pose[:,:,0], range_image_pose[:,:,1], range_image_pose[:,:,2]) 332 | translation = range_image_pose[:,:,3:] 333 | pixel_pose = np.block([ 334 | [pixel_pose, translation[:,:,:,np.newaxis]], 335 | [np.zeros_like(translation)[:,:,np.newaxis],np.ones_like(translation[:,:,0])[:,:,np.newaxis,np.newaxis]]]) 336 | 337 | 338 | ri_cartesian = compute_range_image_cartesian(ri_polar, extrinsic, pixel_pose, frame_pose) 339 | ri_cartesian = ri_cartesian.transpose(1,2,0) 340 | 341 | mask = ri[:,:,0] > 0 342 | 343 | return ri_cartesian[mask,:3], ri[mask] 344 | 345 | 346 | def get(object_list, name): 347 | """ Search for an object by name in an object list. 
""" 348 | 349 | object_list = [obj for obj in object_list if obj.name == name] 350 | return object_list[0] 351 | 352 | -------------------------------------------------------------------------------- /tools/waymo_reader/dist/simple_waymo_open_dataset_reader-0.0.0-py3.8.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mabhi16/3D_Object_detection_midterm/16db7891a5b1c254b9190aab91a8486b6e9e8e62/tools/waymo_reader/dist/simple_waymo_open_dataset_reader-0.0.0-py3.8.egg -------------------------------------------------------------------------------- /tools/waymo_reader/generate_proto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | protoc -I=. --python_out=. simple_waymo_open_dataset_reader/label.proto 4 | protoc -I=. --python_out=. simple_waymo_open_dataset_reader/dataset.proto 5 | 6 | -------------------------------------------------------------------------------- /tools/waymo_reader/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="simple_waymo_open_dataset_reader", 5 | packages=['simple_waymo_open_dataset_reader'], 6 | install_requires=['protobuf']) 7 | 8 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: simple-waymo-open-dataset-reader 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.py 3 | simple_waymo_open_dataset_reader/__init__.py 4 | simple_waymo_open_dataset_reader/dataset_pb2.py 5 | simple_waymo_open_dataset_reader/label_pb2.py 6 | simple_waymo_open_dataset_reader/utils.py 7 | simple_waymo_open_dataset_reader.egg-info/PKG-INFO 8 | simple_waymo_open_dataset_reader.egg-info/SOURCES.txt 9 | simple_waymo_open_dataset_reader.egg-info/dependency_links.txt 10 | simple_waymo_open_dataset_reader.egg-info/requires.txt 11 | simple_waymo_open_dataset_reader.egg-info/top_level.txt -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | protobuf 2 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | simple_waymo_open_dataset_reader 2 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import struct 17 | from . import dataset_pb2 18 | 19 | class WaymoDataFileReader: 20 | def __init__(self, filename): 21 | self.file = open(filename, "rb") 22 | 23 | def get_record_table(self): 24 | """ Generate and return a table of the offset of all frame records in the file. 25 | 26 | This is particularly useful to determine the number of frames in the file 27 | and access random frames rather than read the file sequentially. 28 | """ 29 | 30 | self.file.seek(0,0) 31 | 32 | table = [] 33 | 34 | while self.file: 35 | offset = self.file.tell() 36 | 37 | try: 38 | self.read_record(header_only=True) 39 | table.append(offset) 40 | except StopIteration: 41 | break 42 | 43 | self.file.seek(0,0) 44 | 45 | return table 46 | 47 | def seek(self, offset): 48 | """ Seek to a specific frame record by offset. 49 | 50 | The offset of each frame in the file can be obtained with the function reader.get_record_table() 51 | """ 52 | 53 | self.file.seek(offset,0) 54 | 55 | def read_record(self, header_only = False): 56 | """ Read the current frame record in the file. 57 | 58 | If repeatedly called, it will return sequential records until the end of file. When the end is reached, it will raise a StopIteration exception. 59 | To reset to the first frame, call reader.seek(0) 60 | """ 61 | 62 | # TODO: Check CRCs. 63 | 64 | header = self.file.read(12) 65 | 66 | if header == b'': 67 | raise StopIteration() 68 | 69 | length, lengthcrc = struct.unpack("QI", header) 70 | 71 | 72 | if header_only: 73 | # Skip length+4 bytes ahead 74 | self.file.seek(length+4,1) 75 | return None 76 | else: 77 | data = self.file.read(length) 78 | datacrc = struct.unpack("I",self.file.read(4)) 79 | 80 | frame = dataset_pb2.Frame() 81 | frame.ParseFromString(data) 82 | return frame 83 | 84 | def __iter__(self): 85 | """ Simple iterator through the file. Note that the iterator will iterate from the current position, does not support concurrent iterators and will not reset back to the beginning when the end is reached. To reset to the first frame, call reader.seek(0) 86 | """ 87 | return self 88 | 89 | def __next__(self): 90 | return self.read_record() 91 | 92 | 93 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/dataset.proto: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The Waymo Open Dataset Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | syntax = "proto2"; 17 | 18 | package waymo.open_dataset; 19 | 20 | import "simple_waymo_open_dataset_reader/label.proto"; 21 | 22 | message MatrixShape { 23 | // Dimensions for the Matrix messages defined below. Must not be empty. 24 | // 25 | // The order of entries in 'dims' matters, as it indicates the layout of the 26 | // values in the tensor in-memory representation. 27 | // 28 | // The first entry in 'dims' is the outermost dimension used to lay out the 29 | // values; the last entry is the innermost dimension. This matches the 30 | // in-memory layout of row-major matrices. 31 | repeated int32 dims = 1; 32 | } 33 | 34 | // Row-major matrix. 35 | // Requires: data.size() = product(shape.dims()). 36 | message MatrixFloat { 37 | repeated float data = 1 [packed = true]; 38 | optional MatrixShape shape = 2; 39 | } 40 | 41 | // Row-major matrix. 42 | // Requires: data.size() = product(shape.dims()). 43 | message MatrixInt32 { 44 | repeated int32 data = 1 [packed = true]; 45 | optional MatrixShape shape = 2; 46 | } 47 | 48 | message CameraName { 49 | enum Name { 50 | UNKNOWN = 0; 51 | FRONT = 1; 52 | FRONT_LEFT = 2; 53 | FRONT_RIGHT = 3; 54 | SIDE_LEFT = 4; 55 | SIDE_RIGHT = 5; 56 | } 57 | } 58 | 59 | // 'Laser' is used interchangeably with 'Lidar' in this file. 60 | message LaserName { 61 | enum Name { 62 | UNKNOWN = 0; 63 | TOP = 1; 64 | FRONT = 2; 65 | SIDE_LEFT = 3; 66 | SIDE_RIGHT = 4; 67 | REAR = 5; 68 | } 69 | } 70 | 71 | // 4x4 row major transform matrix that tranforms 3d points from one frame to 72 | // another. 73 | message Transform { 74 | repeated double transform = 1; 75 | } 76 | 77 | message Velocity { 78 | // Velocity in m/s. 79 | optional float v_x = 1; 80 | optional float v_y = 2; 81 | optional float v_z = 3; 82 | 83 | // Angular velocity in rad/s. 84 | optional double w_x = 4; 85 | optional double w_y = 5; 86 | optional double w_z = 6; 87 | } 88 | 89 | message CameraCalibration { 90 | optional CameraName.Name name = 1; 91 | // 1d Array of [f_u, f_v, c_u, c_v, k{1, 2}, p{1, 2}, k{3}]. 92 | // Note that this intrinsic corresponds to the images after scaling. 93 | // Camera model: pinhole camera. 94 | // Lens distortion: 95 | // Radial distortion coefficients: k1, k2, k3. 96 | // Tangential distortion coefficients: p1, p2. 97 | // k_{1, 2, 3}, p_{1, 2} follows the same definition as OpenCV. 98 | // https://en.wikipedia.org/wiki/Distortion_(optics) 99 | // https://docs.opencv.org/2.4/doc/tutorials/calib3d/camera_calibration/camera_calibration.html 100 | repeated double intrinsic = 2; 101 | // Vehicle frame to camera frame. 102 | optional Transform extrinsic = 3; 103 | // Camera image size. 
104 | optional int32 width = 4; 105 | optional int32 height = 5; 106 | 107 | enum RollingShutterReadOutDirection { 108 | UNKNOWN = 0; 109 | TOP_TO_BOTTOM = 1; 110 | LEFT_TO_RIGHT = 2; 111 | BOTTOM_TO_TOP = 3; 112 | RIGHT_TO_LEFT = 4; 113 | GLOBAL_SHUTTER = 5; 114 | } 115 | optional RollingShutterReadOutDirection rolling_shutter_direction = 6; 116 | } 117 | 118 | message LaserCalibration { 119 | optional LaserName.Name name = 1; 120 | // If non-empty, the beam pitch (in radians) is non-uniform. When constructing 121 | // a range image, this mapping is used to map from beam pitch to range image 122 | // row. If this is empty, we assume a uniform distribution. 123 | repeated double beam_inclinations = 2; 124 | // beam_inclination_{min,max} (in radians) are used to determine the mapping. 125 | optional double beam_inclination_min = 3; 126 | optional double beam_inclination_max = 4; 127 | // Lidar frame to vehicle frame. 128 | optional Transform extrinsic = 5; 129 | } 130 | 131 | message Context { 132 | // A unique name that identifies the frame sequence. 133 | optional string name = 1; 134 | repeated CameraCalibration camera_calibrations = 2; 135 | repeated LaserCalibration laser_calibrations = 3; 136 | // Some stats for the run segment used. 137 | message Stats { 138 | message ObjectCount { 139 | optional Label.Type type = 1; 140 | // The number of unique objects with the type in the segment. 141 | optional int32 count = 2; 142 | } 143 | repeated ObjectCount laser_object_counts = 1; 144 | repeated ObjectCount camera_object_counts = 5; 145 | // Day, Dawn/Dusk, or Night, determined from sun elevation. 146 | optional string time_of_day = 2; 147 | // Human readable location (e.g. CHD, SF) of the run segment. 148 | optional string location = 3; 149 | // Currently either Sunny or Rain. 150 | optional string weather = 4; 151 | } 152 | optional Stats stats = 4; 153 | } 154 | 155 | // Range image is a 2d tensor. The first dim (row) represents pitch. The second 156 | // dim represents yaw. 157 | // There are two types of range images: 158 | // 1. Raw range image: Raw range image with a non-empty 159 | // 'range_image_pose_compressed' which tells the vehicle pose of each 160 | // range image cell. 161 | // 2. Virtual range image: Range image with an empty 162 | // 'range_image_pose_compressed'. This range image is constructed by 163 | // transforming all lidar points into a fixed vehicle frame (usually the 164 | // vehicle frame of the middle scan). 165 | // NOTE: 'range_image_pose_compressed' is only populated for the first range 166 | // image return. The second return has the exact the same range image pose as 167 | // the first one. 168 | message RangeImage { 169 | // Zlib compressed [H, W, 4] serialized version of MatrixFloat. 170 | // To decompress: 171 | // string val = ZlibDecompress(range_image_compressed); 172 | // MatrixFloat range_image; 173 | // range_image.ParseFromString(val); 174 | // Inner dimensions are: 175 | // * channel 0: range 176 | // * channel 1: intensity 177 | // * channel 2: elongation 178 | // * channel 3: is in any no label zone. 179 | optional bytes range_image_compressed = 2; 180 | 181 | // Lidar point to camera image projections. A point can be projected to 182 | // multiple camera images. We pick the first two at the following order: 183 | // [FRONT, FRONT_LEFT, FRONT_RIGHT, SIDE_LEFT, SIDE_RIGHT]. 184 | // 185 | // Zlib compressed [H, W, 6] serialized version of MatrixInt32. 
186 | // To decompress: 187 | // string val = ZlibDecompress(camera_projection_compressed); 188 | // MatrixInt32 camera_projection; 189 | // camera_projection.ParseFromString(val); 190 | // Inner dimensions are: 191 | // * channel 0: CameraName.Name of 1st projection. Set to UNKNOWN if no 192 | // projection. 193 | // * channel 1: x (axis along image width) 194 | // * channel 2: y (axis along image height) 195 | // * channel 3: CameraName.Name of 2nd projection. Set to UNKNOWN if no 196 | // projection. 197 | // * channel 4: x (axis along image width) 198 | // * channel 5: y (axis along image height) 199 | // Note: pixel 0 corresponds to the left edge of the first pixel in the image. 200 | optional bytes camera_projection_compressed = 3; 201 | 202 | // Zlib compressed [H, W, 6] serialized version of MatrixFloat. 203 | // To decompress: 204 | // string val = ZlibDecompress(range_image_pose_compressed); 205 | // MatrixFloat range_image_pose; 206 | // range_image_pose.ParseFromString(val); 207 | // Inner dimensions are [roll, pitch, yaw, x, y, z] represents a transform 208 | // from vehicle frame to global frame for every range image pixel. 209 | // This is ONLY populated for the first return. The second return is assumed 210 | // to have exactly the same range_image_pose_compressed. 211 | // 212 | // The roll, pitch and yaw are specified as 3-2-1 Euler angle rotations, 213 | // meaning that rotating from the navigation to vehicle frame consists of a 214 | // yaw, then pitch and finally roll rotation about the z, y and x axes 215 | // respectively. All rotations use the right hand rule and are positive 216 | // in the counter clockwise direction. 217 | optional bytes range_image_pose_compressed = 4; 218 | 219 | // Deprecated, do not use. 220 | optional MatrixFloat range_image = 1 [deprecated = true]; 221 | } 222 | 223 | // All timestamps in this proto are represented as seconds since Unix epoch. 224 | message CameraImage { 225 | optional CameraName.Name name = 1; 226 | // JPEG image. 227 | optional bytes image = 2; 228 | // SDC pose. 229 | optional Transform pose = 3; 230 | // SDC velocity at 'pose_timestamp' below. The velocity value is represented 231 | // at vehicle frame. 232 | // With this velocity, the pose can be extrapolated. 233 | // r(t+dt) = r(t) + dr/dt * dt where dr/dt = v_{x,y,z}. 234 | // R(t+dt) = R(t) + R(t)*SkewSymmetric(w_{x,y,z})*dt 235 | // r(t) = (x(t), y(t), z(t)) is vehicle location at t in the global frame. 236 | // R(t) = Rotation Matrix (3x3) from the body frame to the global frame at t. 237 | // SkewSymmetric(x,y,z) is defined as the cross-product matrix in the 238 | // following: 239 | // https://en.wikipedia.org/wiki/Cross_product#Conversion_to_matrix_multiplication 240 | optional Velocity velocity = 4; 241 | // Timestamp of the `pose` above. 242 | optional double pose_timestamp = 5; 243 | 244 | // Rolling shutter params. 245 | 246 | // Shutter duration in seconds. Time taken for one shutter. 247 | optional double shutter = 6; 248 | // Time when the sensor was triggered and when readout finished. 249 | // The difference between trigger time and readout done time includes 250 | // the exposure time and the actual sensor readout time. 251 | optional double camera_trigger_time = 7; 252 | optional double camera_readout_done_time = 8; 253 | } 254 | 255 | // The camera labels associated with a given camera image. This message 256 | // indicates the ground truth information for the camera image 257 | // recorded by the given camera. 
If there are no labeled objects in the image, 258 | // then the labels field is empty. 259 | message CameraLabels { 260 | optional CameraName.Name name = 1; 261 | repeated Label labels = 2; 262 | } 263 | 264 | message Laser { 265 | optional LaserName.Name name = 1; 266 | optional RangeImage ri_return1 = 2; 267 | optional RangeImage ri_return2 = 3; 268 | } 269 | 270 | message Frame { 271 | // This context is the same for all frames belong to the same driving run 272 | // segment. Use context.name to identify frames belong to the same driving 273 | // segment. We do not store all frames from one driving segment in one proto 274 | // to avoid huge protos. 275 | optional Context context = 1; 276 | 277 | // Frame start time, which is the timestamp of the first top lidar spin 278 | // within this frame. 279 | optional int64 timestamp_micros = 2; 280 | // The vehicle pose. 281 | optional Transform pose = 3; 282 | repeated CameraImage images = 4; 283 | repeated Laser lasers = 5; 284 | repeated Label laser_labels = 6; 285 | // Lidar labels (laser_labels) projected to camera images. A projected 286 | // label is the smallest image axis aligned rectangle that can cover all 287 | // projected points from the 3d lidar label. The projected label is ignored if 288 | // the projection is fully outside a camera image. The projected label is 289 | // clamped to the camera image if it is partially outside. 290 | repeated CameraLabels projected_lidar_labels = 9; 291 | // NOTE: if a camera identified by CameraLabels.name has an entry in this 292 | // field, then it has been labeled, even though it is possible that there are 293 | // no labeled objects in the corresponding image, which is identified by a 294 | // zero sized CameraLabels.labels. 295 | repeated CameraLabels camera_labels = 8; 296 | // No label zones in the *global* frame. 297 | repeated Polygon2dProto no_label_zones = 7; 298 | } 299 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/label.proto: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The Waymo Open Dataset Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | syntax = "proto2"; 17 | 18 | package waymo.open_dataset; 19 | 20 | message Label { 21 | // Upright box, zero pitch and roll. 22 | message Box { 23 | // Box coordinates in vehicle frame. 24 | optional double center_x = 1; 25 | optional double center_y = 2; 26 | optional double center_z = 3; 27 | 28 | // Dimensions of the box. length: dim x. width: dim y. height: dim z. 29 | optional double length = 5; 30 | optional double width = 4; 31 | optional double height = 6; 32 | 33 | // The heading of the bounding box (in radians). The heading is the angle 34 | // required to rotate +x to the surface normal of the SDC front face. 
35 | optional double heading = 7; 36 | 37 | enum Type { 38 | TYPE_UNKNOWN = 0; 39 | // 7-DOF 3D (a.k.a upright 3D box). 40 | TYPE_3D = 1; 41 | // 5-DOF 2D. Mostly used for laser top down representation. 42 | TYPE_2D = 2; 43 | // Axis aligned 2D. Mostly used for image. 44 | TYPE_AA_2D = 3; 45 | } 46 | } 47 | 48 | optional Box box = 1; 49 | 50 | message Metadata { 51 | optional double speed_x = 1; 52 | optional double speed_y = 2; 53 | optional double accel_x = 3; 54 | optional double accel_y = 4; 55 | } 56 | optional Metadata metadata = 2; 57 | 58 | enum Type { 59 | TYPE_UNKNOWN = 0; 60 | TYPE_VEHICLE = 1; 61 | TYPE_PEDESTRIAN = 2; 62 | TYPE_SIGN = 3; 63 | TYPE_CYCLIST = 4; 64 | } 65 | optional Type type = 3; 66 | // Object ID. 67 | optional string id = 4; 68 | 69 | // The difficulty level of this label. The higher the level, the harder it is. 70 | enum DifficultyLevel { 71 | UNKNOWN = 0; 72 | LEVEL_1 = 1; 73 | LEVEL_2 = 2; 74 | } 75 | 76 | // Difficulty level for detection problem. 77 | optional DifficultyLevel detection_difficulty_level = 5; 78 | // Difficulty level for tracking problem. 79 | optional DifficultyLevel tracking_difficulty_level = 6; 80 | } 81 | 82 | // Non-self-intersecting 2d polygons. This polygon is not necessarily convex. 83 | message Polygon2dProto { 84 | repeated double x = 1; 85 | repeated double y = 2; 86 | 87 | // A globally unique ID. 88 | optional string id = 3; 89 | } 90 | -------------------------------------------------------------------------------- /tools/waymo_reader/simple_waymo_open_dataset_reader/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, Grégoire Payen de La Garanderie, Durham University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import numpy as np 17 | import zlib 18 | import math 19 | import io 20 | 21 | # add project directory to python path to enable relative imports 22 | import os 23 | import sys 24 | PACKAGE_PARENT = '..' 
25 | SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser(__file__)))) 26 | sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) 27 | 28 | # from simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 29 | from tools.waymo_reader.simple_waymo_open_dataset_reader import dataset_pb2, label_pb2 30 | 31 | 32 | 33 | def get_box_transformation_matrix(box): 34 | """Create a transformation matrix for a given label box pose.""" 35 | 36 | tx,ty,tz = box.center_x,box.center_y,box.center_z 37 | c = math.cos(box.heading) 38 | s = math.sin(box.heading) 39 | 40 | sl, sh, sw = box.length, box.height, box.width 41 | 42 | return np.array([ 43 | [ sl*c,-sw*s, 0,tx], 44 | [ sl*s, sw*c, 0,ty], 45 | [ 0, 0, sh,tz], 46 | [ 0, 0, 0, 1]]) 47 | 48 | def get_3d_box_projected_corners(vehicle_to_image, label): 49 | """Get the 2D coordinates of the 8 corners of a label's 3D bounding box. 50 | 51 | vehicle_to_image: Transformation matrix from the vehicle frame to the image frame. 52 | label: The object label 53 | """ 54 | 55 | box = label.box 56 | 57 | # Get the vehicle pose 58 | box_to_vehicle = get_box_transformation_matrix(box) 59 | 60 | # Calculate the projection from the box space to the image space. 61 | box_to_image = np.matmul(vehicle_to_image, box_to_vehicle) 62 | 63 | 64 | # Loop through the 8 corners constituting the 3D box 65 | # and project them onto the image 66 | vertices = np.empty([2,2,2,2]) 67 | for k in [0, 1]: 68 | for l in [0, 1]: 69 | for m in [0, 1]: 70 | # 3D point in the box space 71 | v = np.array([(k-0.5), (l-0.5), (m-0.5), 1.]) 72 | 73 | # Project the point onto the image 74 | v = np.matmul(box_to_image, v) 75 | 76 | # If any of the corner is behind the camera, ignore this object. 77 | if v[2] < 0: 78 | return None 79 | 80 | vertices[k,l,m,:] = [v[0]/v[2], v[1]/v[2]] 81 | 82 | vertices = vertices.astype(np.int32) 83 | 84 | return vertices 85 | 86 | def compute_2d_bounding_box(img_or_shape,points): 87 | """Compute the 2D bounding box for a set of 2D points. 88 | 89 | img_or_shape: Either an image or the shape of an image. 90 | img_or_shape is used to clamp the bounding box coordinates. 91 | 92 | points: The set of 2D points to use 93 | """ 94 | 95 | if isinstance(img_or_shape,tuple): 96 | shape = img_or_shape 97 | else: 98 | shape = img_or_shape.shape 99 | 100 | # Compute the 2D bounding box and draw a rectangle 101 | x1 = np.amin(points[...,0]) 102 | x2 = np.amax(points[...,0]) 103 | y1 = np.amin(points[...,1]) 104 | y2 = np.amax(points[...,1]) 105 | 106 | x1 = min(max(0,x1),shape[1]) 107 | x2 = min(max(0,x2),shape[1]) 108 | y1 = min(max(0,y1),shape[0]) 109 | y2 = min(max(0,y2),shape[0]) 110 | 111 | return (x1,y1,x2,y2) 112 | 113 | def draw_3d_box(img, vehicle_to_image, label, colour=(255,128,128), draw_2d_bounding_box=False): 114 | """Draw a 3D bounding from a given 3D label on a given "img". "vehicle_to_image" must be a projection matrix from the vehicle reference frame to the image space. 
115 | 116 | draw_2d_bounding_box: If set a 2D bounding box encompassing the 3D box will be drawn 117 | """ 118 | import cv2 119 | 120 | vertices = get_3d_box_projected_corners(vehicle_to_image, label) 121 | 122 | if vertices is None: 123 | # The box is not visible in this image 124 | return 125 | 126 | if draw_2d_bounding_box: 127 | x1,y1,x2,y2 = compute_2d_bounding_box(img.shape, vertices) 128 | 129 | if (x1 != x2 and y1 != y2): 130 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 2) 131 | else: 132 | # Draw the edges of the 3D bounding box 133 | for k in [0, 1]: 134 | for l in [0, 1]: 135 | for idx1,idx2 in [((0,k,l),(1,k,l)), ((k,0,l),(k,1,l)), ((k,l,0),(k,l,1))]: 136 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=2) 137 | # Draw a cross on the front face to identify front & back. 138 | for idx1,idx2 in [((1,0,0),(1,1,1)), ((1,1,0),(1,0,1))]: 139 | cv2.line(img, tuple(vertices[idx1]), tuple(vertices[idx2]), colour, thickness=2) 140 | 141 | def draw_2d_box(img, label, colour=(255,128,128)): 142 | """Draw a 2D bounding from a given 2D label on a given "img". 143 | """ 144 | import cv2 145 | 146 | box = label.box 147 | 148 | # Extract the 2D coordinates 149 | # It seems that "length" is the actual width and "width" is the actual height of the bounding box. Most peculiar. 150 | x1 = int(box.center_x - box.length/2) 151 | x2 = int(box.center_x + box.length/2) 152 | y1 = int(box.center_y - box.width/2) 153 | y2 = int(box.center_y + box.width/2) 154 | 155 | # Draw the rectangle 156 | cv2.rectangle(img, (x1,y1), (x2,y2), colour, thickness = 1) 157 | 158 | 159 | def decode_image(camera): 160 | """ Decode the JPEG image. """ 161 | 162 | from PIL import Image 163 | return np.array(Image.open(io.BytesIO(camera.image))) 164 | 165 | def get_image_transform(camera_calibration): 166 | """ For a given camera calibration, compute the transformation matrix 167 | from the vehicle reference frame to the image space. 168 | """ 169 | 170 | # TODO: Handle the camera distortions 171 | extrinsic = np.array(camera_calibration.extrinsic.transform).reshape(4,4) 172 | intrinsic = camera_calibration.intrinsic 173 | 174 | # Camera model: 175 | # | fx 0 cx 0 | 176 | # | 0 fy cy 0 | 177 | # | 0 0 1 0 | 178 | camera_model = np.array([ 179 | [intrinsic[0], 0, intrinsic[2], 0], 180 | [0, intrinsic[1], intrinsic[3], 0], 181 | [0, 0, 1, 0]]) 182 | 183 | # Swap the axes around 184 | axes_transformation = np.array([ 185 | [0,-1,0,0], 186 | [0,0,-1,0], 187 | [1,0,0,0], 188 | [0,0,0,1]]) 189 | 190 | # Compute the projection matrix from the vehicle space to image space. 191 | vehicle_to_image = np.matmul(camera_model, np.matmul(axes_transformation, np.linalg.inv(extrinsic))) 192 | return vehicle_to_image 193 | 194 | def parse_range_image_and_camera_projection(laser, second_response=False): 195 | """ Parse the range image for a given laser. 196 | 197 | second_response: If true, return the second strongest response instead of the primary response. 
198 | The second_response might be useful to detect the edge of objects 199 | """ 200 | 201 | range_image_pose = None 202 | camera_projection = None 203 | 204 | if not second_response: 205 | # Return the strongest response if available 206 | if len(laser.ri_return1.range_image_compressed) > 0: 207 | ri = dataset_pb2.MatrixFloat() 208 | ri.ParseFromString( 209 | zlib.decompress(laser.ri_return1.range_image_compressed)) 210 | ri = np.array(ri.data).reshape(ri.shape.dims) 211 | 212 | if laser.name == dataset_pb2.LaserName.TOP: 213 | range_image_top_pose = dataset_pb2.MatrixFloat() 214 | range_image_top_pose.ParseFromString( 215 | zlib.decompress(laser.ri_return1.range_image_pose_compressed)) 216 | range_image_pose = np.array(range_image_top_pose.data).reshape(range_image_top_pose.shape.dims) 217 | 218 | camera_projection = dataset_pb2.MatrixInt32() 219 | camera_projection.ParseFromString( 220 | zlib.decompress(laser.ri_return1.camera_projection_compressed)) 221 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 222 | 223 | else: 224 | # Return the second strongest response if available 225 | 226 | if len(laser.ri_return2.range_image_compressed) > 0: 227 | ri = dataset_pb2.MatrixFloat() 228 | ri.ParseFromString( 229 | zlib.decompress(laser.ri_return2.range_image_compressed)) 230 | ri = np.array(ri.data).reshape(ri.shape.dims) 231 | 232 | camera_projection = dataset_pb2.MatrixInt32() 233 | camera_projection.ParseFromString( 234 | zlib.decompress(laser.ri_return2.camera_projection_compressed)) 235 | camera_projection = np.array(camera_projection.data).reshape(camera_projection.shape.dims) 236 | 237 | return ri, camera_projection, range_image_pose 238 | 239 | 240 | def get(object_list, name): 241 | """ Search for an object by name in an object list. """ 242 | 243 | object_list = [obj for obj in object_list if obj.name == name] 244 | return object_list[0] 245 | 246 | -------------------------------------------------------------------------------- /writeup.md: -------------------------------------------------------------------------------- 1 | # Writeup: Track 3D-Objects Over Time 2 | 3 | Please use this starter template to answer the following questions: 4 | 5 | ### 1. Write a short recap of the four tracking steps and what you implemented there (filter, track management, association, camera fusion). Which results did you achieve? Which part of the project was most difficult for you to complete, and why? 6 | 7 | 8 | ### 2. Do you see any benefits in camera-lidar fusion compared to lidar-only tracking (in theory and in your concrete results)? 9 | 10 | 11 | ### 3. Which challenges will a sensor fusion system face in real-life scenarios? Did you see any of these challenges in the project? 12 | 13 | 14 | ### 4. Can you think of ways to improve your tracking results in the future? 15 | 16 | --------------------------------------------------------------------------------
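A minimal usage sketch for the bundled `simple_waymo_open_dataset_reader` package follows, showing how the pieces above fit together: `WaymoDataFileReader` for sequential and random frame access, and the helpers in `utils.py` for looking up sensors by name, decompressing range images, decoding camera JPEGs, and projecting lidar labels into an image. It assumes the project root is on the Python path, that OpenCV and Pillow are installed, and that `data/segment-XXXX.tfrecord` is a placeholder for a real Waymo segment file; it only exercises functions defined in the files shown above and is a sketch, not part of the starter code.

```python
from tools.waymo_reader.simple_waymo_open_dataset_reader import (
    WaymoDataFileReader, dataset_pb2, utils)

# Placeholder path to a Waymo Open Dataset segment (.tfrecord) file.
reader = WaymoDataFileReader("data/segment-XXXX.tfrecord")

# Build the record table once: it returns the byte offset of every frame
# and leaves the file pointer back at the beginning.
record_table = reader.get_record_table()
print(f"{len(record_table)} frames in file")

# Sequential iteration from the current file position.
for frame in reader:
    # Look up the TOP lidar, its calibration and the FRONT camera by name.
    laser = utils.get(frame.lasers, dataset_pb2.LaserName.TOP)
    calibration = utils.get(frame.context.laser_calibrations,
                            dataset_pb2.LaserName.TOP)
    camera = utils.get(frame.images, dataset_pb2.CameraName.FRONT)

    # Decompress the first-return range image ([H, W, 4]: range, intensity,
    # elongation, no-label-zone flag) plus camera projection and, for the
    # TOP lidar only, the per-pixel pose.
    ri, camera_projection, range_image_pose = \
        utils.parse_range_image_and_camera_projection(laser)

    # Decode the front camera JPEG and draw the lidar 3D labels into it.
    img = utils.decode_image(camera)
    camera_calib = utils.get(frame.context.camera_calibrations,
                             dataset_pb2.CameraName.FRONT)
    vehicle_to_image = utils.get_image_transform(camera_calib)
    for label in frame.laser_labels:
        utils.draw_3d_box(img, vehicle_to_image, label)

    break  # only the first frame in this sketch

# Random access: jump back to any frame via its offset from the record table.
reader.seek(record_table[0])
first_frame = reader.read_record()
```

Because `get_record_table()` rewinds the file after scanning, iteration afterwards starts at the first frame; combining it with `seek()` gives random access without reading the whole file sequentially.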