├── .gitignore ├── README.md ├── activate ├── mslam.py ├── mslam ├── agent_loc │ └── agent_loc.py ├── depth │ ├── mono.py │ └── stereo_CNN.py ├── geo_proj │ └── geo_proj.py ├── img_stitch │ └── img_stitch.py ├── lane_finder │ ├── lane_finder.py │ └── lines_detected.png └── obj_rec │ └── obj_rec.py ├── mslam_setup_cdf.sh ├── mslam_vis_pcd.py ├── requirements.pip └── video ├── driving_country_480p_trimmed.mp4 ├── kittbw.mp4 ├── road-photo.jpg ├── video_credits.txt └── wget-log /.gitignore: -------------------------------------------------------------------------------- 1 | env-*/ 2 | 3 | hd3_repo 4 | detectron2_repo 5 | monodepth2_repo 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MultiSLAM 2 | Enhanced Real-Time Multi-Camera SLAM for 3D Scene Reconstruction
3 | By: Mansoor Saqib & Shahin Imtiaz
4 | For: University of Toronto CSC420 - Intro to Image Understanding Fall 2019
5 | 6 | ## Running on CDF 7 | 8 | > $ bash mslam_setup_cdf.sh 9 | > 10 | > $ python3 mslam.py 11 | > 12 | > To view constructed 3D point cloud of map: 13 | > 14 | > $ python3 mslam_vis_pcd.py 15 | > 16 | > User configurable options are located inside mslam.py 17 | 18 | ## Personal Dev Environment Setup 19 | > $ sudo apt-get install python-dev python3-dev 20 | > 21 | > $ python3 -m venv env-mslam 22 | > 23 | > $ source env-mslam/bin/activate 24 | > 25 | > (env-mslam) $ python3 -m pip install -U pip 26 | > 27 | > (env-mslam) $ python3 -m pip install -U 'git+https://github.com/facebookresearch/fvcore.git' 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 28 | > 29 | > (env-mslam) $ python3 -m pip install -r requirements.pip 30 | > 31 | > (env-mslam) $ git clone https://github.com/facebookresearch/detectron2 detectron2_repo 32 | > 33 | > (env-mslam) $ python3 -m pip install -e detectron2_repo 34 | 35 | ## Package contents 36 | * [mslam.py](mslam.py): Entrypoint to run MultiSLAM 37 | * [mslam_vis_pcd.py](mslam_vis_pcd.py): View 3D point cloud of map 38 | * [mslam/agent_loc](mslam/agent_loc): 39 | - [agent_loc.py](mslam/agent_loc/agent_loc.py): Determine agent movement in 3D space 40 | * [mslam/depth](mslam/depth): 41 | - [mono.py](mslam/depth/mono.py): Compute mono depth information from a single image 42 | - [stereo_CNN.py](mslam/depth/stereo_CNN.py): Compute stereo depth information from two images 43 | * [mslam/geo_proj](mslam/geo_proj): 44 | - [geo_proj.py](mslam/geo_proj/geo_proj.py): Construct the scene point cloud and track agent location 45 | * [mslam/img_stitch](mslam/img_stitch): 46 | - [img_stitch.py](mslam/img_stitch/img_stitch.py): Stitch together a set of images 47 | * [mslam/obj_rec](mslam/obj_rec): 48 | - [obj_rec.py](mslam/obj_rec/obj_rec.py): Classify the objects in a single image or a set of images -------------------------------------------------------------------------------- /activate: -------------------------------------------------------------------------------- 1 | env-mslam/bin/activate -------------------------------------------------------------------------------- /mslam.py: -------------------------------------------------------------------------------- 1 | ##CSC420 Fall 2019 Final Project - MultiSLAM 2 | ##By: Mansoor Saqib & Shahin Imtiaz 3 | 4 | from __future__ import absolute_import, division, print_function 5 | import sys 6 | sys.path.insert(1, 'hd3_repo') 7 | sys.path.insert(1, 'monodepth2_repo') 8 | 9 | import os 10 | from os.path import join 11 | import threading 12 | import argparse 13 | import datetime 14 | import imutils 15 | import time 16 | import copy 17 | import cv2 18 | import logging 19 | import pprint 20 | from argparse import ArgumentParser 21 | from tqdm import tqdm 22 | import numpy as np 23 | import open3d as o3d 24 | import PIL.Image as pil 25 | from PIL import Image 26 | import matplotlib as mpl 27 | import matplotlib.cm as cm 28 | import matplotlib.pyplot as plt 29 | import scipy.stats 30 | 31 | from mslam.agent_loc.agent_loc import AgentLocate 32 | from mslam.depth.mono import MonoDepth 33 | from mslam.geo_proj.geo_proj import GeoProjection 34 | # from mslam.depth.stereo_CNN import StereoDepthCNN # Stereo Depth currently not working 35 | from mslam.obj_rec.obj_rec import ObjectDetect 36 | 37 | ############################ 38 | #### MultiSlam Loop #### 39 | ############################ 40 | 41 | # The main rendering loop.
Combines the SLAM modules together to create 42 | # a mapping of the provided environment video or real time file stream. 43 | def mslam(): 44 | global vs, enableModules, outputFrame, length, settingsModules, debugging, live_stream 45 | 46 | # Initialize modules 47 | mod_object_detection = None 48 | mod_mono_depth = None 49 | mod_stereo_depth_cnn = None 50 | mod_agent_locate = None 51 | mod_geo_projection = None 52 | 53 | if debugging: 54 | print("Initializing modules") 55 | 56 | if enableModules['object_detection']: 57 | mod_object_detection = ObjectDetect( 58 | settingsModules['object_detection']['model_path'], 59 | settingsModules['object_detection']['model_weights']) 60 | if enableModules['mono_depth']: 61 | mod_mono_depth = MonoDepth(settingsModules['mono_depth']['model_path']) 62 | if enableModules['stereo_depth_cnn']: 63 | mod_stereo_depth_cnn = StereoDepthCNN(settingsModules['stereo_depth_cnn']['model_path']) 64 | if enableModules['agent_locate']: 65 | mod_agent_locate = AgentLocate(debugging=debugging) 66 | if enableModules['geo_projection']: 67 | if live_stream: 68 | mod_geo_projection = GeoProjection(mode='online') 69 | else: 70 | mod_geo_projection = GeoProjection(mode='offline') 71 | 72 | if debugging: 73 | print("Done initializing modules") 74 | print("Starting MultiSlam rendering") 75 | 76 | # Loop through video frames 77 | for i in tqdm(range(length)): 78 | 79 | # Get next frame 80 | isValid, frame = vs.read() 81 | if not isValid: # No more frames 82 | break 83 | outputFrame['original_L'] = frame.copy() 84 | 85 | # Get next frame for right camera 86 | if enableModules['stereo_depth_cnn']: 87 | isValid, frame2 = vs2.read() 88 | if not isValid: 89 | break 90 | outputFrame['original_R'] = frame2.copy() 91 | 92 | # Feed in frames to modules and obtain output 93 | if enableModules['mono_depth']: 94 | outputFrame['mono_depth'] = mod_mono_depth.estimate(frame.copy()) 95 | if enableModules['stereo_depth_cnn']: 96 | outputFrame['stereo_depth_cnn'] = mod_stereo_depth_cnn.estimate(frame.copy()) 97 | if enableModules['agent_locate']: 98 | out_agent_locate = mod_agent_locate.estimate(frame.copy()) 99 | outputFrame['agent_locate'] = out_agent_locate['frame'] 100 | al_transform = out_agent_locate['transform'] 101 | if enableModules['object_detection']: 102 | outputFrame['object_detection'] = mod_object_detection.estimate(frame.copy()) 103 | if enableModules['geo_projection']: 104 | outPCD = mod_geo_projection.estimate(frame.copy(), outputFrame['mono_depth'].copy(), al_transform, downsample=10) 105 | outputFrame['geo_projection_pcd'] = outPCD[0] 106 | outputFrame['geo_projection'] = outPCD[1] 107 | 108 | # Write frame to current point cloud or video file 109 | writeFrame(i) 110 | 111 | print("Done processing video. Output has been saved in:", args['output']) 112 | print("Closing mslam") 113 | time.sleep(10) 114 | print("Please exit the application with CTRL+C") 115 | exit(0) 116 | 117 | # Write the current frame among all modules to a file. Alternatively, this 118 | # can be adjusted to have the frame streamed for a live view as rendering is in progress. 
119 | ## frameNum = id of the frame being processed 120 | def writeFrame(frameNum): 121 | global outputFrame, outputWriter, enableModules, debugging 122 | 123 | for m in enableModules.keys(): 124 | if not enableModules[m] or outputFrame[m] is None: 125 | continue 126 | 127 | if m == 'geo_projection_pcd' or m == 'geo_projection': 128 | if debugging: 129 | print("Adding points in frame", frameNum, "to global point cloud") 130 | print("writing frame", frameNum, "for", m) 131 | print("frame stats: ", np.max(outputFrame[m]), np.min(outputFrame[m]), outputFrame[m].shape) 132 | o3d.io.write_point_cloud(outputWriter['geo_projection_pcd'], outputFrame['geo_projection_pcd']) 133 | outputWriter['geo_projection'].write(outputFrame['geo_projection']) 134 | continue 135 | 136 | if debugging: 137 | print("writing frame", frameNum, "for", m) 138 | print("frame stats: ", np.max(outputFrame[m]), np.min(outputFrame[m]), outputFrame[m].shape) 139 | 140 | outputWriter[m].write(outputFrame[m]) 141 | 142 | # Contains the current frame output for each module (or point cloud for geo_projection) 143 | outputFrame = { 144 | 'object_detection': None, 145 | 'mono_depth': None, 146 | 'stereo_depth_cnn': None, 147 | 'agent_locate': None, 148 | 'geo_projection': None, 149 | 'geo_projection_pcd': None, 150 | 'original_L': None, 151 | 'original_R': None, 152 | } 153 | 154 | ############################ 155 | #### User Configuration #### 156 | ################################################################################################ 157 | 158 | print('MSLAM options can be configured in mslam.py under User Configuration') 159 | 160 | # Initial configuration for input and output rendering settings 161 | args = { 162 | 'leftcam': 'video/kittbw.mp4', # Path to main or left camera video 163 | # 'leftcam': 'video/driving_country_480p_trimmed.mp4', 164 | 'rightcam': None, # Path to right camera video if stereo is enabled 165 | 'output': 'OUTPUT/', # Path to rendering output 166 | 'endframe': 1400, # Total number of video frames to process. None = All 167 | 'ip': '0.0.0.0', # Address for live streaming. Can access with VLC network stream 168 | 'port': '8000', # Port for live streaming. Can access with VLC network stream 169 | } 170 | 171 | # Verbose execution 172 | debugging = False 173 | 174 | # Live stream the output 175 | live_stream = True 176 | 177 | # Enable or disable the modules for debugging. For a complete slam system, enable all* (*choose one of mono or stereo depth). 
178 | enableModules = { 179 | 'object_detection': True, 180 | 'stereo_depth_cnn': False, 181 | 'agent_locate': True, 182 | 'mono_depth': True, 183 | 'geo_projection': True, # depends on depth and agent_locate 184 | } 185 | 186 | # Module specific settings 187 | settingsModules = { 188 | 'object_detection': { 189 | 'model_path': "detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", 190 | 'model_weights': "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl", 191 | }, 192 | 'stereo_depth_cnn': { 193 | 'model_path': "hd3_repo/scripts/model_zoo/hd3s_things_kitti-1243813e.pth", 194 | }, 195 | 'agent_locate': {}, 196 | 'mono_depth': { 197 | 'model_path': os.path.join("monodepth2_repo/models", "mono_1024x320"), 198 | }, 199 | 'geo_projection': {}, 200 | } 201 | 202 | 203 | ################################################################################################ 204 | #### Run MultiSlam #### 205 | ############################ 206 | 207 | # Camera feed setup 208 | vs = cv2.VideoCapture(args["leftcam"]) 209 | if args["rightcam"] is not None: 210 | vs2 = cv2.VideoCapture(args["rightcam"]) 211 | 212 | # Get video stats 213 | length = int(vs.get(cv2.CAP_PROP_FRAME_COUNT)) 214 | width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH)) 215 | height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT)) 216 | 217 | if args["endframe"] is not None: 218 | length = args["endframe"] 219 | 220 | print("MSLAM will run for "+str(length)+" frames") 221 | 222 | # Output format 223 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 224 | 225 | # Output writers 226 | outputWriter = {} 227 | for m in enableModules.keys(): 228 | if enableModules[m]: 229 | if m == 'geo_projection': 230 | outputWriter['geo_projection_pcd'] = args["output"]+'OUT_' + 'geo_projection' +'.pcd' 231 | outputWriter['geo_projection'] = cv2.VideoWriter(args["output"]+'OUT_' + 'geo_projection' +'.mp4',fourcc,30,(1850,1016)) 232 | else: 233 | outputWriter[m] = cv2.VideoWriter(args["output"]+'OUT_' + m +'.mp4',fourcc,30,(width,height)) 234 | 235 | # Single threaded multislam 236 | t = threading.Thread(target=mslam) 237 | t.daemon = True 238 | t.start() 239 | 240 | # Video streaming the output with the Flask framework is based on 241 | # https://www.pyimagesearch.com/2019/09/02/opencv-stream-video-to-web-browser-html-page/ 242 | if live_stream: 243 | from flask import Response 244 | from flask import Flask 245 | from flask import render_template 246 | 247 | app = Flask(__name__) 248 | 249 | def generateStreamFrame(moduleName): 250 | global outputFrame 251 | 252 | while True: 253 | if outputFrame[moduleName] is None: 254 | continue 255 | 256 | (flag, encodedImage) = cv2.imencode(".jpg", outputFrame[moduleName]) 257 | 258 | if not flag: 259 | continue 260 | 261 | yield(b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + 262 | bytearray(encodedImage) + b'\r\n') 263 | 264 | @app.route("/multislam_stream_original_L") 265 | def stream_original_L(): 266 | return Response(generateStreamFrame('original_L'), 267 | mimetype = "multipart/x-mixed-replace; boundary=frame") 268 | 269 | @app.route("/multislam_stream_original_R") 270 | def stream_original_R(): 271 | return Response(generateStreamFrame('original_R'), 272 | mimetype = "multipart/x-mixed-replace; boundary=frame") 273 | 274 | @app.route("/multislam_stream_object_detection") 275 | def stream_object_detection(): 276 | return Response(generateStreamFrame('object_detection'), 277 | mimetype = "multipart/x-mixed-replace; boundary=frame") 278 | 279 |
@app.route("/multislam_stream_mono_depth") 280 | def stream_mono_depth(): 281 | return Response(generateStreamFrame('mono_depth'), 282 | mimetype = "multipart/x-mixed-replace; boundary=frame") 283 | 284 | @app.route("/multislam_stream_stereo_depth_cnn") 285 | def stream_stereo_depth_cnn(): 286 | return Response(generateStreamFrame('stereo_depth_cnn'), 287 | mimetype = "multipart/x-mixed-replace; boundary=frame") 288 | 289 | @app.route("/multislam_stream_agent_locate") 290 | def stream_agent_locate(): 291 | return Response(generateStreamFrame('agent_locate'), 292 | mimetype = "multipart/x-mixed-replace; boundary=frame") 293 | 294 | @app.route("/multislam_stream_geo_projection") 295 | def stream_geo_projection(): 296 | return Response(generateStreamFrame('geo_projection'), 297 | mimetype = "multipart/x-mixed-replace; boundary=frame") 298 | 299 | app.run(host=args["ip"], port=args["port"], debug=False, threaded=True, use_reloader=False) 300 | 301 | # Wait for thread to finish 302 | t.join() -------------------------------------------------------------------------------- /mslam/agent_loc/agent_loc.py: -------------------------------------------------------------------------------- 1 | # Based on the OpenCV SIFT and Flann Matcher libraries 2 | # https://docs.opencv.org/3.4/da/df5/tutorial_py_sift_intro.html 3 | # https://docs.opencv.org/3.4/d5/d6f/tutorial_feature_flann_matcher.html 4 | # 5 | # Customized using our own pruning, and agent movement detection and smoothing algorithms 6 | import cv2 7 | import numpy as np 8 | 9 | ''' 10 | Agent Location 11 | Utilizes the OpenCV library: https://opencv.org/ 12 | Computes the movement of the agent based on frame by frame differences 13 | ''' 14 | class AgentLocate: 15 | # Initialize SIFT and Flann Matcher 16 | def __init__(self, kpNum=1000, enableGlobal=False, match_threshold=200, debugging=False): 17 | self.debugging = debugging 18 | self.kpNum = kpNum # Max keypoints to detect 19 | self.match_threshold = match_threshold # Distance threshold 20 | self.enableGlobal = enableGlobal 21 | 22 | self.sift = cv2.xfeatures2d.SIFT_create(self.kpNum) 23 | self.FLANN_INDEX_KDTREE = 1 24 | self.index_params = dict(algorithm = self.FLANN_INDEX_KDTREE, trees = 5) 25 | self.search_params = dict(checks=50) 26 | self.flannFrameByFrame = cv2.FlannBasedMatcher(self.index_params,self.search_params) 27 | self.prevFrameKP = None 28 | 29 | self.flannGlobal = None 30 | if self.enableGlobal: 31 | self.flannGlobal = cv2.FlannBasedMatcher(self.index_params,self.search_params) 32 | 33 | self.smooth_history = 8 34 | self.smooth_x_magnitude = [] 35 | self.smooth_y_magnitude = [] 36 | self.smooth_translate = [] 37 | self.smooth_rotate = [] 38 | self.letter_to_move = { 39 | 's': 0, 40 | 'f': 1, 41 | 'r': 2, 42 | 'l': 3, 43 | } 44 | self.move_to_letter = { 45 | 0: 's', 46 | 1: 'f', 47 | 2: 'r', 48 | 3: 'l', 49 | } 50 | 51 | # Return the descriptors saved in the given flann matcher 52 | def getTrainDescriptors(self, matcher): 53 | if matcher == "global" and self.enableGlobal: 54 | return self.flannGlobal.getTrainDescriptors() 55 | elif matcher == "frameByframe": 56 | return self.flannFrameByFrame.getTrainDescriptors() 57 | return None 58 | 59 | # Returns True iff a given keypoint is a euclidean distance of 20 away from ALL other saved keypoints 60 | # And it is not in the middle region 61 | def pruneKP(self, kp, kpList, middle, thresh=30): 62 | cur_pt = np.array(kp.pt) 63 | if np.linalg.norm(cur_pt - middle) < 100: 64 | return False 65 | for i in kpList: 66 | i_pt = np.array(i.pt) 67 
| if np.linalg.norm(cur_pt - i_pt) < thresh: 68 | return False 69 | return True 70 | 71 | # Smooth motion over the last self.smooth_history number of historical movements 72 | # by taking the most common value among them 73 | def smoothMove(self, mov): 74 | if len(self.smooth_translate) < self.smooth_history: 75 | self.smooth_translate.append(mov[0]) 76 | self.smooth_rotate.append(mov[1]) 77 | self.smooth_x_magnitude.append(mov[2]) 78 | self.smooth_y_magnitude.append(mov[3]) 79 | out = [max(set(self.smooth_translate), key=self.smooth_translate.count), 80 | max(set(self.smooth_rotate), key=self.smooth_rotate.count), 81 | max(set(self.smooth_x_magnitude), key=self.smooth_x_magnitude.count), 82 | max(set(self.smooth_y_magnitude), key=self.smooth_y_magnitude.count)] 83 | if self.debugging: 84 | if out[2] is None or out[3] is None: 85 | print(out, 'mag=', None) 86 | else: 87 | print(out, 'mag=', np.hypot(out[2], out[3])) 88 | return out 89 | 90 | self.smooth_translate.pop(0) 91 | self.smooth_rotate.pop(0) 92 | self.smooth_x_magnitude.pop(0) 93 | self.smooth_y_magnitude.pop(0) 94 | self.smooth_translate.append(mov[0]) 95 | self.smooth_rotate.append(mov[1]) 96 | self.smooth_x_magnitude.append(mov[2]) 97 | self.smooth_y_magnitude.append(mov[3]) 98 | out = [max(set(self.smooth_translate), key=self.smooth_translate.count), 99 | max(set(self.smooth_rotate), key=self.smooth_rotate.count), 100 | max(set(self.smooth_x_magnitude), key=self.smooth_x_magnitude.count), 101 | max(set(self.smooth_y_magnitude), key=self.smooth_y_magnitude.count)] 102 | if self.debugging: 103 | if out[2] is None or out[3] is None: 104 | print(out, 'mag=', None) 105 | else: 106 | print(out, 'mag=', np.hypot(out[2], out[3])) 107 | return out 108 | 109 | # Return the camera translation and rotation matrix for the movement between frames 110 | def getViewTransform(self, matches, kp, imgShape): 111 | # Divide the image into 4x4 quadrants for each axis 112 | quad_width = imgShape[1] / 4 113 | quad_height = imgShape[0] / 4 114 | quads_x = np.zeros((4,4)) 115 | quads_y = np.zeros((4,4)) 116 | 117 | # Initialize with a constant value, otherwise median of an empty list = NaN 118 | x_magnitude = [0.00005] 119 | y_magnitude = [0.00005] 120 | 121 | # Reduce the difference in each axis for matching points to a numerical sign and sum over differences in each quadrant 122 | # Place it in the quadrant belonging to the x, y location of the keypoint in the current frame. 
123 | for i in range(len(matches)): 124 | x, y = kp[matches[i][0].queryIdx].pt 125 | xh, yh = self.prevFrameKP[matches[i][0].trainIdx].pt 126 | qx = int(x // quad_width) 127 | qy = int(y // quad_height) 128 | x_diff = x-xh 129 | y_diff = y-yh 130 | 131 | if qy in [0, 3]: 132 | x_magnitude.append(abs(x_diff)) 133 | if qx in [0, 3]: 134 | y_magnitude.append(abs(y_diff)) 135 | 136 | quads_x[qy, qx] += x_diff 137 | quads_y[qy, qx] += y_diff 138 | 139 | if x_diff < 0: 140 | quads_x[qy, qx] -= 1 141 | else: 142 | quads_x[qy, qx] += 1 143 | 144 | if y_diff < 0: 145 | quads_y[qy, qx] -= 1 146 | else: 147 | quads_y[qy, qx] += 1 148 | 149 | # Calculate the overall numerical sign of each quadrant 150 | quads_x = np.where(quads_x < 0, -1, quads_x) 151 | quads_x = np.where(quads_x > 0, 1, quads_x) 152 | quads_y = np.where(quads_y < 0, -1, quads_y) 153 | quads_y = np.where(quads_y > 0, 1, quads_y) 154 | 155 | # Transformation logic: 156 | # Move in the forward direction iff all conditions are met: 157 | # - Majority of Y quadrants in the top most row are 0 or negative 158 | # - Majority of X quadrants in the right most column are 0 or positive 159 | # - Majority of Y quadrants in the bottom most row are 0 or positive 160 | # - Majority of X quadrants in the left most column are 0 or negative 161 | # 162 | # Rotate the camera right iff: 163 | # - Majority of X quadrants are 0 or negative 164 | # 165 | # Rotate the camera left iff: 166 | # - Majority of X quadrants are 0 or positive 167 | # 168 | # Rotate the camera up iff: 169 | # - Majority of Y quadrants are 0 or positive 170 | # 171 | # Rotate the camera down iff: 172 | # - Majority of Y quadrants are 0 or negative 173 | 174 | # [translation, rotation, x magnitude, y magnitude] 175 | mov = [None, None, np.median(x_magnitude), np.median(y_magnitude)] 176 | 177 | if (np.sum(quads_y[0]) == 0 and np.sum(quads_y[3]) == 0 and 178 | np.sum(quads_x[:,0]) == 0 and np.sum(quads_x[:,3]) == 0): 179 | mov[0] = 's' 180 | elif (np.sum(quads_y[0]) <= 0 and np.sum(quads_y[3]) >= 0 and 181 | np.sum(quads_x[:,0]) <= 0 and np.sum(quads_x[:,3]) >= 0): 182 | mov[0] = 'f' 183 | 184 | # NOTE: Trial and error for determining best rotation estimation 185 | # x_sum = stats.mode([quads_x[0,0], quads_x[1,0], quads_x[2,0], quads_x[3,0], quads_x[0,1], quads_x[0,2], quads_x[0,3], quads_x[1,3], quads_x[2,3], quads_x[3,3]])[0][0] 186 | # x_sum = np.median([quads_x[1,0], quads_x[2,0], quads_x[3,0], quads_x[1,3], quads_x[2,3], quads_x[3,3]]) 187 | # x_sum = np.median(quads_x) 188 | # x_sum = np.average(quads_x) 189 | # x_sum = scipy.stats.mode(quads_x, axis=None)[0][0] 190 | # x_sum = scipy.stats.mode(np.array([quads_x[0,0], quads_x[1,0], quads_x[2,0], quads_x[3,0], quads_x[0,1], quads_x[0,2], quads_x[0,3], quads_x[1,3], quads_x[2,3], quads_x[3,3]]), axis=None)[0][0] 191 | x_sum = np.sum(quads_x) 192 | if self.debugging: 193 | print('xsum is:', x_sum) 194 | 195 | if (x_sum < -2): 196 | mov[1] = 'r' 197 | elif (x_sum > 2): 198 | mov[1] = 'l' 199 | 200 | # NOTE: Up and down rotations still in progress to be implemented correctly and tested 201 | # elif (np.sum(quads_y) < 0): 202 | # if self.debugging: 203 | # print('Rotate Down') 204 | # mov[1] = 'd' 205 | # elif (np.sum(quads_y) > 0): 206 | # if self.debugging: 207 | # print('Rotate Up') 208 | # mov[1] = 'u' 209 | else: 210 | pass 211 | 212 | return self.smoothMove(mov) 213 | 214 | # Compute the change in agent location based on previous frame 215 | def estimate(self, img, kpNum=1000): 216 | kp, des = self.sift.detectAndCompute(img,None) 
217 | if len(kp) == 0 or des is None: 218 | return {'frame': np.zeros(img.shape, dtype='uint8') * 255, 'transform': [None, None, None, None]} 219 | 220 | # Sort keypoints by response 221 | kp, des = zip(*(sorted(zip(kp, des), key=lambda x: x[0].response, reverse=True))) 222 | 223 | # Prune keypoints that are close together 224 | # And around the middle of the frame 225 | kp_apart = [] 226 | des_apart = [] 227 | middle = np.array([img.shape[1]//2, img.shape[0]//2]) 228 | for i in range(len(kp)): 229 | if self.pruneKP(kp[i], kp_apart, middle): 230 | kp_apart.append(kp[i]) 231 | des_apart.append(des[i]) 232 | 233 | if kpNum <= len(kp_apart): 234 | kp = kp_apart[:kpNum] 235 | des = des_apart[:kpNum] 236 | else: 237 | kp = kp_apart 238 | des = des_apart 239 | 240 | # First frame. Add to empty training set and return blank frame 241 | if len(self.getTrainDescriptors('frameByframe')) == 0: 242 | self.flannFrameByFrame.add([np.array(des)]) 243 | self.flannFrameByFrame.train() 244 | 245 | if self.enableGlobal: 246 | self.flannGlobal.add([np.array(des)]) 247 | self.flannGlobal.train() 248 | 249 | self.prevFrameKP = kp 250 | return {'frame': np.zeros(img.shape, dtype='uint8') * 255, 'transform': [None, None, None, None]} 251 | 252 | # Match the current frame with the previous one 253 | matchesFrameByFrame = self.flannFrameByFrame.knnMatch(np.array(des), k=1) 254 | matchesFrameByFrame = [x for x in matchesFrameByFrame if x[0].distance < self.match_threshold] 255 | # Remove the previous frame's descriptors and add in the current one's to the matcher 256 | self.flannFrameByFrame.clear() 257 | self.flannFrameByFrame.add([np.array(des)]) 258 | self.flannFrameByFrame.train() 259 | 260 | # Match the current frame with all previous frames and add to the descriptor collection 261 | if self.enableGlobal: 262 | matchesGlobal = self.flannGlobal.knnMatch(np.array(des), k=1) 263 | matchesGlobal = [x for x in matchesGlobal if x[0].distance < self.match_threshold] 264 | self.flannGlobal.add([np.array(des)]) 265 | self.flannGlobal.train() 266 | 267 | transform = self.getViewTransform(matchesFrameByFrame, kp, img.shape) 268 | self.prevFrameKP = kp 269 | outKPFrame = cv2.drawKeypoints(img,kp,img,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS) 270 | 271 | return {'frame': outKPFrame, 'transform': transform} -------------------------------------------------------------------------------- /mslam/depth/mono.py: -------------------------------------------------------------------------------- 1 | # Based on the Niantic's Monodepth2 libraries 2 | # https://github.com/nianticlabs/monodepth2 3 | # 4 | # Included copyright information: 5 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 6 | # 7 | # This software is licensed under the terms of the Monodepth2 licence 8 | # which allows for non-commercial use only, the full terms of which are made 9 | # available in the LICENSE file. 
10 | 11 | from __future__ import absolute_import, division, print_function 12 | 13 | import sys 14 | sys.path.insert(1, '../../monodepth2_repo') 15 | 16 | import os 17 | import glob 18 | import argparse 19 | import numpy as np 20 | import PIL.Image as pil 21 | from PIL import Image 22 | import matplotlib as mpl 23 | import matplotlib.cm as cm 24 | import cv2 25 | 26 | import torch 27 | from torchvision import transforms, datasets 28 | 29 | import monodepth2_repo.networks as networks 30 | from monodepth2_repo.layers import disp_to_depth 31 | from monodepth2_repo.utils import download_model_if_doesnt_exist 32 | 33 | ''' 34 | Monocular Depth 35 | Utilizes the Monodepth2 CNN: https://github.com/nianticlabs/monodepth2 36 | Creates a depth map from a single frame 37 | NOTE: Makes use of the variable naming and calling conventions found in the library's predictor script 38 | ''' 39 | class MonoDepth: 40 | # Initialize the predictor 41 | def __init__(self, path): 42 | if torch.cuda.is_available(): 43 | self.device = torch.device("cuda") 44 | else: 45 | self.device = torch.device("cpu") 46 | 47 | # Load model 48 | self.model_path = path 49 | self.encoder_path = os.path.join(self.model_path, "encoder.pth") 50 | self.depth_decoder_path = os.path.join(self.model_path, "depth.pth") 51 | self.encoder = networks.ResnetEncoder(18, False) 52 | self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device) 53 | 54 | # Model training parameters 55 | self.feed_height = self.loaded_dict_enc['height'] 56 | self.feed_width = self.loaded_dict_enc['width'] 57 | self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items() if k in self.encoder.state_dict()} 58 | self.encoder.load_state_dict(self.filtered_dict_enc) 59 | self.encoder.to(self.device) 60 | self.encoder.eval() 61 | 62 | self.depth_decoder = networks.DepthDecoder( 63 | num_ch_enc=self.encoder.num_ch_enc, scales=range(4)) 64 | 65 | self.loaded_dict = torch.load(self.depth_decoder_path, map_location=self.device) 66 | self.depth_decoder.load_state_dict(self.loaded_dict) 67 | self.depth_decoder.to(self.device) 68 | self.depth_decoder.eval() 69 | 70 | def estimate(self, img): 71 | with torch.no_grad(): 72 | # Convert cv2 image array to PIL Image 73 | img = Image.fromarray(img).convert('RGB') 74 | original_width, original_height = img.size 75 | img = img.resize((self.feed_width, self.feed_height), pil.LANCZOS) 76 | img = transforms.ToTensor()(img).unsqueeze(0) 77 | 78 | # Get prediction 79 | img = img.to(self.device) 80 | features = self.encoder(img) 81 | outputs = self.depth_decoder(features) 82 | disp = outputs[("disp", 0)] 83 | disp_resized = torch.nn.functional.interpolate( 84 | disp, (original_height, original_width), mode="bilinear", align_corners=False) 85 | 86 | # Generate depth map 87 | disp_resized_np = disp_resized.squeeze().cpu().numpy() 88 | vmax = np.percentile(disp_resized_np, 95) 89 | normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax) 90 | mapper = cm.ScalarMappable(norm=normalizer, cmap='magma') 91 | colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8) 92 | 93 | return colormapped_im -------------------------------------------------------------------------------- /mslam/depth/stereo_CNN.py: -------------------------------------------------------------------------------- 1 | # Based on UCBDrive's HD3 libraries 2 | # https://github.com/ucbdrive/hd3 3 | # 4 | # Work is currently in progress to make this module functional, as there are a few library errors 5 |
6 | from __future__ import absolute_import, division, print_function 7 | 8 | import sys 9 | sys.path.insert(1, '../../hd3_repo') 10 | sys.path.insert(1, '../../hd3_repo/utils') 11 | 12 | import os 13 | from os.path import join 14 | import cv2 15 | import time 16 | import math 17 | import logging 18 | from argparse import ArgumentParser 19 | import numpy as np 20 | 21 | from PIL import Image 22 | 23 | import torch 24 | import torch.nn.parallel 25 | import torch.nn.functional as F 26 | import torch.backends.cudnn as cudnn 27 | import torch.optim 28 | import torch.utils.data 29 | 30 | # import data.hd3data as datasets 31 | # import data.flowtransforms as transforms 32 | # import hd3model as models 33 | # from utils.utils import * 34 | # from models.hd3_ops import * 35 | # import utils.flowlib as fl 36 | 37 | import hd3_repo.data.hd3data as datasets 38 | import hd3_repo.data.flowtransforms as transforms 39 | import hd3_repo.hd3model as models 40 | from hd3_repo.utils.utils import * 41 | from hd3_repo.models.hd3_ops import * 42 | import hd3_repo.utils.flowlib as fl 43 | 44 | ''' 45 | Stereo Depth 46 | Utilizes the HD3 CNN: https://github.com/ucbdrive/hd3 47 | Creates a depth map from two stereoscopic frames 48 | NOTE: Currently a work in progress 49 | NOTE: Makes use of the variable naming and calling conventions found in the library's predictor script 50 | ''' 51 | class StereoDepthCNN: 52 | def __init__(self, model_path): 53 | self.corr_range = [4, 4, 4, 4, 4, 4] 54 | self.model = models.HD3Model("stereo", "dlaup", "hda", self.corr_range, 55 | False).cuda() 56 | self.model = torch.nn.DataParallel(self.model).cuda() 57 | 58 | self.checkpoint = torch.load(model_path) 59 | self.model.load_state_dict(self.checkpoint['state_dict'], strict=True) 60 | 61 | self.model.eval() 62 | 63 | def get_target_size(self, H, W): 64 | h = 64 * np.array([[math.floor(H / 64), math.floor(H / 64) + 1]]) 65 | w = 64 * np.array([[math.floor(W / 64), math.floor(W / 64) + 1]]) 66 | ratio = np.abs(np.matmul(np.transpose(h), 1 / w) - H / W) 67 | index = np.argmin(ratio) 68 | return h[0, index // 2], w[0, index % 2] 69 | 70 | def estimate(self, imgL, imgR): 71 | input_size = imgL.shape 72 | 73 | mean = [0.485, 0.456, 0.406] 74 | std = [0.229, 0.224, 0.225] 75 | 76 | th, tw = self.get_target_size(input_size[0], input_size[1]) 77 | 78 | val_transform = transforms.Compose( 79 | [transforms.ToTensor(), 80 | transforms.Normalize(mean=mean, std=std)]) 81 | 82 | mode="stereo" 83 | label_num=False 84 | transform=val_transform 85 | out_size=True 86 | 87 | cudnn.enabled = True 88 | cudnn.benchmark = True 89 | 90 | cur_data = [[imgL, imgR], []] 91 | cur_data = list(val_transform(*cur_data)) 92 | cur_data.append(np.array(input_size[::-1], dtype=int)) 93 | cur_data = tuple(cur_data) 94 | 95 | with torch.no_grad(): 96 | img_list, label_list, img_size = cur_data 97 | img_list = [img.unsqueeze(0) for img in img_list] 98 | img_size = np.array(input_size[:2][::-1], dtype=int) 99 | img_size = img_size[np.newaxis, :] 100 | img_list = [img.to(torch.device("cuda")) for img in img_list] 101 | label_list = [ 102 | label.to(torch.device("cuda")) for label in label_list 103 | ] 104 | 105 | # resize test 106 | resized_img_list = [ 107 | F.interpolate( 108 | img, (th, tw), mode='bilinear', align_corners=True) 109 | for img in img_list 110 | ] 111 | 112 | output = self.model( 113 | img_list=resized_img_list, 114 | label_list=label_list, 115 | get_vect=True, 116 | get_epe=False) 117 | scale_factor = 1 / 2**(7 - len(self.corr_range)) 118 | output['vect'] 
= resize_dense_vector(output['vect'] * scale_factor, 119 | img_size[0, 1], 120 | img_size[0, 0]) 121 | 122 | pred_vect = output['vect'].data.cpu().numpy() 123 | pred_vect = np.transpose(pred_vect, (0, 2, 3, 1)) 124 | curr_vect = pred_vect[0] 125 | 126 | vis_flo = fl.flow_to_image(fl.disp2flow(curr_vect)) 127 | vis_flo = cv2.cvtColor(vis_flo, cv2.COLOR_RGB2BGR) 128 | return vis_flo -------------------------------------------------------------------------------- /mslam/geo_proj/geo_proj.py: -------------------------------------------------------------------------------- 1 | # Based on the Open3D libraries 2 | # https://github.com/intel-isl/Open3D 3 | # 4 | # Customized with our own 3D position estimation of agent in point cloud 5 | 6 | import open3d as o3d 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import time 10 | import numpy as np 11 | import copy 12 | 13 | ''' 14 | Geometric Projection 15 | Utilizes the Open3D 3D Processing Library: https://github.com/intel-isl/Open3D 16 | Creates a point cloud map of the region the agent is located in 17 | ''' 18 | class GeoProjection(): 19 | # Initialize an empty point cloud. 20 | def __init__(self, mode='offline'): 21 | self.mode = mode 22 | self.pcd = None 23 | self.xyz = np.zeros(3, dtype='float64') 24 | self.rot = np.zeros((3,1), dtype='float64') 25 | self.vis = None 26 | 27 | # If using live mode, create the visualizer window. 28 | if self.mode == 'online': 29 | self.vis = o3d.visualization.Visualizer() 30 | self.vis.create_window() 31 | 32 | # Fix view reset from https://github.com/intel-isl/Open3D/issues/497 33 | def center_view(self, vis): 34 | vis.reset_view_point(True) 35 | ctr = vis.get_view_control() 36 | ctr.rotate(180, 90.0) 37 | 38 | # In live mode, update the visualizer to show the updated point cloud 39 | def update(self): 40 | self.vis.update_geometry() 41 | self.vis.poll_events() 42 | self.vis.reset_view_point(True) 43 | ctr = self.vis.get_view_control() 44 | ctr.rotate(0, 500) 45 | self.vis.update_renderer() 46 | 47 | # DEPRECATED 48 | # Returns the rotation matrix for a rotation in the Y axis 49 | def rotateY(self, d): 50 | return np.array([[np.cos(d), 0, np.sin(d)], 51 | [0, 1, 0], 52 | [-np.sin(d), 0, np.cos(d)]]) 53 | 54 | # DEPRECATED 55 | # Returns the rotation matrix for a rotation in the X axis 56 | def rotateX(self, d): 57 | return np.array([[1, 0, 0], 58 | [0, np.cos(d), -np.sin(d)], 59 | [0, np.sin(d), np.cos(d)]]) 60 | 61 | # Move a single frame's point cloud to match its location in the global map 62 | def movePoints(self, pcd, transformID): 63 | # vector magnitude from the addition of the x and y components 64 | # TODO: CURRENTLY USING CONSTANT VALUE 65 | if True or transformID[2] is None or transformID[3] is None: 66 | magnitude = 0.00005 67 | else: 68 | magnitude = np.hypot(transformID[2], transformID[3]) / 100000 69 | 70 | # Forward 71 | if transformID[0] == 'f': 72 | # Forward movement is mapped to the X-Z plane based on the vector angle of the current rotation 73 | # ^ | 74 | # | | 75 | # | <------ | -------> 76 | # | 0d 90d v 180d 270d 77 | self.xyz += np.array([np.sin(self.rot[1][0])*magnitude, 0, -np.cos(self.rot[1][0])*magnitude]) 78 | 79 | # Rotate right (Y axis) 80 | if transformID[1] == 'r': 81 | self.rot += np.array([0,0.032,0]).reshape(3,1) 82 | self.xyz += np.array([np.sin(self.rot[1][0])*magnitude, 0, -np.cos(self.rot[1][0])*magnitude]) 83 | 84 | # Rotate left (Y axis) 85 | elif transformID[1] == 'l': 86 | self.rot += np.array([0,-0.032,0]).reshape(3,1) 87 | self.xyz +=
np.array([np.sin(self.rot[1][0])*magnitude, 0, -np.cos(self.rot[1][0])*magnitude]) 88 | 89 | # Rotate up (X axis) 90 | elif transformID[1] == 'u': 91 | self.rot += np.array([-0.032,0,0]).reshape(3,1) 92 | 93 | # Rotate down (X axis) 94 | elif transformID[1] == 'd': 95 | self.rot += np.array([0.032,0,0]).reshape(3,1) 96 | 97 | # Apply transformation 98 | cur_pcd = pcd.translate(self.xyz) 99 | cur_pcd = pcd.rotate(self.rot) 100 | return cur_pcd 101 | 102 | # Add the point cloud from the current frame to the global point cloud of the map 103 | def estimate(self, img_colour, img_depth, transformID, crop_fact_h=0.8, crop_fact_w=0.7, downsample=20): 104 | img_colour = cv2.cvtColor(img_colour, cv2.COLOR_BGR2RGB) 105 | 106 | # Crop the frame to reduce boundary depth noise 107 | h, w = img_colour.shape[:2] 108 | crop_h = int((h - (crop_fact_h*h)) / 2) 109 | crop_w = int((w - (crop_fact_w*w)) / 2) 110 | 111 | # Convert the cv2 frames to the Open3D image format 112 | img_colour = copy.deepcopy(img_colour[crop_h:h-crop_h, crop_w:w-crop_w, :]) 113 | img_od3_colour = o3d.geometry.Image(img_colour) 114 | img_depth = copy.deepcopy(img_depth[crop_h:h-crop_h, crop_w:w-crop_w, :]) 115 | img_od3_depth = o3d.geometry.Image(img_depth) 116 | 117 | # Create a point cloud from the current frame and transform it so it is right side up 118 | rgbd_img = o3d.geometry.RGBDImage.create_from_color_and_depth(img_od3_colour, img_od3_depth, convert_rgb_to_intensity=False) 119 | cur_pcd = o3d.geometry.PointCloud.create_from_rgbd_image( 120 | rgbd_img, 121 | o3d.camera.PinholeCameraIntrinsic( 122 | o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault)) 123 | cur_pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) 124 | 125 | # Downsample the number of points to reduce output size and rendering computation 126 | cur_pcd = cur_pcd.uniform_down_sample(downsample) 127 | 128 | # Cut a square in the point cloud to make a path viewable when the point cloud is rendered 129 | # _______________ 130 | # | X X X X | 131 | # | X X X X | 132 | # | X X X X | 133 | # | | 134 | # --------------- where X represents the cut region 135 | pcd_array = np.asarray(cur_pcd.points) 136 | pcd_cent = cur_pcd.get_center() 137 | pcd_max = cur_pcd.get_max_bound() 138 | pcd_min = cur_pcd.get_min_bound() 139 | x_thresh = (pcd_max[0] - pcd_cent[0]) / 2 140 | y_thresh = (pcd_max[1] - pcd_cent[1]) / 2 141 | xxR = np.where(pcd_array[:,0] > pcd_cent[0] + x_thresh) 142 | xxL = np.where(pcd_array[:,0] < pcd_cent[0] - x_thresh) 143 | yy = np.where(pcd_array[:,1] < pcd_cent[1] - y_thresh) 144 | uu = np.unique(np.append(np.append(xxR[0], xxL[0]), yy[0])) 145 | cur_pcd = cur_pcd.select_down_sample(uu, invert=False) 146 | 147 | # Based on camera movement, adjust the placement of the point cloud in the global map 148 | cur_pcd = self.movePoints(cur_pcd, transformID) 149 | 150 | # Add the point cloud to the global map 151 | if self.pcd is None: 152 | self.pcd = copy.deepcopy(cur_pcd) 153 | if self.mode == 'online': 154 | self.vis.add_geometry(self.pcd) 155 | else: 156 | self.pcd += cur_pcd 157 | 158 | # Render the current global map 159 | if self.mode == 'online': 160 | self.update() 161 | 162 | outFramePCD = cv2.cvtColor(cv2.normalize(np.asarray( 163 | self.vis.capture_screen_float_buffer(True)), 164 | None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1), cv2.COLOR_RGB2BGR) 165 | 166 | return [self.pcd, outFramePCD] -------------------------------------------------------------------------------- /mslam/img_stitch/img_stitch.py:
-------------------------------------------------------------------------------- 1 | # Incomplete at the moment. 2 | # Enhancement still in progress. 3 | import numpy as np 4 | import cv2 5 | from matplotlib import pyplot as plt 6 | 7 | class ImageStitcher: 8 | def __init__(self, left_img, right_img): 9 | self.left = left_img 10 | self.right = right_img 11 | 12 | def stitch(self): 13 | sift = cv2.xfeatures2d.SIFT_create() 14 | left_kp, left_des = sift.detectAndCompute(self.left, None) 15 | right_kp, right_des = sift.detectAndCompute(self.right, None) 16 | matches = cv2.BFMatcher().knnMatch( 17 | queryDescriptors=right_des, 18 | trainDescriptors=left_des, 19 | k=2 20 | ) 21 | 22 | to_pts = [] 23 | from_pts = [] 24 | for m, n in matches: 25 | if m.distance < 0.65 * n.distance: 26 | from_pts.append(left_kp[m.trainIdx].pt) 27 | to_pts.append(right_kp[m.queryIdx].pt) 28 | 29 | H = cv2.findHomography(np.float32(from_pts), np.float32(to_pts), cv2.RANSAC, 2.0)[0] 30 | 31 | left_height, left_width = self.left.shape[:2] 32 | warped = np.zeros((left_height, int(left_width*1.75), 3), dtype=self.left.dtype) 33 | for c in range(3): 34 | warped[:,:,c] = cv2.warpPerspective(self.left[:,:,c], H, (warped.shape[1], warped.shape[0]), flags=cv2.INTER_NEAREST) 35 | left_mask = np.ones(warped.shape[:2], warped.dtype) * 255 36 | 37 | return warped 38 | 39 | if __name__ == '__main__': 40 | # use some images from kitti for demonstration 41 | left = cv2.imread('../../videos/cam1.jpg') 42 | right = cv2.imread('../../videos/cam2.jpg') 43 | 44 | out = ImageStitcher(left, right).stitch() 45 | cv2.imshow('stitched', out) 46 | cv2.waitKey(0) -------------------------------------------------------------------------------- /mslam/lane_finder/lane_finder.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from matplotlib import pyplot as plt 3 | import numpy as np 4 | 5 | if __name__ == '__main__': 6 | f = cv2.imread('../../video/road-photo.jpg') 7 | gray = cv2.cvtColor(f, cv2.COLOR_BGR2GRAY) 8 | f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB) 9 | lines_img = np.copy(f) 10 | edges = cv2.Canny(gray,100,600,apertureSize=3) 11 | 12 | lines = cv2.HoughLines(edges,1,np.pi/180,250) 13 | print(lines) 14 | for rho,theta in lines[:,0]: 15 | cos = np.cos(theta) 16 | sin = np.sin(theta) 17 | x0 = cos*rho 18 | y0 = sin*rho 19 | x1 = int(x0 + 10000*(-sin)) 20 | y1 = int(y0 + 10000*cos) 21 | x2 = int(x0 - 10000*(-sin)) 22 | y2 = int(y0 - 10000*cos) 23 | 24 | cv2.line(lines_img,(x1,y1),(x2,y2),(0,255,255),5) 25 | plt.subplot(1, 3, 1) 26 | plt.axis('off') 27 | plt.title('original image') 28 | plt.imshow(f) 29 | 30 | plt.subplot(1, 3, 2) 31 | plt.axis('off') 32 | plt.title('canny image') 33 | plt.imshow(edges, cmap='gray') 34 | 35 | plt.subplot(1, 3, 3) 36 | plt.axis('off') 37 | plt.title('lines detected') 38 | plt.imshow(lines_img) 39 | plt.show() 40 | 41 | -------------------------------------------------------------------------------- /mslam/lane_finder/lines_detected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/mslam/lane_finder/lines_detected.png -------------------------------------------------------------------------------- /mslam/obj_rec/obj_rec.py: -------------------------------------------------------------------------------- 1 | # Based on Facebook AI Research's Detectron2 2 | # https://github.com/facebookresearch/detectron2/ 3 | # Using the official Detectron2 Tutorial (colab): 4 |
https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5#scrollTo=HUjkwRsOn1O0 5 | 6 | import torch, torchvision 7 | import cv2 8 | import detectron2 9 | from detectron2.utils.logger import setup_logger 10 | from detectron2.engine import DefaultPredictor 11 | from detectron2.config import get_cfg 12 | from detectron2.utils.visualizer import Visualizer 13 | from detectron2.data import MetadataCatalog 14 | import pprint 15 | 16 | ''' 17 | Object Detection 18 | Utilizes Facebook AI Research's Detectron2 CNN: https://github.com/facebookresearch/detectron2 19 | Performs frame by frame object detection and segmentation 20 | NOTE: Makes use of the variable naming and calling conventions found in the library's predictor script 21 | ''' 22 | class ObjectDetect: 23 | # Initialize the predictor 24 | def __init__(self, model_yaml, weights): 25 | self.cfg = get_cfg() 26 | self.cfg.merge_from_file(model_yaml) 27 | self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 28 | self.cfg.MODEL.WEIGHTS = weights 29 | self.predictor = DefaultPredictor(self.cfg) 30 | 31 | # Feed in an image frame to the predictor and return the output 32 | def estimate(self, img): 33 | outputs = self.predictor(img) 34 | v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.0) 35 | v = v.draw_instance_predictions(outputs["instances"].to("cpu")) 36 | return v.get_image()[:, :, ::-1] 37 | 38 | # Return the current CfgNode 39 | def getCFG(self): 40 | return self.cfg -------------------------------------------------------------------------------- /mslam_setup_cdf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu -o pipefail 4 | 5 | echo "Installing mslam. This computer must have CUDA enabled support" 6 | export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda-10.0/}" 7 | python3 -m venv --system-site-packages env-mslam 8 | source env-mslam/bin/activate 9 | pip() { python3 -m pip "$@"; } # use a shell function; aliases are not expanded in non-interactive scripts 10 | pip install -U pip 11 | pip install -r requirements.pip 12 | [ ! -d detectron2_repo ] && git clone https://github.com/facebookresearch/detectron2 detectron2_repo || true 13 | [ ! -d monodepth2_repo ] && git clone https://github.com/nianticlabs/monodepth2.git monodepth2_repo || true 14 | [ ! -d hd3_repo ] && git clone https://github.com/ucbdrive/hd3.git hd3_repo || true 15 | pip install Cython cupy 16 | pip install -U 'git+https://github.com/facebookresearch/fvcore.git' 17 | pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 18 | pip install -e detectron2_repo 19 | pip install -U "git+https://github.com/cocodataset/panopticapi.git" 20 | pip install -U "git+https://github.com/lvis-dataset/lvis-api.git" 21 | pip install -U "git+https://github.com/mcordts/cityscapesScripts.git" 22 | pip install -r requirements.pip 23 | cd monodepth2_repo && python3 test_simple.py --image_path assets/test_image.jpg --model_name mono_1024x320 24 | cd ../ 25 | echo "Done setup.
Please run: python3 mslam.py" 26 | -------------------------------------------------------------------------------- /mslam_vis_pcd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Opens the given Open3D point cloud file (usually with a .pcd extension) in a visualizer window 3 | 4 | Flags: 5 | -i, --input Path to the point cloud file 6 | ''' 7 | 8 | import argparse 9 | import open3d as o3d 10 | 11 | if __name__ == '__main__': 12 | ap = argparse.ArgumentParser() 13 | ap.add_argument("-i", "--input", type=str, required=True, help="Point cloud file path") 14 | args = vars(ap.parse_args()) 15 | 16 | pcd = o3d.io.read_point_cloud(args['input']) 17 | o3d.visualization.draw_geometries([pcd]) -------------------------------------------------------------------------------- /requirements.pip: -------------------------------------------------------------------------------- 1 | absl-py==0.8.1 2 | astroid==2.3.3 3 | attrs==19.3.0 4 | backcall==0.1.0 5 | bleach==3.1.0 6 | cachetools==3.1.1 7 | certifi==2019.11.28 8 | chardet==3.0.4 9 | cityscapesScripts==1.1.0 10 | Click==7.0 11 | cloudpickle==1.2.2 12 | cycler==0.10.0 13 | Cython==0.29.12 14 | decorator==4.4.1 15 | defusedxml==0.6.0 16 | entrypoints==0.3 17 | fastrlock==0.4 18 | Flask==1.1.1 19 | google-auth==1.7.1 20 | google-auth-oauthlib==0.4.1 21 | grpcio==1.25.0 22 | idna==2.8 23 | imageio==2.6.1 24 | imagesize==1.1.0 25 | importlib-metadata==1.0.0 26 | imutils==0.5.3 27 | ipykernel==5.1.3 28 | ipython==7.10.0 29 | ipython-genutils==0.2.0 30 | ipywidgets==7.5.1 31 | isort==4.3.21 32 | itsdangerous==1.1.0 33 | jedi==0.15.1 34 | Jinja2==2.10.3 35 | joblib==0.14.0 36 | jsonschema==3.2.0 37 | jupyter-client==5.3.4 38 | jupyter-core==4.6.1 39 | kiwisolver==1.1.0 40 | lazy-object-proxy==1.4.3 41 | lvis==0.5.1 42 | Markdown==3.1.1 43 | MarkupSafe==1.1.1 44 | matplotlib==3.1.1 45 | mccabe==0.6.1 46 | mistune==0.8.4 47 | more-itertools==8.0.0 48 | nbconvert==5.6.1 49 | nbformat==4.4.0 50 | networkx==2.4 51 | notebook==6.0.2 52 | numpy==1.16.4 53 | oauthlib==3.1.0 54 | open3d==0.8.0.0 55 | opencv-contrib-python==3.4.2.17 56 | pandocfilters==1.4.2 57 | parso==0.5.1 58 | pexpect==4.7.0 59 | pickleshare==0.7.5 60 | Pillow==6.2.1 61 | portalocker==1.5.2 62 | prometheus-client==0.7.1 63 | prompt-toolkit==3.0.2 64 | protobuf==3.11.0 65 | ptyprocess==0.6.0 66 | pyasn1==0.4.8 67 | pyasn1-modules==0.2.7 68 | pycocotools==2.0 69 | Pygments==2.5.2 70 | pylint==2.4.4 71 | pyparsing==2.4.0 72 | pyrsistent==0.15.6 73 | python-dateutil==2.8.0 74 | PyWavelets==1.1.1 75 | PyYAML==5.1.2 76 | pyzmq==18.1.1 77 | requests==2.22.0 78 | requests-oauthlib==1.3.0 79 | rsa==4.0 80 | scikit-image==0.16.2 81 | scikit-learn==0.21.3 82 | scipy==1.3.2 83 | Send2Trash==1.5.0 84 | six==1.12.0 85 | tabulate==0.8.6 86 | tensorboard==2.0.2 87 | termcolor==1.1.0 88 | terminado==0.8.3 89 | testpath==0.4.4 90 | torch==1.3.1 91 | torchvision==0.4.2 92 | tornado==6.0.3 93 | tqdm==4.39.0 94 | traitlets==4.3.3 95 | typed-ast==1.4.0 96 | urllib3==1.25.7 97 | wcwidth==0.1.7 98 | webencodings==0.5.1 99 | Werkzeug==0.16.0 100 | widgetsnbextension==3.5.1 101 | wrapt==1.11.2 102 | yacs==0.1.6 103 | youtube-dl==2019.11.28 104 | zipp==0.6.0 105 | -------------------------------------------------------------------------------- /video/driving_country_480p_trimmed.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/video/driving_country_480p_trimmed.mp4 -------------------------------------------------------------------------------- /video/kittbw.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/video/kittbw.mp4 -------------------------------------------------------------------------------- /video/road-photo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/video/road-photo.jpg -------------------------------------------------------------------------------- /video/video_credits.txt: -------------------------------------------------------------------------------- 1 | # America's Most Visited National Park! - Fall Colors - Great Smoky Mountains 4K - Scenic Drive 2 | https://www.youtube.com/watch?v=Of5pGj6Obzo 3 | 4 | # KITTI Sequence 05 -------------------------------------------------------------------------------- /video/wget-log: -------------------------------------------------------------------------------- 1 | --2019-12-18 19:29:49-- https://images.unsplash.com/photo-1494783367193-149034c05e8f?ixlib=rb-1.2.1 2 | Resolving images.unsplash.com (images.unsplash.com)... 151.101.22.208, 2a04:4e42:5::720 3 | Connecting to images.unsplash.com (images.unsplash.com)|151.101.22.208|:443... connected. 4 | HTTP request sent, awaiting response... 200 OK 5 | Length: 6194940 (5.9M) [image/jpeg] 6 | Saving to: ‘photo-1494783367193-149034c05e8f?ixlib=rb-1.2.1’ 7 | 8 | photo-149 0%[ ] 0 --.-KB/s photo-1494 1%[ ] 118.75K 551KB/s photo-14947 4%[ ] 246.75K 579KB/s photo-149478 6%[> ] 374.75K 587KB/s photo-1494783 8%[> ] 502.75K 590KB/s photo-14947833 10%[=> ] 630.75K 593KB/s photo-149478336 12%[=> ] 758.75K 590KB/s photo-1494783367 14%[=> ] 886.75K 589KB/s photo-14947833671 16%[==> ] 1015K 591KB/s photo-149478336719 18%[==> ] 1.12M 590KB/s photo-1494783367193 21%[===> ] 1.24M 592KB/s hoto-1494783367193- 23%[===> ] 1.37M 593KB/s oto-1494783367193-1 25%[====> ] 1.49M 594KB/s to-1494783367193-14 27%[====> ] 1.62M 594KB/s o-1494783367193-149 29%[====> ] 1.74M 592KB/s eta 7s -1494783367193-1490 31%[=====> ] 1.87M 593KB/s eta 7s 1494783367193-14903 33%[=====> ] 1.99M 594KB/s eta 7s 494783367193-149034 35%[======> ] 2.12M 595KB/s eta 7s 94783367193-149034c 37%[======> ] 2.24M 594KB/s eta 7s 4783367193-149034c0 40%[=======> ] 2.37M 593KB/s eta 6s 783367193-149034c05 42%[=======> ] 2.49M 595KB/s eta 6s 83367193-149034c05e 43%[=======> ] 2.58M 522KB/s eta 6s 3367193-149034c05e8 50%[=========> ] 2.97M 595KB/s eta 5s 367193-149034c05e8f 52%[=========> ] 3.10M 596KB/s eta 5s 67193-149034c05e8f? 54%[=========> ] 3.22M 598KB/s eta 5s 7193-149034c05e8f?i 56%[==========> ] 3.35M 597KB/s eta 5s 193-149034c05e8f?ix 58%[==========> ] 3.47M 597KB/s eta 5s 93-149034c05e8f?ixl 60%[===========> ] 3.60M 598KB/s eta 4s 3-149034c05e8f?ixli 63%[===========> ] 3.72M 597KB/s eta 4s -149034c05e8f?ixlib 65%[============> ] 3.85M 598KB/s eta 4s 149034c05e8f?ixlib= 67%[============> ] 3.97M 600KB/s eta 4s 49034c05e8f?ixlib=r 69%[============> ] 4.10M 601KB/s eta 4s 9034c05e8f?ixlib=rb 71%[=============> ] 4.22M 600KB/s eta 3s 034c05e8f?ixlib=rb- 73%[=============> ] 4.35M 601KB/s eta 3s 34c05e8f?ixlib=rb-1 75%[==============> ] 4.47M 600KB/s eta 3s 4c05e8f?ixlib=rb-1. 
77%[==============> ] 4.60M 602KB/s eta 3s c05e8f?ixlib=rb-1.2 79%[==============> ] 4.72M 681KB/s eta 3s 05e8f?ixlib=rb-1.2. 82%[===============> ] 4.85M 597KB/s eta 2s 5e8f?ixlib=rb-1.2.1 84%[===============> ] 4.97M 588KB/s eta 2s e8f?ixlib=rb-1.2.1 86%[================> ] 5.10M 589KB/s eta 2s 8f?ixlib=rb-1.2.1 87%[================> ] 5.19M 578KB/s eta 2s f?ixlib=rb-1.2.1 89%[================> ] 5.30M 574KB/s eta 2s ?ixlib=rb-1.2.1 91%[=================> ] 5.43M 572KB/s eta 1s ixlib=rb-1.2.1 92%[=================> ] 5.49M 542KB/s eta 1s xlib=rb-1.2.1 95%[==================> ] 5.62M 540KB/s eta 1s lib=rb-1.2.1 96%[==================> ] 5.72M 527KB/s eta 1s ib=rb-1.2.1 98%[==================> ] 5.82M 517KB/s eta 0s photo-1494783367193 100%[===================>] 5.91M 519KB/s in 11s 9 | 10 | 2019-12-18 19:30:00 (572 KB/s) - ‘photo-1494783367193-149034c05e8f?ixlib=rb-1.2.1’ saved [6194940/6194940] 11 | 12 | --------------------------------------------------------------------------------