├── .gitignore
├── README.md
├── activate
├── mslam.py
├── mslam
│   ├── agent_loc
│   │   └── agent_loc.py
│   ├── depth
│   │   ├── mono.py
│   │   └── stereo_CNN.py
│   ├── geo_proj
│   │   └── geo_proj.py
│   ├── img_stitch
│   │   └── img_stitch.py
│   ├── lane_finder
│   │   ├── lane_finder.py
│   │   └── lines_detected.png
│   └── obj_rec
│       └── obj_rec.py
├── mslam_setup_cdf.sh
├── mslam_vis_pcd.py
├── requirements.pip
└── video
├── driving_country_480p_trimmed.mp4
├── kittbw.mp4
├── road-photo.jpg
├── video_credits.txt
└── wget-log
/.gitignore:
--------------------------------------------------------------------------------
1 | env-*/
2 |
3 | hd3_repo
4 | detectron2_repo
5 | monodepth2_repo
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MultiSLAM
2 | Enhanced Real-Time Multi-Camera SLAM for 3D Scene Reconstruction
3 | By: Mansoor Saqib & Shahin Imtiaz
4 | For: University of Toronto CSC420 - Intro to Image Understanding Fall 2019
5 |
6 | ## Running on CDF
7 |
8 | > $ bash mslam_setup_cdf.sh
9 | >
10 | > $ python3 mslam.py
11 | >
12 | > To view constructed 3D point cloud of map:
13 | >
14 | > $ python3 mslam_vis_pcd.py
15 | >
16 | > User configurable options are located inside mslam.py
17 |
18 | ## Personal Dev Environment Setup
19 | > $ sudo apt-get install python-dev python3-dev
20 | >
21 | > $ python3 -m venv env-mslam
22 | >
23 | > $ source env-mslam/bin/activate
24 | >
25 | > (env-mslam) $ python3 -m pip install -U pip
26 | >
27 | > (env-mslam) $ python3 -m pip install -U 'git+https://github.com/facebookresearch/fvcore.git' 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
28 | >
29 | > (env-mslam) $ python3 -m pip install -r requirements.pip
30 | >
31 | > (env-mslam) $ git clone https://github.com/facebookresearch/detectron2 detectron2_repo
32 | >
33 | > (env-mslam) $ python3 -m pip install -e detectron2_repo
34 |
35 | ## Package contents
36 | * [mslam.py](mslam.py): Entrypoint to run MultiSLAM
37 | * [mslam_vis_pcd.py](mslam_vis_pcd.py): View 3D point cloud of map
38 | * [mslam/agent_loc](mslam/agent_loc):
39 | - [agent_loc.py](mslam/agent_loc/agent_loc.py): Determine agent movement in 3D space
40 | * [mslam/depth](mslam/depth):
41 | - [mono.py](mslam/depth/mono.py): Compute mono depth information from a single image
42 | - [stereo_CNN.py](mslam/depth/stereo_CNN.py): Compute stereo depth information from two images
43 | * [mslam/geo_proj](mslam/geo_proj):
44 | - [geo_proj.py](mslam/geo_proj/geo_proj.py): Construct the scene point cloud and track agent location
45 | * [mslam/img_stitch](mslam/img_stitch):
46 | - [img_stitch.py](mslam/img_stitch/img_stitch.py): Stitch together a set of images
47 | * [mslam/obj_rec](mslam/obj_rec):
48 | - [obj_rec.py](mslam/obj_rec/obj_rec.py): Classify the objects in a single or set of images
--------------------------------------------------------------------------------
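The "User configurable options" mentioned in the README live at the top of mslam.py under its User Configuration banner. A condensed excerpt of the defaults (paths can be swapped for your own footage):

```python
# Condensed from mslam.py's "User Configuration" section (repository defaults shown).
args = {
    'leftcam': 'video/kittbw.mp4',    # main / left camera video
    # 'leftcam': 'video/driving_country_480p_trimmed.mp4',
    'rightcam': None,                 # right camera video, only needed for stereo depth
    'output': 'OUTPUT/',              # where OUT_<module>.mp4 / OUT_geo_projection.pcd are written
    'endframe': 1400,                 # number of frames to process; None = all
    'ip': '0.0.0.0',                  # live-stream host
    'port': '8000',                   # live-stream port
}

debugging = False       # verbose execution
live_stream = True      # serve module outputs over HTTP with Flask

enableModules = {
    'object_detection': True,
    'stereo_depth_cnn': False,        # stereo depth is currently not working
    'agent_locate': True,
    'mono_depth': True,
    'geo_projection': True,           # depends on depth and agent_locate
}
```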
/activate:
--------------------------------------------------------------------------------
1 | env-mslam/bin/activate
--------------------------------------------------------------------------------
/mslam.py:
--------------------------------------------------------------------------------
1 | ##CSC420 Fall 2019 Final Project - MultiSLAM
2 | ##By: Mansoor Saqib & Shahin Imtiaz
3 |
4 | from __future__ import absolute_import, division, print_function
5 | import sys
6 | sys.path.insert(1, 'hd3_repo')
7 | sys.path.insert(1, 'monodepth2_repo')
8 |
9 | import os
10 | from os.path import join
11 | import threading
12 | import argparse
13 | import datetime
14 | import imutils
15 | import time
16 | import copy
17 | import cv2
18 | import logging
19 | import pprint
20 | from argparse import ArgumentParser
21 | from tqdm import tqdm
22 | import numpy as np
23 | import open3d as o3d
24 | import PIL.Image as pil
25 | from PIL import Image
26 | import matplotlib as mpl
27 | import matplotlib.cm as cm
28 | import matplotlib.pyplot as plt
29 | import scipy.stats
30 |
31 | from mslam.agent_loc.agent_loc import AgentLocate
32 | from mslam.depth.mono import MonoDepth
33 | from mslam.geo_proj.geo_proj import GeoProjection
34 | # from mslam.depth.stereo_CNN import StereoDepthCNN # Stereo Depth currently not working
35 | from mslam.obj_rec.obj_rec import ObjectDetect
36 |
37 | ############################
38 | #### MultiSlam Loop ####
39 | ############################
40 |
41 | # The main rendering loop. Combines the SLAM modules together to create
42 | # a mapping of the provided environment video or real time file stream.
43 | def mslam():
44 | global vs, enableModules, outputFrame, length, settingsModules, debugging, live_stream
45 |
46 | # Initialize modules
47 | mod_object_detection = None
48 | mod_mono_depth = None
49 | mod_stereo_depth_cnn = None
50 | mod_agent_locate = None
51 | mod_geo_projection = None
52 |
53 | if debugging:
54 | print("Initializing modules")
55 |
56 | if enableModules['object_detection']:
57 | mod_object_detection = ObjectDetect(
58 | settingsModules['object_detection']['model_path'],
59 | settingsModules['object_detection']['model_weights'])
60 | if enableModules['mono_depth']:
61 | mod_mono_depth = MonoDepth(settingsModules['mono_depth']['model_path'])
62 | if enableModules['stereo_depth_cnn']:
63 | mod_stereo_depth_cnn = StereoDepthCNN(settingsModules['stereo_depth_cnn']['model_path'])
64 | if enableModules['agent_locate']:
65 | mod_agent_locate = AgentLocate(debugging=debugging)
66 | if enableModules['geo_projection']:
67 | if live_stream:
68 | mod_geo_projection = GeoProjection(mode='online')
69 | else:
70 | mod_geo_projection = GeoProjection(mode='offline')
71 |
72 | if debugging:
73 | print("Done initializing modules")
74 | print("Starting MultiSlam rendering")
75 |
76 | # Loop through video frames
77 | for i in tqdm(range(length)):
78 |
79 | # Get next frame
80 | isValid, frame = vs.read()
81 | if not isValid: # No more frames
82 | break
83 | outputFrame['original_L'] = frame.copy()
84 |
85 | # Get next frame for right camera
86 | if enableModules['stereo_depth_cnn']:
87 | isValid, frame2 = vs2.read()
88 | if not isValid:
89 | break
90 | outputFrame['original_R'] = frame2.copy()
91 |
92 | # Feed in frames to modules and obtain output
93 | if enableModules['mono_depth']:
94 | outputFrame['mono_depth'] = mod_mono_depth.estimate(frame.copy())
95 | if enableModules['stereo_depth_cnn']:
96 | outputFrame['stereo_depth_cnn'] = mod_stereo_depth_cnn.estimate(frame.copy())
97 | if enableModules['agent_locate']:
98 | out_agent_locate = mod_agent_locate.estimate(frame.copy())
99 | outputFrame['agent_locate'] = out_agent_locate['frame']
100 | al_transform = out_agent_locate['transform']
101 | if enableModules['object_detection']:
102 | outputFrame['object_detection'] = mod_object_detection.estimate(frame.copy())
103 | if enableModules['geo_projection']:
104 | outPCD = mod_geo_projection.estimate(frame.copy(), outputFrame['mono_depth'].copy(), al_transform, downsample=10)
105 | outputFrame['geo_projection_pcd'] = outPCD[0]
106 | outputFrame['geo_projection'] = outPCD[1]
107 |
108 | # Write frame to current point cloud or video file
109 | writeFrame(i)
110 |
111 | print("Done processing video. Output has been saved in:", args['output'])
112 | print("Closing mslam")
113 | time.sleep(10)
114 | print("Please exit the application with CTRL+C")
115 | exit(0)
116 |
117 | # Write the current frame of each enabled module to its output file. Alternatively, this
118 | # can be adjusted to stream the frames for a live view while rendering is in progress.
119 | ## frameNum = id of the frame being processed
120 | def writeFrame(frameNum):
121 | global outputFrame, outputWriter, enableModules, debugging
122 |
123 | for m in enableModules.keys():
124 | if not enableModules[m] or outputFrame[m] is None:
125 | continue
126 |
127 | if m == 'geo_projection_pcd' or m == 'geo_projection':
128 | if debugging:
129 | print("Adding points in frame", frameNum, "to global point cloud")
130 | print("writing frame", frameNum, "for", m)
131 | print("frame stats: ", np.max(outputFrame[m]), np.min(outputFrame[m]), outputFrame[m].shape)
132 | o3d.io.write_point_cloud(outputWriter['geo_projection_pcd'], outputFrame['geo_projection_pcd'])
133 | outputWriter['geo_projection'].write(outputFrame['geo_projection'])
134 | continue
135 |
136 | if debugging:
137 | print("writing frame", frameNum, "for", m)
138 | print("frame stats: ", np.max(outputFrame[m]), np.min(outputFrame[m]), outputFrame[m].shape)
139 |
140 | outputWriter[m].write(outputFrame[m])
141 |
142 | # Contains the current frame output for each module (or point cloud for geo_projection)
143 | outputFrame = {
144 | 'object_detection': None,
145 | 'mono_depth': None,
146 | 'stereo_depth_cnn': None,
147 | 'agent_locate': None,
148 | 'geo_projection': None,
149 | 'geo_projection_pcd': None,
150 | 'original_L': None,
151 | 'original_R': None,
152 | }
153 |
154 | ############################
155 | #### User Configuration ####
156 | ################################################################################################
157 |
158 | print('MSLAM options can be configured in mslam.py under User Configuration')
159 |
160 | # Initial configuration for input and output rendering settings
161 | args = {
162 | 'leftcam': 'video/kittbw.mp4', # Path to main or left camera video
163 | # 'leftcam': 'video/driving_country_480p_trimmed.mp4',
164 | 'rightcam': None, # Path to right camera video if stereo is enabled
165 | 'output': 'OUTPUT/', # Path to rendering output
166 | 'endframe': 1400, # Total number of video frames to process. None = All
167 | 'ip': '0.0.0.0', # Address for live streaming. Can access with VLC network stream
168 | 'port': '8000', # Port for live streaming. Can access with VLC network stream
169 | }
170 |
171 | # Verbose execution
172 | debugging = False
173 |
174 | # Live stream the output
175 | live_stream = True
176 |
177 | # Enable or disable the modules for debugging. For a complete slam system, enable all* (*choose one of mono or stereo depth).
178 | enableModules = {
179 | 'object_detection': True,
180 | 'stereo_depth_cnn': False,
181 | 'agent_locate': True,
182 | 'mono_depth': True,
183 | 'geo_projection': True, # depends on depth and agent_locate
184 | }
185 |
186 | # Module specific settings
187 | settingsModules = {
188 | 'object_detection': {
189 | 'model_path': "detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
190 | 'model_weights': "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl",
191 | },
192 | 'stereo_depth_cnn': {
193 | 'model_path': "hd3_repo/scripts/model_zoo/hd3s_things_kitti-1243813e.pth",
194 | },
195 | 'agent_locate': {},
196 | 'mono_depth': {
197 | 'model_path': os.path.join("monodepth2_repo/models", "mono_1024x320"),
198 | },
199 | 'geo_projection': {},
200 | }
201 |
202 |
203 | ################################################################################################
204 | #### Run MultiSlam ####
205 | ############################
206 |
207 | # Camera feed setup
208 | vs = cv2.VideoCapture(args["leftcam"])
209 | if args["rightcam"] is not None:
210 | vs2 = cv2.VideoCapture(args["rightcam"])
211 |
212 | # Get video stats
213 | length = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))
214 | width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
215 | height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
216 |
217 | if args["endframe"] is not None:
218 | length = args["endframe"]
219 |
220 | print("MSLAM will run for "+str(length)+" frames")
221 |
222 | # Output format
223 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
224 |
225 | # Output writers
226 | outputWriter = {}
227 | for m in enableModules.keys():
228 | if enableModules[m]:
229 |         if m == 'geo_projection':
230 | outputWriter['geo_projection_pcd'] = args["output"]+'OUT_' + 'geo_projection' +'.pcd'
231 | outputWriter['geo_projection'] = cv2.VideoWriter(args["output"]+'OUT_' + 'geo_projection' +'.mp4',fourcc,30,(1850,1016))
232 | else:
233 | outputWriter[m] = cv2.VideoWriter(args["output"]+'OUT_' + m +'.mp4',fourcc,30,(width,height))
234 |
235 | # Run the MultiSLAM loop in a background thread
236 | t = threading.Thread(target=mslam)
237 | t.daemon = True
238 | t.start()
239 |
240 | # Video streaming the output with the Flask framework is based on
241 | # https://www.pyimagesearch.com/2019/09/02/opencv-stream-video-to-web-browser-html-page/
242 | if live_stream:
243 | from flask import Response
244 | from flask import Flask
245 | from flask import render_template
246 |
247 | app = Flask(__name__)
248 |
249 | def generateStreamFrame(moduleName):
250 | global outputFrame
251 |
252 | while True:
253 | if outputFrame[moduleName] is None:
254 | continue
255 |
256 | (flag, encodedImage) = cv2.imencode(".jpg", outputFrame[moduleName])
257 |
258 | if not flag:
259 | continue
260 |
261 | yield(b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' +
262 | bytearray(encodedImage) + b'\r\n')
263 |
264 | @app.route("/multislam_stream_original_L")
265 | def stream_original_L():
266 | return Response(generateStreamFrame('original_L'),
267 | mimetype = "multipart/x-mixed-replace; boundary=frame")
268 |
269 | @app.route("/multislam_stream_original_R")
270 | def stream_original_R():
271 | return Response(generateStreamFrame('original_R'),
272 | mimetype = "multipart/x-mixed-replace; boundary=frame")
273 |
274 | @app.route("/multislam_stream_object_detection")
275 | def stream_object_detection():
276 | return Response(generateStreamFrame('object_detection'),
277 | mimetype = "multipart/x-mixed-replace; boundary=frame")
278 |
279 | @app.route("/multislam_stream_mono_depth")
280 | def stream_mono_depth():
281 | return Response(generateStreamFrame('mono_depth'),
282 | mimetype = "multipart/x-mixed-replace; boundary=frame")
283 |
284 | @app.route("/multislam_stream_stereo_depth_cnn")
285 | def stream_stereo_depth_cnn():
286 | return Response(generateStreamFrame('stereo_depth_cnn'),
287 | mimetype = "multipart/x-mixed-replace; boundary=frame")
288 |
289 | @app.route("/multislam_stream_agent_locate")
290 | def stream_agent_locate():
291 | return Response(generateStreamFrame('agent_locate'),
292 | mimetype = "multipart/x-mixed-replace; boundary=frame")
293 |
294 | @app.route("/multislam_stream_geo_projection")
295 | def stream_geo_projection():
296 | return Response(generateStreamFrame('geo_projection'),
297 | mimetype = "multipart/x-mixed-replace; boundary=frame")
298 |
299 | app.run(host=args["ip"], port=args["port"], debug=False, threaded=True, use_reloader=False)
300 |
301 | # Wait for thread to finish
302 | t.join()
--------------------------------------------------------------------------------
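A note on the outputs of mslam.py: each enabled module's rendered frames are written to <output>/OUT_<module>.mp4 and the global map to <output>/OUT_geo_projection.pcd (OUTPUT/ by default). When live_stream is enabled, the same frames are also served over HTTP at http://<ip>:<port>/multislam_stream_<module>, e.g. http://0.0.0.0:8000/multislam_stream_mono_depth, and can be opened in a browser or as a VLC network stream.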
/mslam/agent_loc/agent_loc.py:
--------------------------------------------------------------------------------
1 | # Based on the OpenCV SIFT and Flann Matcher libraries
2 | # https://docs.opencv.org/3.4/da/df5/tutorial_py_sift_intro.html
3 | # https://docs.opencv.org/3.4/d5/d6f/tutorial_feature_flann_matcher.html
4 | #
5 | # Customized with our own keypoint pruning, agent movement detection, and smoothing algorithms
6 | import cv2
7 | import numpy as np
8 |
9 | '''
10 | Agent Location
11 | Utilizes the OpenCV library: https://opencv.org/
12 | Computes the movement of the agent based on frame by frame differences
13 | '''
14 | class AgentLocate:
15 | # Initialize SIFT and Flann Matcher
16 | def __init__(self, kpNum=1000, enableGlobal=False, match_threshold=200, debugging=False):
17 | self.debugging = debugging
18 | self.kpNum = kpNum # Max keypoints to detect
19 | self.match_threshold = match_threshold # Distance threshold
20 | self.enableGlobal = enableGlobal
21 |
22 | self.sift = cv2.xfeatures2d.SIFT_create(self.kpNum)
23 | self.FLANN_INDEX_KDTREE = 1
24 | self.index_params = dict(algorithm = self.FLANN_INDEX_KDTREE, trees = 5)
25 | self.search_params = dict(checks=50)
26 | self.flannFrameByFrame = cv2.FlannBasedMatcher(self.index_params,self.search_params)
27 | self.prevFrameKP = None
28 |
29 | self.flannGlobal = None
30 | if self.enableGlobal:
31 | self.flannGlobal = cv2.FlannBasedMatcher(self.index_params,self.search_params)
32 |
33 | self.smooth_history = 8
34 | self.smooth_x_magnitude = []
35 | self.smooth_y_magnitude = []
36 | self.smooth_translate = []
37 | self.smooth_rotate = []
38 | self.letter_to_move = {
39 | 's': 0,
40 | 'f': 1,
41 | 'r': 2,
42 | 'l': 3,
43 | }
44 | self.move_to_letter = {
45 | 0: 's',
46 | 1: 'f',
47 | 2: 'r',
48 | 3: 'l',
49 | }
50 |
51 | # Return the descriptors saved in the given flann matcher
52 | def getTrainDescriptors(self, matcher):
53 | if matcher == "global" and self.enableGlobal:
54 | return self.flannGlobal.getTrainDescriptors()
55 | elif matcher == "frameByframe":
56 | return self.flannFrameByFrame.getTrainDescriptors()
57 | return None
58 |
59 |     # Returns True iff the given keypoint is at least `thresh` (Euclidean distance) away from ALL other saved keypoints
60 |     # and is not within 100 pixels of the middle of the frame
61 | def pruneKP(self, kp, kpList, middle, thresh=30):
62 | cur_pt = np.array(kp.pt)
63 | if np.linalg.norm(cur_pt - middle) < 100:
64 | return False
65 | for i in kpList:
66 | i_pt = np.array(i.pt)
67 | if np.linalg.norm(cur_pt - i_pt) < thresh:
68 | return False
69 | return True
70 |
71 | # Smooth motion over the last self.smooth_history number of historical movements
72 | # by taking the most common value among them
73 | def smoothMove(self, mov):
74 | if len(self.smooth_translate) < self.smooth_history:
75 | self.smooth_translate.append(mov[0])
76 | self.smooth_rotate.append(mov[1])
77 | self.smooth_x_magnitude.append(mov[2])
78 | self.smooth_y_magnitude.append(mov[3])
79 | out = [max(set(self.smooth_translate), key=self.smooth_translate.count),
80 | max(set(self.smooth_rotate), key=self.smooth_rotate.count),
81 | max(set(self.smooth_x_magnitude), key=self.smooth_x_magnitude.count),
82 | max(set(self.smooth_y_magnitude), key=self.smooth_y_magnitude.count)]
83 | if self.debugging:
84 | if out[2] is None or out[3] is None:
85 | print(out, 'mag=', None)
86 | else:
87 | print(out, 'mag=', np.hypot(out[2], out[3]))
88 | return out
89 |
90 | self.smooth_translate.pop(0)
91 | self.smooth_rotate.pop(0)
92 | self.smooth_x_magnitude.pop(0)
93 | self.smooth_y_magnitude.pop(0)
94 | self.smooth_translate.append(mov[0])
95 | self.smooth_rotate.append(mov[1])
96 | self.smooth_x_magnitude.append(mov[2])
97 | self.smooth_y_magnitude.append(mov[3])
98 | out = [max(set(self.smooth_translate), key=self.smooth_translate.count),
99 | max(set(self.smooth_rotate), key=self.smooth_rotate.count),
100 | max(set(self.smooth_x_magnitude), key=self.smooth_x_magnitude.count),
101 | max(set(self.smooth_y_magnitude), key=self.smooth_y_magnitude.count)]
102 | if self.debugging:
103 | if out[2] is None or out[3] is None:
104 | print(out, 'mag=', None)
105 | else:
106 | print(out, 'mag=', np.hypot(out[2], out[3]))
107 | return out
108 |
109 |     # Return the estimated camera movement between frames as [translation, rotation, x magnitude, y magnitude]
110 |     def getViewTransform(self, matches, kp, imgShape):
111 |         # Divide the image into a 4x4 grid of quadrants along each axis
112 | quad_width = imgShape[1] / 4
113 | quad_height = imgShape[0] / 4
114 | quads_x = np.zeros((4,4))
115 | quads_y = np.zeros((4,4))
116 |
117 | # Initialize with a constant value, otherwise median of an empty list = NaN
118 | x_magnitude = [0.00005]
119 | y_magnitude = [0.00005]
120 |
121 | # Reduce the difference in each axis for matching points to a numerical sign and sum over differences in each quadrant
122 | # Place it in the quadrant belonging to the x, y location of the keypoint in the current frame.
123 | for i in range(len(matches)):
124 | x, y = kp[matches[i][0].queryIdx].pt
125 | xh, yh = self.prevFrameKP[matches[i][0].trainIdx].pt
126 | qx = int(x // quad_width)
127 | qy = int(y // quad_height)
128 | x_diff = x-xh
129 | y_diff = y-yh
130 |
131 | if qy in [0, 3]:
132 | x_magnitude.append(abs(x_diff))
133 | if qx in [0, 3]:
134 | y_magnitude.append(abs(y_diff))
135 |
136 | quads_x[qy, qx] += x_diff
137 | quads_y[qy, qx] += y_diff
138 |
139 | if x_diff < 0:
140 | quads_x[qy, qx] -= 1
141 | else:
142 | quads_x[qy, qx] += 1
143 |
144 | if y_diff < 0:
145 | quads_y[qy, qx] -= 1
146 | else:
147 | quads_y[qy, qx] += 1
148 |
149 | # Calculate the overall numerical sign of each quadrant
150 | quads_x = np.where(quads_x < 0, -1, quads_x)
151 | quads_x = np.where(quads_x > 0, 1, quads_x)
152 | quads_y = np.where(quads_y < 0, -1, quads_y)
153 | quads_y = np.where(quads_y > 0, 1, quads_y)
154 |
155 | # Transformation logic:
156 | # Move in the forward direction iff all conditions are met:
157 | # - Majority of Y quadrants in the top most row are 0 or negative
158 | # - Majority of X quadrants in the right most column are 0 or positive
159 | # - Majority of Y quadrants in the bottom most row are 0 or positive
160 | # - Majority of X quadrants in the left most column are 0 or negative
161 | #
162 | # Rotate the camera right iff:
163 | # - Majority of X quadrants are 0 or negative
164 | #
165 | # Rotate the camera left iff:
166 | # - Majority of X quadrants are 0 or positive
167 | #
168 | # Rotate the camera up iff:
169 | # - Majority of Y quadrants are 0 or positive
170 | #
171 | # Rotate the camera down iff:
172 | # - Majority of Y quadrants are 0 or negative
173 |
174 | # [translation, rotation, x magnitude, y magnitude]
175 | mov = [None, None, np.median(x_magnitude), np.median(y_magnitude)]
176 |
177 | if (np.sum(quads_y[0]) == 0 and np.sum(quads_y[3]) == 0 and
178 | np.sum(quads_x[:,0]) == 0 and np.sum(quads_x[:,3]) == 0):
179 | mov[0] = 's'
180 | elif (np.sum(quads_y[0]) <= 0 and np.sum(quads_y[3]) >= 0 and
181 | np.sum(quads_x[:,0]) <= 0 and np.sum(quads_x[:,3]) >= 0):
182 | mov[0] = 'f'
183 |
184 | # NOTE: Trial and error for determining best rotation estimation
185 | # x_sum = stats.mode([quads_x[0,0], quads_x[1,0], quads_x[2,0], quads_x[3,0], quads_x[0,1], quads_x[0,2], quads_x[0,3], quads_x[1,3], quads_x[2,3], quads_x[3,3]])[0][0]
186 | # x_sum = np.median([quads_x[1,0], quads_x[2,0], quads_x[3,0], quads_x[1,3], quads_x[2,3], quads_x[3,3]])
187 | # x_sum = np.median(quads_x)
188 | # x_sum = np.average(quads_x)
189 | # x_sum = scipy.stats.mode(quads_x, axis=None)[0][0]
190 | # x_sum = scipy.stats.mode(np.array([quads_x[0,0], quads_x[1,0], quads_x[2,0], quads_x[3,0], quads_x[0,1], quads_x[0,2], quads_x[0,3], quads_x[1,3], quads_x[2,3], quads_x[3,3]]), axis=None)[0][0]
191 | x_sum = np.sum(quads_x)
192 | if self.debugging:
193 | print('xsum is:', x_sum)
194 |
195 | if (x_sum < -2):
196 | mov[1] = 'r'
197 | elif (x_sum > 2):
198 | mov[1] = 'l'
199 |
200 | # NOTE: Up and down rotations still in progress to be implemented correctly and tested
201 | # elif (np.sum(quads_y) < 0):
202 | # if self.debugging:
203 | # print('Rotate Down')
204 | # mov[1] = 'd'
205 | # elif (np.sum(quads_y) > 0):
206 | # if self.debugging:
207 | # print('Rotate Up')
208 | # mov[1] = 'u'
209 | else:
210 | pass
211 |
212 | return self.smoothMove(mov)
213 |
214 | # Compute the change in agent location based on previous frame
215 | def estimate(self, img, kpNum=1000):
216 | kp, des = self.sift.detectAndCompute(img,None)
217 | if len(kp) == 0 or des is None:
218 | return {'frame': np.zeros(img.shape, dtype='uint8') * 255, 'transform': [None, None, None, None]}
219 |
220 | # Sort keypoints by response
221 | kp, des = zip(*(sorted(zip(kp, des), key=lambda x: x[0].response, reverse=True)))
222 |
223 | # Prune keypoints that are close together
224 | # And around the middle of the frame
225 | kp_apart = []
226 | des_apart = []
227 | middle = np.array([img.shape[1]//2, img.shape[0]//2])
228 | for i in range(len(kp)):
229 | if self.pruneKP(kp[i], kp_apart, middle):
230 | kp_apart.append(kp[i])
231 | des_apart.append(des[i])
232 |
233 | if kpNum <= len(kp_apart):
234 | kp = kp_apart[:kpNum]
235 | des = des_apart[:kpNum]
236 | else:
237 | kp = kp_apart
238 | des = des_apart
239 |
240 | # First frame. Add to empty training set and return blank frame
241 | if len(self.getTrainDescriptors('frameByframe')) == 0:
242 | self.flannFrameByFrame.add([np.array(des)])
243 | self.flannFrameByFrame.train()
244 |
245 | if self.enableGlobal:
246 | self.flannGlobal.add([np.array(des)])
247 | self.flannGlobal.train()
248 |
249 | self.prevFrameKP = kp
250 | return {'frame': np.zeros(img.shape, dtype='uint8') * 255, 'transform': [None, None, None, None]}
251 |
252 | # Match the current frame with the previous one
253 | matchesFrameByFrame = self.flannFrameByFrame.knnMatch(np.array(des), k=1)
254 | matchesFrameByFrame = [x for x in matchesFrameByFrame if x[0].distance < self.match_threshold]
255 | # Remove the previous frame's descriptors and add in the current one's to the matcher
256 | self.flannFrameByFrame.clear()
257 | self.flannFrameByFrame.add([np.array(des)])
258 | self.flannFrameByFrame.train()
259 |
260 | # Match the current frame with all previous frames and add to the descriptor collection
261 | if self.enableGlobal:
262 | matchesGlobal = self.flannGlobal.knnMatch(np.array(des), k=1)
263 | matchesGlobal = [x for x in matchesGlobal if x[0].distance < self.match_threshold]
264 | self.flannGlobal.add([np.array(des)])
265 | self.flannGlobal.train()
266 |
267 | transform = self.getViewTransform(matchesFrameByFrame, kp, img.shape)
268 | self.prevFrameKP = kp
269 | outKPFrame = cv2.drawKeypoints(img,kp,img,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
270 |
271 | return {'frame': outKPFrame, 'transform': transform}
--------------------------------------------------------------------------------
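The movement classification in agent_loc.py reduces keypoint displacements to per-quadrant signs and then votes on them, as spelled out in the comment block inside getViewTransform. Below is a minimal, self-contained sketch of that voting idea on synthetic displacements; the keypoints and the radially expanding displacement field are made up for illustration, not taken from the module.

```python
import numpy as np

# Sketch of the 4x4 quadrant sign-voting used by AgentLocate.getViewTransform.
# The keypoints and displacements below are synthetic (forward motion looks like
# a radial expansion away from the image centre).
h, w = 480, 640
quads_x = np.zeros((4, 4))
quads_y = np.zeros((4, 4))

rng = np.random.default_rng(0)
pts = rng.uniform([0, 0], [w, h], size=(200, 2))     # keypoints in the current frame
disp = (pts - np.array([w / 2, h / 2])) * 0.02       # displacement since the previous frame

for (x, y), (dx, dy) in zip(pts, disp):
    qx = min(int(x // (w / 4)), 3)                   # 4x4 grid cell of the keypoint
    qy = min(int(y // (h / 4)), 3)
    quads_x[qy, qx] += dx + np.sign(dx)              # accumulate signed x differences
    quads_y[qy, qx] += dy + np.sign(dy)              # accumulate signed y differences

# Collapse each quadrant to its overall sign, as the module does with np.where.
quads_x, quads_y = np.sign(quads_x), np.sign(quads_y)

# Forward motion: top row drifts up or not at all, bottom row down, left column
# left, right column right (the module's 'f' rule).
forward = (quads_y[0].sum() <= 0 and quads_y[3].sum() >= 0 and
           quads_x[:, 0].sum() <= 0 and quads_x[:, 3].sum() >= 0)
print('forward' if forward else 'not forward')       # prints: forward
```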
/mslam/depth/mono.py:
--------------------------------------------------------------------------------
1 | # Based on the Niantic's Monodepth2 libraries
2 | # https://github.com/nianticlabs/monodepth2
3 | #
4 | # Included copyright information:
5 | # Copyright Niantic 2019. Patent Pending. All rights reserved.
6 | #
7 | # This software is licensed under the terms of the Monodepth2 licence
8 | # which allows for non-commercial use only, the full terms of which are made
9 | # available in the LICENSE file.
10 |
11 | from __future__ import absolute_import, division, print_function
12 |
13 | import sys
14 | sys.path.insert(1, '../../monodepth2_repo')
15 |
16 | import os
17 | import glob
18 | import argparse
19 | import numpy as np
20 | import PIL.Image as pil
21 | from PIL import Image
22 | import matplotlib as mpl
23 | import matplotlib.cm as cm
24 | import cv2
25 |
26 | import torch
27 | from torchvision import transforms, datasets
28 |
29 | import monodepth2_repo.networks as networks
30 | from monodepth2_repo.layers import disp_to_depth
31 | from monodepth2_repo.utils import download_model_if_doesnt_exist
32 |
33 | '''
34 | Monocular Depth
35 | Utilizes the Monodepth2 CNN: https://github.com/nianticlabs/monodepth2
36 | Creates a depth map from a single frame
37 | NOTE: Makes use of the variable naming and calling conventions found in the library's predictor script
38 | '''
39 | class MonoDepth:
40 | # Initialize the predictor
41 | def __init__(self, path):
42 | if torch.cuda.is_available():
43 | self.device = torch.device("cuda")
44 | else:
45 | self.device = torch.device("cpu")
46 |
47 | # Load model
48 | self.model_path = path
49 | self.encoder_path = os.path.join(self.model_path, "encoder.pth")
50 | self.depth_decoder_path = os.path.join(self.model_path, "depth.pth")
51 | self.encoder = networks.ResnetEncoder(18, False)
52 | self.loaded_dict_enc = torch.load(self.encoder_path, map_location=self.device)
53 |
54 |         # Model training parameters
55 | self.feed_height = self.loaded_dict_enc['height']
56 | self.feed_width = self.loaded_dict_enc['width']
57 | self.filtered_dict_enc = {k: v for k, v in self.loaded_dict_enc.items() if k in self.encoder.state_dict()}
58 | self.encoder.load_state_dict(self.filtered_dict_enc)
59 | self.encoder.to(self.device)
60 | self.encoder.eval()
61 |
62 | self.depth_decoder = networks.DepthDecoder(
63 | num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
64 |
65 | self.loaded_dict = torch.load(self.depth_decoder_path, map_location=self.device)
66 | self.depth_decoder.load_state_dict(self.loaded_dict)
67 | self.depth_decoder.to(self.device)
68 | self.depth_decoder.eval()
69 |
70 | def estimate(self, img):
71 | with torch.no_grad():
72 | # Convert cv2 image array to PIL Image
73 | img = Image.fromarray(img).convert('RGB')
74 | original_width, original_height = img.size
75 | img = img.resize((self.feed_width, self.feed_height), pil.LANCZOS)
76 | img = transforms.ToTensor()(img).unsqueeze(0)
77 |
78 | # Get prediction
79 | img = img.to(self.device)
80 | features = self.encoder(img)
81 | outputs = self.depth_decoder(features)
82 | disp = outputs[("disp", 0)]
83 | disp_resized = torch.nn.functional.interpolate(
84 | disp, (original_height, original_width), mode="bilinear", align_corners=False)
85 |
86 | # Generate depth map
87 | disp_resized_np = disp_resized.squeeze().cpu().numpy()
88 | vmax = np.percentile(disp_resized_np, 95)
89 | normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
90 | mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
91 | colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
92 |
93 | return colormapped_im
--------------------------------------------------------------------------------
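A hypothetical standalone use of the MonoDepth module above, assuming it is run from the repository root after mslam_setup_cdf.sh has fetched monodepth2_repo and the mono_1024x320 weights; the image and output file names are illustrative only:

```python
import cv2
from mslam.depth.mono import MonoDepth

# Assumes monodepth2_repo and the mono_1024x320 weights are in place (see mslam_setup_cdf.sh)
# and that this is run from the repository root. File names are illustrative.
model = MonoDepth("monodepth2_repo/models/mono_1024x320")
frame = cv2.imread("video/road-photo.jpg")                     # any BGR frame
depth_vis = model.estimate(frame)                              # colour-mapped disparity (RGB order)
cv2.imwrite("mono_depth_example.png",
            cv2.cvtColor(depth_vis, cv2.COLOR_RGB2BGR))        # convert back to BGR for OpenCV
```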
/mslam/depth/stereo_CNN.py:
--------------------------------------------------------------------------------
1 | # Based on the UCBDrive's HD3 libraries
2 | # https://github.com/ucbdrive/hd3
3 | #
4 | # Work is still in progress to make this module functional; a few library errors remain
5 |
6 | from __future__ import absolute_import, division, print_function
7 |
8 | import sys
9 | sys.path.insert(1, '../../hd3_repo')
10 | sys.path.insert(1, '../../hd3_repo/utils')
11 |
12 | import os
13 | from os.path import join
14 | import cv2
15 | import time
16 | import math
17 | import logging
18 | from argparse import ArgumentParser
19 | import numpy as np
20 |
21 | from PIL import Image
22 |
23 | import torch
24 | import torch.nn.parallel
25 | import torch.nn.functional as F
26 | import torch.backends.cudnn as cudnn
27 | import torch.optim
28 | import torch.utils.data
29 |
30 | # import data.hd3data as datasets
31 | # import data.flowtransforms as transforms
32 | # import hd3model as models
33 | # from utils.utils import *
34 | # from models.hd3_ops import *
35 | # import utils.flowlib as fl
36 |
37 | import hd3_repo.data.hd3data as datasets
38 | import hd3_repo.data.flowtransforms as transforms
39 | import hd3_repo.hd3model as models
40 | from hd3_repo.utils.utils import *
41 | from hd3_repo.models.hd3_ops import *
42 | import hd3_repo.utils.flowlib as fl
43 |
44 | '''
45 | Stereo Depth
46 | Utilizes the HD3 CNN: https://github.com/ucbdrive/hd3
47 | Creates a depth map from two stereoscopic frames
48 | NOTE: Currently a work in progress
49 | NOTE: Makes use of the variable naming and calling conventions found in the library's predictor script
50 | '''
51 | class StereoDepthCNN:
52 | def __init__(self, model_path):
53 | self.corr_range = [4, 4, 4, 4, 4, 4]
54 | self.model = models.HD3Model("stereo", "dlaup", "hda", self.corr_range,
55 | False).cuda()
56 | self.model = torch.nn.DataParallel(self.model).cuda()
57 |
58 | self.checkpoint = torch.load(model_path)
59 | self.model.load_state_dict(self.checkpoint['state_dict'], strict=True)
60 |
61 | self.model.eval()
62 |
63 | def get_target_size(self, H, W):
64 | h = 64 * np.array([[math.floor(H / 64), math.floor(H / 64) + 1]])
65 | w = 64 * np.array([[math.floor(W / 64), math.floor(W / 64) + 1]])
66 | ratio = np.abs(np.matmul(np.transpose(h), 1 / w) - H / W)
67 | index = np.argmin(ratio)
68 | return h[0, index // 2], w[0, index % 2]
69 |
70 | def estimate(self, imgL, imgR):
71 | input_size = imgL.shape
72 |
73 | mean = [0.485, 0.456, 0.406]
74 | std = [0.229, 0.224, 0.225]
75 |
76 | th, tw = self.get_target_size(input_size[0], input_size[1])
77 |
78 | val_transform = transforms.Compose(
79 | [transforms.ToTensor(),
80 | transforms.Normalize(mean=mean, std=std)])
81 |
82 | mode="stereo"
83 | label_num=False
84 | transform=val_transform
85 | out_size=True
86 |
87 | cudnn.enabled = True
88 | cudnn.benchmark = True
89 |
90 | cur_data = [[imgL, imgR], []]
91 | cur_data = list(val_transform(*cur_data))
92 | cur_data.append(np.array(input_size[::-1], dtype=int))
93 | cur_data = tuple(cur_data)
94 |
95 | with torch.no_grad():
96 | img_list, label_list, img_size = cur_data
97 | img_list = [img.unsqueeze(0) for img in img_list]
98 | img_size = np.array(input_size[:2][::-1], dtype=int)
99 | img_size = img_size[np.newaxis, :]
100 | img_list = [img.to(torch.device("cuda")) for img in img_list]
101 | label_list = [
102 | label.to(torch.device("cuda")) for label in label_list
103 | ]
104 |
105 | # resize test
106 | resized_img_list = [
107 | F.interpolate(
108 | img, (th, tw), mode='bilinear', align_corners=True)
109 | for img in img_list
110 | ]
111 |
112 | output = self.model(
113 | img_list=resized_img_list,
114 | label_list=label_list,
115 | get_vect=True,
116 | get_epe=False)
117 | scale_factor = 1 / 2**(7 - len(self.corr_range))
118 | output['vect'] = resize_dense_vector(output['vect'] * scale_factor,
119 | img_size[0, 1],
120 | img_size[0, 0])
121 |
122 | pred_vect = output['vect'].data.cpu().numpy()
123 | pred_vect = np.transpose(pred_vect, (0, 2, 3, 1))
124 | curr_vect = pred_vect[0]
125 |
126 | vis_flo = fl.flow_to_image(fl.disp2flow(curr_vect))
127 | vis_flo = cv2.cvtColor(vis_flo, cv2.COLOR_RGB2BGR)
128 | return vis_flo
--------------------------------------------------------------------------------
/mslam/geo_proj/geo_proj.py:
--------------------------------------------------------------------------------
1 | # Based on the Open3D libraries
2 | # https://github.com/intel-isl/Open3D
3 | #
4 | # Customized with our own 3D position estimation of agent in point cloud
5 |
6 | import open3d as o3d
7 | import cv2
8 | import matplotlib.pyplot as plt
9 | import time
10 | import numpy as np
11 | import copy
12 |
13 | '''
14 | Geometric Projection
15 | Utilizes the Open3D 3D Processing Library: https://github.com/intel-isl/Open3D
16 | Creates a point cloud map of the region the agent is located in
17 | '''
18 | class GeoProjection():
19 | # Initialize an empty point cloud.
20 | def __init__(self, mode='offline'):
21 | self.mode = mode
22 | self.pcd = None
23 | self.xyz = np.zeros(3, dtype='float64')
24 | self.rot = np.zeros((3,1), dtype='float64')
25 | self.vis = None
26 |
27 | # If using live mode, create the visualizer window.
28 | if self.mode == 'online':
29 | self.vis = o3d.visualization.Visualizer()
30 | self.vis.create_window()
31 |
32 | # Fix view reset from https://github.com/intel-isl/Open3D/issues/497
33 | def center_view(self, vis):
34 | vis.reset_view_point(True)
35 | ctr = vis.get_view_control()
36 | ctr.rotate(180, 90.0)
37 |
38 | # In live mode, update the visualizer to show the updated point cloud
39 | def update(self):
40 | self.vis.update_geometry()
41 | self.vis.poll_events()
42 | self.vis.reset_view_point(True)
43 | ctr = self.vis.get_view_control()
44 | ctr.rotate(0, 500)
45 | self.vis.update_renderer()
46 |
47 | # DEPRECATED
48 | # Returns the rotation matrix for a rotation in the Y axis
49 | def rotateY(self, d):
50 | return np.array([[np.cos(d), 0, np.sin(d)],
51 | [0, 1, 0],
52 | [-np.sin(d), 0, np.cos(d)]])
53 |
54 | # DEPRECATED
55 | # Returns the rotation matrix for a rotation in the X axis
56 | def rotateX(self, d):
57 | return np.array([[1, 0, 0],
58 | [0, np.cos(d), -np.sin(d)],
59 | [0, np.sin(d), np.cos(d)]])
60 |
61 |     # Move a single frame's point cloud to match its location in the global map
62 |     def movePoints(self, pcd, transformID):
63 |         # vector magnitude from the x and y components
64 | # TODO: CURRENTLY USING CONSTANT VALUE
65 | if True or transformID[2] is None or transformID[3] is None:
66 | magnitude = 0.00005
67 | else:
68 | magnitude = np.hypot(transformID[2], transformID[3]) / 100000
69 |
70 | # Forward
71 | if transformID[0] == 'f':
72 | # Forward movement is mapped to the X-Z plane based on the vector angle of the current rotation
73 | # ^ |
74 | # | |
75 | # | <------ | ------->
76 | # | 0d 90d v 180d 270d
77 | self.xyz += np.array([np.sin(self.rot[1][0])*magnitude, 0, -np.cos(self.rot[1][0])*magnitude])
78 |
79 | # Rotate right (Y axis)
80 | if transformID[1] == 'r':
81 | self.rot += np.array([0,0.032,0]).reshape(3,1)
82 | self.xyz += np.array([np.sin(self.rot[1][0])*magnitude, 0, -np.cos(self.rot[1][0])*magnitude])
83 |
84 | # Rotate left (Y axis)
85 | elif transformID[1] == 'l':
86 | self.rot += np.array([0,-0.032,0]).reshape(3,1)
87 | self.xyz += np.array([np.sin(self.rot[1][0])*magnitude, 0, -np.cos(self.rot[1][0])*magnitude])
88 |
89 | # Rotate up (X axis)
90 | elif transformID[1] == 'u':
91 | self.rot += np.array([-0.032,0,0]).reshape(3,1)
92 |
93 | # Rotate down (X axis)
94 | elif transformID[1] == 'd':
95 | self.rot += np.array([0.032,0,0]).reshape(3,1)
96 |
97 | # Apply transformation
98 | cur_pcd = pcd.translate(self.xyz)
99 | cur_pcd = pcd.rotate(self.rot)
100 | return cur_pcd
101 |
102 | # Add the point cloud from the current frame to the global point cloud of the map
103 | def estimate(self, img_colour, img_depth, transformID, crop_fact_h=0.8, crop_fact_w=0.7, downsample=20):
104 | img_colour = cv2.cvtColor(img_colour, cv2.COLOR_BGR2RGB)
105 |
106 | # Crop the frame to reduce boundary depth noise
107 | h, w = img_colour.shape[:2]
108 | crop_h = int((h - (crop_fact_h*h)) / 2)
109 | crop_w = int((w - (crop_fact_w*w)) / 2)
110 |
111 | # Convert the cv2 frames to the Open3D image format
112 | img_colour = copy.deepcopy(img_colour[crop_h:h-crop_h, crop_w:w-crop_w, :])
113 | img_od3_colour = o3d.geometry.Image(img_colour)
114 | img_depth = copy.deepcopy(img_depth[crop_h:h-crop_h, crop_w:w-crop_w, :])
115 | img_od3_depth = o3d.geometry.Image(img_depth)
116 |
117 | # Create a point cloud from the current frame and transform it so it is right side up
118 | rgbd_img = o3d.geometry.RGBDImage.create_from_color_and_depth(img_od3_colour, img_od3_depth, convert_rgb_to_intensity=False)
119 | cur_pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
120 | rgbd_img,
121 | o3d.camera.PinholeCameraIntrinsic(
122 | o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
123 | cur_pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
124 |
125 | # Downsample the number of points to reduce output size and rendering computation
126 | cur_pcd = cur_pcd.uniform_down_sample(downsample)
127 |
128 | # Cut a square in the point cloud to make a path viewable when the point cloud is rendered
129 | # _______________
130 | # | X X X X |
131 | # | X X X X |
132 | # | X X X X |
133 | # | |
134 |         # --------------- where X represents the cut region
135 | pcd_array = np.asarray(cur_pcd.points)
136 | pcd_cent = cur_pcd.get_center()
137 | pcd_max = cur_pcd.get_max_bound()
138 | pcd_min = cur_pcd.get_min_bound()
139 | x_thresh = (pcd_max[0] - pcd_cent[0]) / 2
140 | y_thresh = (pcd_max[1] - pcd_cent[1]) / 2
141 | xxR = np.where(pcd_array[:,0] > pcd_cent[0] + x_thresh)
142 | xxL = np.where(pcd_array[:,0] < pcd_cent[0] - x_thresh)
143 | yy = np.where(pcd_array[:,1] < pcd_cent[1] - y_thresh)
144 | uu = np.unique(np.append(np.append(xxR[0], xxL[0]), yy[0]))
145 | cur_pcd = cur_pcd.select_down_sample(uu, invert=False)
146 |
147 | # Based on camera movement, adjust the placement of the point cloud in the global map
148 | cur_pcd = self.movePoints(cur_pcd, transformID)
149 |
150 | # Add the point cloud to the global map
151 |         if self.pcd is None:
152 | self.pcd = copy.deepcopy(cur_pcd)
153 | if self.mode == 'online':
154 | self.vis.add_geometry(self.pcd)
155 | else:
156 | self.pcd += cur_pcd
157 |
158 |         # Render the current global map (offline mode produces no preview frame)
159 |         outFramePCD = None
160 |         if self.mode == 'online':
161 |             self.update()
162 | outFramePCD = cv2.cvtColor(cv2.normalize(np.asarray(
163 | self.vis.capture_screen_float_buffer(True)),
164 | None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1), cv2.COLOR_RGB2BGR)
165 |
166 | return [self.pcd, outFramePCD]
--------------------------------------------------------------------------------
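The pose bookkeeping in movePoints is a simple dead-reckoning update: yaw (rotation about Y) accumulates in 0.032 rad steps, and each forward step of size magnitude moves the agent along (sin(yaw), 0, -cos(yaw)) in the X-Z plane. A minimal sketch of that update rule, with a made-up move sequence:

```python
import numpy as np

# Sketch of the dead-reckoning update in GeoProjection.movePoints.
# The move sequence is made up; 'f' = forward, 'r'/'l' = rotate right/left.
xyz = np.zeros(3)
yaw = 0.0
magnitude = 0.00005          # the constant step size currently used by the module

for move in ['f', 'f', 'r', 'r', 'f']:
    if move == 'r':
        yaw += 0.032
    elif move == 'l':
        yaw -= 0.032
    # Forward motion is mapped onto the X-Z plane using the current yaw.
    xyz += np.array([np.sin(yaw) * magnitude, 0.0, -np.cos(yaw) * magnitude])

print(xyz)   # starts straight down -Z (yaw = 0), then drifts toward +X as yaw grows
```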
/mslam/img_stitch/img_stitch.py:
--------------------------------------------------------------------------------
1 | # Incomplete at the moment.
2 | # Enhancement still in progress.
3 |
4 | import cv2
5 | import numpy as np
6 |
7 | class ImageStitcher:
8 |     def __init__(self, left_img, right_img):
9 |         self.left = left_img
10 |         self.right = right_img
11 |
12 |     def stitch(self):
13 | sift = cv2.xfeatures2d.SIFT_create()
14 |         left_kp, left_des = sift.detectAndCompute(self.left, None)
15 |         right_kp, right_des = sift.detectAndCompute(self.right, None)
16 | matches = cv2.BFMatcher().knnMatch(
17 | queryDescriptors=right_des,
18 | trainDescriptors=left_des,
19 | k=2
20 | )
21 |
22 | to_pts = []
23 | from_pts = []
24 | for m, n in matches:
25 | if m.distance < 0.65 * n.distance:
26 | from_pts.append(left_kp[m.trainIdx].pt)
27 | to_pts.append(right_kp[m.queryIdx].pt)
28 |
29 | H = cv2.findHomography(np.float32(from_pts), np.float32(to_pts), cv2.RANSAC, 2.0)[0]
30 |
31 |         left_height, left_width = self.left.shape[:2]
32 |         warped = np.zeros((left_height, int(left_width*1.75), 3), dtype=self.left.dtype)
33 | for c in range(3):
34 |             warped[:,:,c] = cv2.warpPerspective(self.left[:,:,c], H, (warped.shape[1], warped.shape[0]), flags=cv2.INTER_NEAREST)
35 | left_mask = np.ones(warped.shape[:2], warped.dtype) * 255
36 |
37 | return warped
38 |
39 | if __name__ == '__main__':
40 | # use some images from kitti for demonstration
41 | left = cv2.imread('../../videos/cam1.jpg')
42 | right = cv2.imread('../../videos/cam2.jpg')
43 |
44 | out = ImageStitcher(left, right).stitch()
45 |     cv2.imshow('stitched', out)
46 |     cv2.waitKey(0)
--------------------------------------------------------------------------------
/mslam/lane_finder/lane_finder.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from matplotlib import pyplot as plt
3 | import numpy as np
4 |
5 | if __name__ == '__main__':
6 | f = cv2.imread('../../video/road-photo.jpg')
7 | gray = cv2.cvtColor(f, cv2.COLOR_BGR2GRAY)
8 | f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
9 | lines_img = np.copy(f)
10 | edges = cv2.Canny(gray,100,600,apertureSize=3)
11 |
12 |     lines = cv2.HoughLines(edges, 1, np.pi/180, 250)
13 | print(lines)
14 | for rho,theta in lines[:,0]:
15 | cos = np.cos(theta)
16 | sin = np.sin(theta)
17 | x0 = cos*rho
18 | y0 = sin*rho
19 | x1 = int(x0 + 10000*(-sin))
20 | y1 = int(y0 + 10000*cos)
21 | x2 = int(x0 - 10000*(-sin))
22 | y2 = int(y0 - 10000*cos)
23 |
24 | cv2.line(lines_img,(x1,y1),(x2,y2),(0,255,255),5)
25 | plt.subplot(1, 3, 1)
26 | plt.axis('off')
27 | plt.title('original image')
28 | plt.imshow(f)
29 |
30 | plt.subplot(1, 3, 2)
31 | plt.axis('off')
32 | plt.title('canny image')
33 | plt.imshow(edges, cmap='gray')
34 |
35 | plt.subplot(1, 3, 3)
36 | plt.axis('off')
37 | plt.title('lines detected')
38 | plt.imshow(lines_img)
39 | plt.show()
40 |
41 |
--------------------------------------------------------------------------------
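For reference, cv2.HoughLines in lane_finder.py returns each line in (rho, theta) form, where rho = x*cos(theta) + y*sin(theta). The point (x0, y0) = (rho*cos(theta), rho*sin(theta)) is the foot of the perpendicular from the image origin onto the line, and the line's direction vector is (-sin(theta), cos(theta)), so the script draws a long segment by stepping 10000 pixels from (x0, y0) in both directions along that vector.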
/mslam/lane_finder/lines_detected.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/mslam/lane_finder/lines_detected.png
--------------------------------------------------------------------------------
/mslam/obj_rec/obj_rec.py:
--------------------------------------------------------------------------------
1 | # Based on Facebook AI Research's Detectron2
2 | # https://github.com/facebookresearch/detectron2/
3 | # Using the official Detectron2 Tutorial (colab):
4 | # https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5#scrollTo=HUjkwRsOn1O0
5 |
6 | import torch, torchvision
7 | import cv2
8 | import detectron2
9 | from detectron2.utils.logger import setup_logger
10 | from detectron2.engine import DefaultPredictor
11 | from detectron2.config import get_cfg
12 | from detectron2.utils.visualizer import Visualizer
13 | from detectron2.data import MetadataCatalog
14 | import pprint
15 |
16 | '''
17 | Object Detection
18 | Utilizes Facebook AI Research's Detectron2 CNN: https://github.com/facebookresearch/detectron2
19 | Performs frame by frame object detection and segmentation
20 | NOTE: Makes use of the variable naming and calling conventions found in the library's predictor script
21 | '''
22 | class ObjectDetect:
23 | # Initialize the predictor
24 | def __init__(self, model_yaml, weights):
25 | self.cfg = get_cfg()
26 | self.cfg.merge_from_file(model_yaml)
27 | self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
28 | self.cfg.MODEL.WEIGHTS = weights
29 | self.predictor = DefaultPredictor(self.cfg)
30 |
31 | # Feed in an image frame to the predictor and return the output
32 | def estimate(self, img):
33 | outputs = self.predictor(img)
34 | v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.0)
35 | v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
36 | return v.get_image()[:, :, ::-1]
37 |
38 | # Return the current CfgNode
39 | def getCFG(self):
40 | return self.cfg
--------------------------------------------------------------------------------
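A hypothetical standalone use of the ObjectDetect module above; the config and weights strings mirror settingsModules['object_detection'] in mslam.py and assume detectron2_repo has been cloned and installed by mslam_setup_cdf.sh. The image and output file names are illustrative only:

```python
import cv2
from mslam.obj_rec.obj_rec import ObjectDetect

# Assumes detectron2_repo is cloned and installed (see mslam_setup_cdf.sh) and that
# this is run from the repository root. File names are illustrative.
detector = ObjectDetect(
    "detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
    "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl")
frame = cv2.imread("video/road-photo.jpg")
annotated = detector.estimate(frame)          # frame with instance masks and labels drawn
cv2.imwrite("obj_rec_example.png", annotated)
```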
/mslam_setup_cdf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -eu -o pipefail
4 |
5 | echo "Installing mslam. This computer must have CUDA enabled support"
6 | export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda-10.0/}"
7 | python3 -m venv --system-site-packages env-mslam
8 | source env-mslam/bin/activate
9 | pip() { python3 -m pip "$@"; }
10 | pip install -U pip
11 | pip install -r requirements.pip
12 | [ ! -d detectron2_repo ] && git clone https://github.com/facebookresearch/detectron2 detectron2_repo || true
13 | [ ! -d monodepth2_repo ] && git clone https://github.com/nianticlabs/monodepth2.git monodepth2_repo || true
14 | [ ! -d hd3_repo ] && git clone https://github.com/ucbdrive/hd3.git hd3_repo || true
15 | pip install Cython cupy
16 | pip install -U 'git+https://github.com/facebookresearch/fvcore.git'
17 | pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
18 | pip install -e detectron2_repo
19 | pip install -U "git+https://github.com/cocodataset/panopticapi.git"
20 | pip install -U "git+https://github.com/lvis-dataset/lvis-api.git"
21 | pip install -U "git+https://github.com/mcordts/cityscapesScripts.git"
22 | pip install -r requirements.pip
23 | cd monodepth2_repo && python3 test_simple.py --image_path assets/test_image.jpg --model_name mono_1024x320
24 | cd ../
25 | echo "Done setup. Please run: python3 mslam.py"
26 |
--------------------------------------------------------------------------------
/mslam_vis_pcd.py:
--------------------------------------------------------------------------------
1 | '''
2 | Opens the given Open3D point cloud file (usually with a .pcd extension) in a visualizer window
3 |
4 | Flags:
5 | -i, --input Path to the point cloud file
6 | '''
7 |
8 | import argparse
9 | import open3d as o3d
10 |
11 | if __name__ == '__main__':
12 | ap = argparse.ArgumentParser()
13 | ap.add_argument("-i", "--input", type=str, required=True, help="Point cloud file path")
14 | args = vars(ap.parse_args())
15 |
16 | pcd = o3d.io.read_point_cloud(args['input'])
17 | o3d.visualization.draw_geometries([pcd])
--------------------------------------------------------------------------------
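With the default configuration in mslam.py, the point cloud produced by a run is saved as OUTPUT/OUT_geo_projection.pcd, so a typical invocation of this viewer is: python3 mslam_vis_pcd.py -i OUTPUT/OUT_geo_projection.pcd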
/requirements.pip:
--------------------------------------------------------------------------------
1 | absl-py==0.8.1
2 | astroid==2.3.3
3 | attrs==19.3.0
4 | backcall==0.1.0
5 | bleach==3.1.0
6 | cachetools==3.1.1
7 | certifi==2019.11.28
8 | chardet==3.0.4
9 | cityscapesScripts==1.1.0
10 | Click==7.0
11 | cloudpickle==1.2.2
12 | cycler==0.10.0
13 | Cython==0.29.12
14 | decorator==4.4.1
15 | defusedxml==0.6.0
16 | entrypoints==0.3
17 | fastrlock==0.4
18 | Flask==1.1.1
19 | google-auth==1.7.1
20 | google-auth-oauthlib==0.4.1
21 | grpcio==1.25.0
22 | idna==2.8
23 | imageio==2.6.1
24 | imagesize==1.1.0
25 | importlib-metadata==1.0.0
26 | imutils==0.5.3
27 | ipykernel==5.1.3
28 | ipython==7.10.0
29 | ipython-genutils==0.2.0
30 | ipywidgets==7.5.1
31 | isort==4.3.21
32 | itsdangerous==1.1.0
33 | jedi==0.15.1
34 | Jinja2==2.10.3
35 | joblib==0.14.0
36 | jsonschema==3.2.0
37 | jupyter-client==5.3.4
38 | jupyter-core==4.6.1
39 | kiwisolver==1.1.0
40 | lazy-object-proxy==1.4.3
41 | lvis==0.5.1
42 | Markdown==3.1.1
43 | MarkupSafe==1.1.1
44 | matplotlib==3.1.1
45 | mccabe==0.6.1
46 | mistune==0.8.4
47 | more-itertools==8.0.0
48 | nbconvert==5.6.1
49 | nbformat==4.4.0
50 | networkx==2.4
51 | notebook==6.0.2
52 | numpy==1.16.4
53 | oauthlib==3.1.0
54 | open3d==0.8.0.0
55 | opencv-contrib-python==3.4.2.17
56 | pandocfilters==1.4.2
57 | parso==0.5.1
58 | pexpect==4.7.0
59 | pickleshare==0.7.5
60 | Pillow==6.2.1
61 | portalocker==1.5.2
62 | prometheus-client==0.7.1
63 | prompt-toolkit==3.0.2
64 | protobuf==3.11.0
65 | ptyprocess==0.6.0
66 | pyasn1==0.4.8
67 | pyasn1-modules==0.2.7
68 | pycocotools==2.0
69 | Pygments==2.5.2
70 | pylint==2.4.4
71 | pyparsing==2.4.0
72 | pyrsistent==0.15.6
73 | python-dateutil==2.8.0
74 | PyWavelets==1.1.1
75 | PyYAML==5.1.2
76 | pyzmq==18.1.1
77 | requests==2.22.0
78 | requests-oauthlib==1.3.0
79 | rsa==4.0
80 | scikit-image==0.16.2
81 | scikit-learn==0.21.3
82 | scipy==1.3.2
83 | Send2Trash==1.5.0
84 | six==1.12.0
85 | tabulate==0.8.6
86 | tensorboard==2.0.2
87 | termcolor==1.1.0
88 | terminado==0.8.3
89 | testpath==0.4.4
90 | torch==1.3.1
91 | torchvision==0.4.2
92 | tornado==6.0.3
93 | tqdm==4.39.0
94 | traitlets==4.3.3
95 | typed-ast==1.4.0
96 | urllib3==1.25.7
97 | wcwidth==0.1.7
98 | webencodings==0.5.1
99 | Werkzeug==0.16.0
100 | widgetsnbextension==3.5.1
101 | wrapt==1.11.2
102 | yacs==0.1.6
103 | youtube-dl==2019.11.28
104 | zipp==0.6.0
105 |
--------------------------------------------------------------------------------
/video/driving_country_480p_trimmed.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/video/driving_country_480p_trimmed.mp4
--------------------------------------------------------------------------------
/video/kittbw.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/video/kittbw.mp4
--------------------------------------------------------------------------------
/video/road-photo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shahin-imtiaz/MultiSLAM/91d1d8a891542d9050807522ea49b8b31276ef74/video/road-photo.jpg
--------------------------------------------------------------------------------
/video/video_credits.txt:
--------------------------------------------------------------------------------
1 | # America's Most Visited National Park! - Fall Colors - Great Smoky Mountains 4K - Scenic Drive
2 | https://www.youtube.com/watch?v=Of5pGj6Obzo
3 |
4 | # KITTI Sequence 05
--------------------------------------------------------------------------------
/video/wget-log:
--------------------------------------------------------------------------------
1 | --2019-12-18 19:29:49-- https://images.unsplash.com/photo-1494783367193-149034c05e8f?ixlib=rb-1.2.1
2 | Resolving images.unsplash.com (images.unsplash.com)... 151.101.22.208, 2a04:4e42:5::720
3 | Connecting to images.unsplash.com (images.unsplash.com)|151.101.22.208|:443... connected.
4 | HTTP request sent, awaiting response... 200 OK
5 | Length: 6194940 (5.9M) [image/jpeg]
6 | Saving to: ‘photo-1494783367193-149034c05e8f?ixlib=rb-1.2.1’
7 |
8 |
9 |
10 | 2019-12-18 19:30:00 (572 KB/s) - ‘photo-1494783367193-149034c05e8f?ixlib=rb-1.2.1’ saved [6194940/6194940]
11 |
12 |
--------------------------------------------------------------------------------