├── .gitignore ├── README.md ├── imgutils.py ├── photometric_alignment.py ├── pyramid.py ├── requirements.txt ├── se3utils.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | cache/* 2 | *.pyc 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dvo_python: Dense visual odometry in Python(3.6(.6)) 2 | > Coded up in _slightly longer than_ a night! :) 3 | 4 | Someone tweeted about [this elegant implementation ](https://github.com/muskie82/simple_dvo), and that's what made my day (rather, my night). I was like, "Hmm, a good refresher on dense SLAM would be to implement this, let me do it in Python." 5 | 6 | The first coding sprint ran for about 7.5 hours, in which I implemented most of the framework, i.e., the residual and Jacobian computation, and a simplish gradient-descent optimizer. 7 | 8 | 9 | ## Micromanagement 10 | 11 | - [x] Work out dependencies (`numpy`, `OpenCV`, some SE(3) package(??), `matplotlib`) 12 | - [x] Read in a pair of pointclouds and visualize them 13 | - [x] Construct image and depth pyramids 14 | - [x] Compute the residual (warping error) 15 | - [x] Implement SE(3) routines 16 | - [x] Implement image gradient computation 17 | - [x] Compute Jacobian of the error function 18 | - [x] Write a Gradient-descent optimizer 19 | - [ ] Write a Gauss-Newton optimizer 20 | - [ ] Robustify the error function (IRLS / M-Estimators) 21 | - [ ] Debug the two-image alignment case 22 | - [ ] Extend to a sequence of several images 23 | - [ ] Setup class to load a TUM RGB-D sequence and run `dvo _python` on it. 24 | - [ ] Debug! 25 | - [ ] Check for any possibile visualization glitches/enhancements. 26 | 27 | 28 | ## Big picture checklist 29 | 30 | - [x] Barebones version of repo up 31 | - [x] Chalk out repo structure, order in which to code 32 | - [x] Put the order up as a checklist 33 | - [ ] Code & Debug (on a pair of images)! 34 | - [ ] Code & Debug on a sequence of images 35 | - [ ] Get it to run on TUM RGB-D! 36 | - [ ] Benchmark (time and accuracy) 37 | - [ ] Finish up documentation and README 38 | - [ ] Take a moment to revel in a sense of accomplishment ;) 39 | - [ ] Get some sleep!!! :) 40 | 41 | 42 | ## Activity Log 43 | 44 | Times are in 24-hour format. 45 | 46 | Night 1: 21 November-22 November 2018 47 | * 2000 - 2020: Chalk out action plan. 48 | * 2020 - 2050: Dinner break. 49 | * 2100 - 2130: Download a sequence from TUM RGB-D, load and display stuff 50 | * 2140 - 2300: Build pyramid levels (plus a lot of interruptions :|) 51 | * 2330 - 0115: Compute the photometric warp error 52 | * 0115 - 0145: Get SE(3) helper functions in 53 | * 0200 - 0220: Image gradient computation, fetch SE(3) Jacobian helper functions 54 | * 0220 - 0310: Implement Jacobian computation 55 | * 0310 - 0330: Implement gradient descent optimizer 56 | 57 | Day 2: 22 November 2018 58 | * 1100 - 1130: Vectorize image pyramid level construction 59 | * 1614 - 1645: Vectorize edge-preserving depth pyramid level construction 60 | -------------------------------------------------------------------------------- /imgutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities 3 | """ 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def im2float(img): 10 | # Convert input image to normalized float 11 | return cv2.normalize(img.astype('float'), None, 0.0, 1.0, cv2.NORM_MINMAX) 12 | 13 | 14 | # Interpolate an intensity value bilinearly (useful when a warped point is not integral) 15 | # Translated from: https://github.com/muskie82/simple_dvo/blob/master/src/util.cpp 16 | def bilinear_interpolation(img, x, y, width, height): 17 | 18 | # Consider the pixel as invalid, to begin with 19 | valid = np.nan 20 | 21 | # Get the four corner coordinates for the current floating point values x, y 22 | x0 = np.floor(x).astype(np.uint8) 23 | y0 = np.floor(y).astype(np.uint8) 24 | x1 = x0 + 1 25 | y1 = y0 + 1 26 | 27 | # Compute weights for each corner location, inversely proportional to the distance 28 | x1_weight = x - x0 29 | y1_weight = y - y0 30 | x0_weight = 1 - x1_weight 31 | y0_weight = 1 - y1_weight 32 | 33 | # Check if the warped points lie within the image 34 | if x0 < 0 or x0 >= width: 35 | x0_weight = 0 36 | if x1 < 0 or x1 >= width: 37 | x0_weight = 0 38 | if y0 < 0 or y0 >= height: 39 | y0_weight = 0 40 | if y1 < 0 or y1 >= height: 41 | y1_weight = 0 42 | 43 | # Bilinear weights 44 | w00 = x0_weight * y0_weight 45 | w10 = x1_weight * y0_weight 46 | w01 = x0_weight * y1_weight 47 | w11 = x1_weight * y1_weight 48 | 49 | # Bilinearly interpolate intensities 50 | sum_weights = w00 + w10 + w01 + w11 51 | total = 0 52 | if w00 > 0: 53 | total += img.item((y0, x0)) * w00 54 | if w01 > 0: 55 | total += img.item((y1, x0)) * w01 56 | if w10 > 0: 57 | total += img.item((y0, x1)) * w10 58 | if w11 > 0: 59 | total += img.item((y1, x1)) * w11 60 | 61 | if sum_weights > 0: 62 | valid = total / sum_weights 63 | 64 | return valid 65 | -------------------------------------------------------------------------------- /photometric_alignment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to compute photometric error residuals and jacobians 3 | """ 4 | 5 | import numpy as np 6 | 7 | import imgutils 8 | import se3utils 9 | 10 | 11 | # Takes in an intensity image and a registered depth image, and outputs a pointcloud 12 | # Intrinsics must be provided, else we use the TUM RGB-D benchmark defaults. 13 | # https://vision.in.tum.de/data/datasets/rgbd-dataset/file_formats#intrinsic_camera_calibration_of_the_kinect 14 | def rgbd_to_pointcloud(gray, depth, focal_length, cx, cy, scaling_factor): 15 | pointcloud = [] 16 | for v in range(gray.shape[1]): 17 | for u in range(gray.shape[0]): 18 | intensity = gray.item((u, v)) 19 | Z = depth.item((u, v)) / scaling_factor 20 | if Z == 0: 21 | continue 22 | X = (u - cx) * Z / focal_length 23 | Y = (v - cy) * Z / focal_length 24 | pointcloud.append((X, Y, Z, intensity)) 25 | return pointcloud 26 | 27 | 28 | # Compute photometric error (i.e., the residuals) 29 | def computeResiduals(gray_prev, depth_prev, gray_cur, K, xi): 30 | """ 31 | Computes the image alignment error (residuals). Takes in the previous intensity image and 32 | first backprojects it to 3D to obtain a pointcloud. This pointcloud is then rotated by an 33 | SE(3) transform "xi", and then projected down to the current image. After this step, an 34 | intensity interpolation step is performed and we compute the error between the projected 35 | image and the actual current intensity image. 36 | 37 | While performing the residuals, also cache information to speedup Jacobian computation. 38 | """ 39 | 40 | width = gray_cur.shape[1] 41 | height = gray_cur.shape[0] 42 | residuals = np.zeros(gray_cur.shape, dtype = np.float32) 43 | 44 | # Cache to store computed 3D points 45 | cache_point3d = np.zeros((height, width, 3), dtype = np.float32) 46 | 47 | # # Backproject an image to 3D to obtain a pointcloud 48 | # pointcloud = rgbd_to_pointcloud(gray_prev, depth_prev, focal_length=K['f'], cx=K['cx'], 49 | # cy=K['cy'], scaling_factor = K['scaling_factor']) 50 | 51 | one_by_f = 1. / K['f'] 52 | 53 | # Use the SE(3) Exponential map to compute a 4 x 4 matrix from the vector xi 54 | T = se3utils.SE3_Exp(xi) 55 | 56 | K_mat = np.asarray([[K['f'], 0, K['cx']], [0, K['f'], K['cy']], [0, 0, 1]]) 57 | 58 | # Warp each point in the previous image, to the current image 59 | for v in range(gray_prev.shape[0]): 60 | for u in range(gray_prev.shape[1]): 61 | intensity_prev = gray_prev.item((v,u)) 62 | Z = depth_prev.item((v,u)) / K['scaling_factor'] 63 | if Z <= 0: 64 | continue 65 | Y = one_by_f * Z * (v - K['cy']) 66 | X = one_by_f * Z * (u - K['cx']) 67 | # Transform the 3D point 68 | point_3d = np.dot(T[0:3,0:3], np.asarray([X, Y, Z])) + T[0:3,3] 69 | point_3d = np.reshape(point_3d, (3,1)) 70 | cache_point3d[v,u,:] = np.reshape(point_3d, (3)) 71 | # Project it down to 2D 72 | point_2d_warped = np.dot(K_mat, point_3d) 73 | px = point_2d_warped[0] / point_2d_warped[2] 74 | py = point_2d_warped[1] / point_2d_warped[2] 75 | 76 | # Interpolate the intensity value bilinearly 77 | intensity_warped = imgutils.bilinear_interpolation(gray_cur, px[0], py[0], width, height) 78 | 79 | # If the pixel is valid (i.e., interpolation return a non-NaN value), compute residual 80 | if not np.isnan(intensity_warped): 81 | residuals.itemset((v, u), intensity_prev - intensity_warped) 82 | 83 | return residuals, cache_point3d 84 | 85 | 86 | # Function to compute image gradients (used in Jacobian computation) 87 | def computeImageGradients(img): 88 | """ 89 | We use a simple form for the image gradient. For instance, a gradient along the X-direction 90 | at location (y, x) is computed as I(y, x+1) - I(y, x-1). 91 | """ 92 | gradX = np.zeros(img.shape, dtype = np.float32) 93 | gradY = np.zeros(img.shape, dtype = np.float32) 94 | 95 | width = img.shape[1] 96 | height = img.shape[0] 97 | 98 | # Exploit the fact that we can perform matrix operations on images, to compute gradients quicker 99 | gradX[:, 1:width-1] = img[:, 2:] - img[:,0:width-2] 100 | gradY[1:height-1, :] = img[2:, :] - img[:height-2, :] 101 | 102 | return gradX, gradY 103 | 104 | 105 | # Compute the Jacobian of the photometric error residual (i.e., the loss function that is 106 | # being minimized) 107 | def computeJacobian(gray_prev, depth_prev, gray_cur, K, xi, residuals, cache_point3d): 108 | 109 | width = gray_prev.shape[1] 110 | height = gray_prev.shape[0] 111 | 112 | K_mat = np.asarray([[K['f'], 0, K['cx']], [0, K['f'], K['cy']], [0, 0, 1]]) 113 | f = K['f'] 114 | cx = K['cx'] 115 | cy = K['cy'] 116 | 117 | # Initialize memory to store the Jacobian 118 | J = np.zeros((height, width, 6)) 119 | 120 | # Compute image gradients 121 | grad_ix, grad_iy = computeImageGradients(gray_cur) 122 | 123 | # For each pixel, compute one Jacobian term 124 | for v in range(gray_prev.shape[0]): 125 | for u in range(gray_prev.shape[1]): 126 | X = cache_point3d.item((v, u, 0)) 127 | Y = cache_point3d.item((v, u, 1)) 128 | Z = cache_point3d.item((v, u, 2)) 129 | if Z <= 0: 130 | continue 131 | J_img = np.reshape(np.asarray([[grad_ix[v,u], grad_iy[v,u]]]), (1,2)) 132 | J_pi = np.reshape(np.asarray([[f/Z, 0, -f*X/(Z*Z)], [0, f/2, -f*Y/(Z*Z)]]), (2,3)) 133 | J_exp = np.concatenate((np.eye(3), se3utils.SO3_hat(-np.asarray([X, Y, Z]))), axis=1) 134 | J_exp = np.dot(J_exp, se3utils.SE3_left_jacobian(xi)) 135 | J[v,u,:] = residuals[v,u] * np.reshape(np.dot(J_img, np.dot(J_pi, J_exp)), (6)) 136 | 137 | return J 138 | -------------------------------------------------------------------------------- /pyramid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions to construct image pyramids 3 | """ 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | # Function to downsample an intensity (grayscale) image 10 | def downsampleGray(img): 11 | """ 12 | The downsampling strategy eventually chosen is a naive block averaging method. 13 | That is, for each pixel in the target image, we choose a block comprising 4 neighbors 14 | in the source image, and simply average their intensities. For each target image point 15 | (y, x), where x indexes the width and y indexes the height dimensions, we consider the 16 | following four neighbors: (2*y,2*x), (2*y+1,2*x), (2*y,2*x+1), (2*y+1,2*x+1). 17 | NOTE: The image must be float, to begin with. 18 | """ 19 | 20 | # Perform block-averaging 21 | img_new = (img[0::2,0::2] + img[0::2,1::2] + img[1::2,0::2] + img[1::2,1::2]) / 4. 22 | 23 | return img_new 24 | 25 | 26 | # Function to downsample a depth image 27 | def downsampleDepth(img): 28 | """ 29 | For depth images, the downsampling strategy is very similar to that for intensity images, 30 | with a minor mod: we do not average all pixels; rather, we average only pixels with non-zero 31 | depth values. 32 | """ 33 | 34 | # Perform block-averaging, but not across depth boundaries. (i.e., compute average only 35 | # over non-zero elements) 36 | img_ = np.stack([img[0::2,0::2], img[0::2,1::2], img[1::2,0::2], img[1::2,1::2]], axis=2) 37 | num_nonzero = np.count_nonzero(img_, axis=2) 38 | num_nonzero[np.where(num_nonzero == 0)] = 1 39 | img_new = np.sum(img_, axis=2) / num_nonzero 40 | 41 | return img_new.astype(np.uint8) 42 | 43 | 44 | # Function to construct a pyramid of intensity and depth images with a specified number of levels 45 | def buildPyramid(gray, depth, num_levels, focal_length, cx, cy): 46 | 47 | # Lists to store each level of a pyramid 48 | pyramid_gray = [] 49 | pyramid_depth = [] 50 | pyramid_intrinsics = [] 51 | 52 | current_gray = gray 53 | current_depth = depth 54 | current_f = focal_length 55 | current_cx = cx 56 | current_cy = cy 57 | 58 | # Build levels of the pyramid 59 | for level in range(num_levels): 60 | pyramid_gray.append(current_gray) 61 | pyramid_depth.append(current_depth) 62 | K_cur = dict() 63 | K_cur['f'] = current_f 64 | K_cur['cx'] = current_cx 65 | K_cur['cy'] = current_cy 66 | pyramid_intrinsics.append(K_cur) 67 | if level < num_levels-1: 68 | current_gray = downsampleGray(current_gray) 69 | current_depth = downsampleDepth(current_depth) 70 | 71 | return pyramid_gray, pyramid_depth, pyramid_intrinsics 72 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | matplotlib 3 | -------------------------------------------------------------------------------- /se3utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for computations on the Lie Group SE(3) 3 | Note: This is probably the most _cryptic_ file of the lot. Comments are sparse and the 4 | code is compact, mainly because I'm writing these functions for the Nth time. 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | epsil = 1e-6 11 | 12 | # SO(3) hat operator 13 | def SO3_hat(omega): 14 | 15 | omega_hat = np.zeros((3,3)) 16 | omega_hat[0][1] = -omega[2] 17 | omega_hat[1][0] = omega[2] 18 | omega_hat[0][2] = omega[1] 19 | omega_hat[2][0] = -omega[1] 20 | omega_hat[1][2] = -omega[0] 21 | omega_hat[2][1] = omega[0] 22 | 23 | return omega_hat 24 | 25 | 26 | # SE(3) exponential map 27 | def SE3_Exp(xi): 28 | 29 | u = xi[:3] 30 | omega = xi[3:] 31 | omega_hat = SO3_hat(omega) 32 | 33 | theta = np.linalg.norm(omega) 34 | 35 | if np.linalg.norm(omega) < epsil: 36 | R = np.eye(3) + omega_hat 37 | V = np.eye(3) + omega_hat 38 | else: 39 | s = np.sin(theta) 40 | c = np.cos(theta) 41 | omega_hat_sq = np.dot(omega_hat, omega_hat) 42 | theta_sq = theta * theta 43 | A = s / theta 44 | B = (1 - c) / (theta_sq) 45 | C = (1 - A) / (theta_sq) 46 | R = np.eye(3) + A * omega_hat + B * omega_hat_sq 47 | V = np.eye(3) + B * omega_hat + C * omega_hat_sq 48 | t = np.dot(V, u.reshape(3,1)) 49 | lastrow = np.zeros((1,4)) 50 | lastrow[0][3] = 1. 51 | return np.concatenate((np.concatenate((R, t), axis = 1), lastrow), axis = 0) 52 | 53 | 54 | # Functions to help compute the left jacobian for SE(3) 55 | # See Tim Barfoot's book http://asrl.utias.utoronto.ca/~tdb/bib/barfoot_ser17.pdf 56 | 57 | # SE(3) hat operator 58 | def SE3_hat(xi): 59 | """ 60 | Takes in a 6 x 1 vector of SE(3) exponential coordinates and constructs a 4 x 4 tangent vector xi_hat 61 | in the Lie algebra se(3) 62 | """ 63 | 64 | v = xi[:3] 65 | omega = xi[3:] 66 | xi_hat = np.zeros((4,4)).astype(np.float32) 67 | xi_hat[0:3,0:3] = SO3_hat(omega) 68 | xi_hat[0:3,3] = v 69 | 70 | return xi_hat 71 | 72 | 73 | # SE(3) curly hat operator 74 | def SE3_curly_hat(xi): 75 | """ 76 | Takes in a 6 x 1 vector of SE(3) exponential coordinates and constructs a 6 x 6 adjoint representation. 77 | (the adjoint in the Lie algebra) 78 | """ 79 | v = xi[:3] 80 | omega = xi[3:] 81 | xi_curly_hat = np.zeros((6,6)).astype(np.float32) 82 | omega_hat = SO3_hat(omega) 83 | xi_curly_hat[0:3,0:3] = omega_hat 84 | xi_curly_hat[0:3,3:6] = SO3_hat(v) 85 | xi_curly_hat[3:6,3:6] = omega_hat 86 | return xi_curly_hat 87 | 88 | 89 | # SE(3) Q matrix (according to Tim Barfoot's convention) 90 | # Used in computing the left SE(3) Jacobian 91 | def SE3_Q(xi): 92 | """ 93 | This function is to be called only when the axis-angle vector is not very small, as this definition 94 | DOES NOT take care of small-angle approximations. 95 | """ 96 | 97 | v = xi[:3] 98 | omega = xi[3:] 99 | 100 | theta = np.linalg.norm(omega) 101 | theta_2 = theta * theta 102 | theta_3 = theta_2 * theta 103 | theta_4 = theta_3 * theta 104 | theta_5 = theta_4 * theta 105 | 106 | omega_hat = SO3_hat(omega) 107 | v_hat = SO3_hat(v) 108 | 109 | c = np.cos(theta) 110 | s = np.sin(theta) 111 | 112 | coeff1 = 0.5 113 | coeff2 = (theta - s) / (theta_3) 114 | coeff3 = (theta_2 + 2*c - 2) / (2 * theta_4) 115 | coeff4 = (2*theta - 3*s + theta*c) / (2 * theta_5) 116 | 117 | v_hat_omega_hat = np.dot(v_hat, omega_hat) 118 | omega_hat_v_hat = np.dot(omega_hat, v_hat) 119 | omega_hat_sq = np.dot(omega_hat, omega_hat) 120 | omega_hat_v_hat_omega_hat = np.dot(omega_hat, v_hat_omega_hat) 121 | v_hat_omega_hat_sq = np.dot(v_hat, omega_hat_sq) 122 | 123 | matrix1 = v_hat 124 | matrix2 = omega_hat_v_hat + v_hat_omega_hat + np.dot(omega_hat, v_hat_omega_hat) 125 | matrix3 = np.dot(omega_hat, omega_hat_v_hat) + v_hat_omega_hat_sq - 3 * omega_hat_v_hat_omega_hat 126 | matrix4 = np.dot(omega_hat, v_hat_omega_hat_sq) + np.dot(omega_hat, omega_hat_v_hat_omega_hat) 127 | 128 | Q = coeff1 * matrix1 + coeff2 * matrix2 + coeff3 * matrix3 + coeff4 * matrix4 129 | 130 | return Q 131 | 132 | 133 | # Return the left jacobian of SO(3) 134 | def SO3_left_jacobian(omega): 135 | """ 136 | Takes as input a vector of SO(3) exponential coordinates, and returns the left jacobian of SO(3) 137 | """ 138 | 139 | theta = np.linalg.norm(omega) 140 | omega_hat = SO3_hat(omega) 141 | 142 | if theta < epsil: 143 | return np.eye(3) + 0.5 * omega_hat 144 | 145 | omega_hat_sq = np.dot(omega_hat, omega_hat) 146 | theta_2 = theta * theta 147 | theta_3 = theta_2 * theta 148 | c = np.cos(theta) 149 | s = np.sin(theta) 150 | B = (1 - c) / theta_2 151 | C = (theta - s) / theta_3 152 | 153 | return np.eye(3) + B * omega_hat + C * omega_hat_sq 154 | 155 | 156 | # Return the inverse of the left jacobian of SO(3) 157 | def SO3_inv_left_jacobian(omega): 158 | """ 159 | Takes as input a vector of SO(3) exponential coordinates, and returns the inverse of the left jacobian of SO(3) 160 | """ 161 | 162 | theta = np.linalg.norm(omega) 163 | omega_hat = SO3_hat(omega) 164 | 165 | if theta < epsil: 166 | return np.eye(3) - 0.5 * omega_hat 167 | 168 | omega_hat_sq = np.dot(omega_hat, omega_hat) 169 | half_theta = theta / 2 170 | t_half = np.tan(half_theta) 171 | D = 1 - (half_theta / t_half) 172 | return np.eye(3) - 0.5 * omega_hat + D * omega_hat_sq 173 | 174 | 175 | # SE(3) left jacobian 176 | def SE3_left_jacobian(xi): 177 | 178 | v = xi[:3] 179 | omega = xi[3:] 180 | 181 | theta = np.linalg.norm(omega) 182 | xi_curly_hat = SE3_curly_hat(xi) 183 | 184 | if theta < epsil: 185 | return np.eye(6) + 0.5 * xi_curly_hat 186 | 187 | J_SO3 = SO3_left_jacobian(omega) 188 | Q = SE3_Q(xi) 189 | 190 | J_SE3 = np.zeros((6,6)) 191 | J_SE3[0:3,0:3] = J_SE3[3:6,3:6] = J_SO3 192 | J_SE3[0:3,3:6] = Q 193 | 194 | return J_SE3 195 | 196 | 197 | # SE(3) inverse left jacobian 198 | def SE3_inv_left_jacobian(xi): 199 | 200 | v = xi[:3] 201 | omega = xi[3:] 202 | 203 | theta = np.linalg.norm(omega) 204 | xi_curly_hat = SE3_curly_hat(xi) 205 | 206 | if theta < epsil: 207 | return np.eye(6) - xi_curly_hat 208 | 209 | inv_J_SO3 = SO3_inv_left_jacobian(omega) 210 | Q = SE3_Q(xi) 211 | 212 | inv_J_SE3 = np.zeros((6,6)) 213 | inv_J_SE3[0:3,0:3] = inv_J_SE3[3:6,3:6] = inv_J_SO3 214 | inv_J_SE3[0:3,3:6] = - np.dot(inv_J_SO3, np.dot(Q, inv_J_SO3)) 215 | 216 | return inv_J_SE3 217 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import os 6 | import sys 7 | 8 | import imgutils 9 | import photometric_alignment 10 | import pyramid 11 | import se3utils 12 | 13 | 14 | # Parse command-line arguments 15 | def parse_args(): 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('-datapath', help='Path to a TUM RGB-D Odometry benchmark sequence', \ 19 | required=True) 20 | parser.add_argument('-startFrameRGB', help='Filename (sans the .png extension) of the first \ 21 | RGB frame to be processed', required=True) 22 | parser.add_argument('-startFrameDepth', help='Filename (sans the .png extension) of the first \ 23 | depth frame to be processed', required=True) 24 | parser.add_argument('-endFrameRGB', help='Filename (sans the .png extension) of the last \ 25 | RGB frame to be processed') 26 | parser.add_argument('-endFrameDepth', help='Filename (sans the .png extension) of the last \ 27 | depth frame to be processed') 28 | parser.add_argument('-numPyramidLevels', help='Number of levels used in the pyramid', default=3) 29 | parser.add_argument('-stepsize', help='Step-size parameter, for gradient descent', default=1e-6) 30 | parser.add_argument('-numIters', help='Default number of iterations to run each optimization \ 31 | routine for', default=50) 32 | parser.add_argument('-tol', help='Tolerance parameter for gradient-based optimization. \ 33 | Specifies the amount by which loss must change across successive iterations', default=1e-8) 34 | 35 | 36 | args = parser.parse_args() 37 | 38 | return args 39 | 40 | 41 | # Main method 42 | def main(args): 43 | 44 | img_gray_prev = cv2.imread(os.path.join(args.datapath, 'rgb', args.startFrameRGB + '.png'), cv2.IMREAD_GRAYSCALE) 45 | img_depth_prev = cv2.imread(os.path.join(args.datapath, 'depth', args.startFrameDepth + '.png'), cv2.IMREAD_GRAYSCALE) 46 | img_gray_cur = cv2.imread(os.path.join(args.datapath, 'rgb', args.endFrameRGB + '.png'), cv2.IMREAD_GRAYSCALE) 47 | img_depth_cur = cv2.imread(os.path.join(args.datapath, 'depth', args.endFrameDepth + '.png'), cv2.IMREAD_GRAYSCALE) 48 | 49 | # Convert the intensity images to float 50 | img_gray_prev = imgutils.im2float(img_gray_prev) 51 | img_gray_cur = imgutils.im2float(img_gray_cur) 52 | # print(img_gray_prev.shape, img_depth_prev.shape, img_gray_cur.shape, img_depth_cur.shape) 53 | 54 | # Use default camera intrinsics, for now 55 | f = 525.0 56 | cx = 319.5 57 | cy = 239.5 58 | scaling_factor = 5000 59 | 60 | # Construct a downsampled pyramid using the specified number of pyramid levels 61 | pyramid_gray, pyramid_depth, pyramid_intrinsics = pyramid.buildPyramid(img_gray_prev, \ 62 | img_depth_prev, num_levels=args.numPyramidLevels, focal_length=f, cx=cx, cy=cy) 63 | 64 | # Compute residuals 65 | K = dict() 66 | K['f'] = f 67 | K['cx'] = cx 68 | K['cy'] = cy 69 | K['scaling_factor'] = scaling_factor 70 | xi_init = np.zeros((6,1)) 71 | # residuals, cache_point3d = photometric_alignment.computeResiduals(img_gray_prev, img_depth_prev, \ 72 | # img_gray_cur, K, xi_init) 73 | 74 | # # Test image gradient computation 75 | # grad_ix, grad_iy = photometric_alignment.computeImageGradients(img_gray_prev) 76 | # cv2.imshow('img', img_gray_prev) 77 | # cv2.imshow('grad_x', grad_ix) 78 | # cv2.imshow('grad_y', grad_iy) 79 | # cv2.waitKey(0) 80 | 81 | # # Test Jacobian computation 82 | # J = photometric_alignment.computeJacobian(img_gray_prev, img_depth_prev, img_gray_cur, \ 83 | # K, xi_init, residuals, cache_point3d) 84 | 85 | # Simple gradient descent test 86 | stepsize = 1e-6 87 | max_iters = 100 88 | tol = 1e-8 89 | err_prev = 1e24 90 | for it in range(max_iters): 91 | residuals, cache_point3d = photometric_alignment.computeResiduals(img_gray_prev, \ 92 | img_depth_prev, img_gray_cur, K, xi_init) 93 | J = photometric_alignment.computeJacobian(img_gray_prev, img_depth_prev, img_gray_cur, \ 94 | K, xi_init, residuals, cache_point3d) 95 | # Normalize the error and the jacobian 96 | err_cur = 0.5 * (1 / (img_gray_cur.shape[0]*img_gray_cur.shape[1])) * np.sum(np.abs(residuals)) 97 | grad = (1 / (img_gray_cur.shape[0]*img_gray_cur.shape[1])) * np.reshape(np.sum(J, axis=(0,1)).T, (6,1)) 98 | print('Error: ', err_cur) 99 | print('Jacobian: ', np.sum(J, axis=(0,1))) 100 | xi_init += stepsize * grad 101 | if np.abs(err_prev - err_cur) < tol: 102 | break 103 | err_prev = err_cur 104 | 105 | 106 | # fig, ax = plt.subplots(2, 2) 107 | # ax[0, 0].imshow(img_gray_prev, cmap='gray') 108 | # ax[0, 0].set_title('RGB image (current frame)') 109 | # ax[0, 1].imshow(img_depth_prev, cmap='gray') 110 | # ax[0, 1].set_title('Depth image (current frame)') 111 | # ax[1, 0].imshow(img_gray_cur, cmap='gray') 112 | # ax[1, 0].set_title('RGB image (next frame)') 113 | # ax[1, 1].imshow(img_depth_cur, cmap='gray') 114 | # ax[1, 1].set_title('Depth image (next frame)') 115 | # plt.show() 116 | 117 | 118 | if __name__ == '__main__': 119 | args = parse_args() 120 | main(args) 121 | --------------------------------------------------------------------------------