├── .gitignore ├── facap ├── __init__.py ├── data │ ├── __init__.py │ └── scan.py ├── geometry │ ├── __init__.py │ ├── torch.py │ ├── open3d.py │ ├── numpy.py │ └── allign_walls.py ├── colmap_scripts │ ├── __init__.py │ └── read_write_model.py ├── feature_errors │ ├── __init__.py │ ├── point_error.py │ ├── reprojection_error.py │ └── ray_error.py ├── optimization │ ├── __init__.py │ ├── project.py │ ├── unproject.py │ ├── floor_term.py │ ├── wall_segment_term.py │ ├── wall_term.py │ └── parameters.py ├── floorplan_metrics.py └── utils.py ├── imgs ├── BIMCaP.png ├── BIMCaP_v2.png ├── bimfloor.png └── pipeline.png ├── env.yaml ├── experiments ├── config_facap.yaml ├── config_bim.yaml └── config.yaml ├── Dockerfile ├── README.md ├── scripts └── run_experiment.py └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* -------------------------------------------------------------------------------- /facap/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /facap/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /facap/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /facap/colmap_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /imgs/BIMCaP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MigVega/BIMCaP/HEAD/imgs/BIMCaP.png -------------------------------------------------------------------------------- /imgs/BIMCaP_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MigVega/BIMCaP/HEAD/imgs/BIMCaP_v2.png -------------------------------------------------------------------------------- /imgs/bimfloor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MigVega/BIMCaP/HEAD/imgs/bimfloor.png -------------------------------------------------------------------------------- /imgs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MigVega/BIMCaP/HEAD/imgs/pipeline.png -------------------------------------------------------------------------------- /facap/feature_errors/__init__.py: -------------------------------------------------------------------------------- 1 | from facap.feature_errors.point_error import point_error 2 | from facap.feature_errors.ray_error import ray_error 3 | from facap.feature_errors.reprojection_error import reprojection_error 4 | 5 | __all__ = ["point_error", "ray_error", "reprojection_error"] 6 | -------------------------------------------------------------------------------- /facap/feature_errors/point_error.py: -------------------------------------------------------------------------------- 1 | def point_error(left, right, unproject, distance_function, **kwargs): 2 | processed_left = unproject(left["depths"], left["points"], left["camera_idxs"]) 3 | processed_right = unproject(right["depths"], 
right["points"], right["camera_idxs"]) 4 | 5 | return distance_function(processed_left, processed_right) 6 | -------------------------------------------------------------------------------- /facap/feature_errors/reprojection_error.py: -------------------------------------------------------------------------------- 1 | 2 | def reprojection_error(left, right, unproject, project, distance_function, **kwargs): 3 | unprojected_left = unproject(left["depths"], left["points"], left["camera_idxs"]) 4 | projected_left_on_right = project(unprojected_left, right["camera_idxs"]) 5 | 6 | return distance_function(right["points"], projected_left_on_right) 7 | -------------------------------------------------------------------------------- /env.yaml: -------------------------------------------------------------------------------- 1 | name: FACaP 2 | channels: 3 | - pytorch 4 | - iopath 5 | - fvcore 6 | - conda-forge 7 | - bottler 8 | - pytorch3d 9 | dependencies: 10 | - python=3.8 11 | - pytorch=1.7.1 12 | - torchvision=0.8.2 13 | - torchaudio=0.7.2 14 | - cudatoolkit=11.0 15 | - fvcore 16 | - iopath 17 | - nvidiacub=1.10.0 18 | - pytorch3d=0.4.0 19 | - opencv 20 | - matplotlib 21 | - scipy 22 | - tqdm 23 | - pyyaml 24 | -------------------------------------------------------------------------------- /facap/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | from facap.optimization.unproject import Unproject 2 | from facap.optimization.project import Project 3 | from facap.optimization.parameters import CameraParameters 4 | from facap.optimization.floor_term import FloorTerm 5 | from facap.optimization.wall_term import WallTerm 6 | from facap.optimization.wall_segment_term import WallSegmentTerm 7 | 8 | __all__ = ["Unproject", "Project", "CameraParameters", "FloorTerm", "WallTerm", "WallSegmentTerm"] 9 | -------------------------------------------------------------------------------- /facap/optimization/project.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from facap.geometry.torch import project_points_rotvec 5 | 6 | 7 | class Project(nn.Module): 8 | def __init__(self, camera_parameters): 9 | super(Project, self).__init__() 10 | self.camera_parameters = camera_parameters 11 | 12 | def forward(self, pcd, cam_ids): 13 | if len(cam_ids) == 1: 14 | cam_ids = list(cam_ids) * len(pcd) 15 | rotvecs, translations, f, pp = self.camera_parameters.get_tensors(cam_ids) 16 | 17 | yxs = project_points_rotvec(pcd, f, pp, rotvecs, translations) 18 | 19 | return yxs 20 | -------------------------------------------------------------------------------- /facap/optimization/unproject.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from facap.geometry.torch import unproject_points_rotvec 5 | 6 | 7 | class Unproject(nn.Module): 8 | def __init__(self, camera_parameters, scale): 9 | super(Unproject, self).__init__() 10 | self.camera_parameters = camera_parameters 11 | self.scale = scale 12 | 13 | def forward(self, depths, points, cam_ids): 14 | if len(cam_ids) == 1: 15 | cam_ids = list(cam_ids) * len(depths) 16 | rotvecs, translations, f, pp = self.camera_parameters.get_tensors(cam_ids) 17 | pcd = unproject_points_rotvec(depths, points, f, pp, rotvecs, translations, scale=self.scale) 18 | 19 | return pcd 20 | -------------------------------------------------------------------------------- 
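Taken together, `CameraParameters` (defined in `facap/optimization/parameters.py`), `Unproject`, and `Project` form the differentiable camera model that the optimization runs on. A minimal sketch of how they compose — all camera names and numeric values below are made-up examples, not repository data:

```python
import numpy as np
import torch

from facap.data.scan import Camera
from facap.optimization import CameraParameters, Project, Unproject

# One synthetic camera; Camera(f, pp, rotvec, translation) as defined in facap/data/scan.py.
cameras = {"0001": Camera((500.0, 500.0), (320.0, 240.0), np.zeros(3), np.zeros(3))}
camera_parameters = CameraParameters(cameras).float()

unproject = Unproject(camera_parameters, scale=1000)  # assumes depths stored in millimetres
project = Project(camera_parameters)

depths = torch.tensor([1500.0, 2000.0])                   # raw depth values
points = torch.tensor([[240.0, 320.0], [100.0, 200.0]])   # (y, x) pixel coordinates
cam_ids = ["0001", "0001"]

pcd = unproject(depths, points, cam_ids)  # (2, 3) world-space points
yxs = project(pcd, cam_ids)               # back to (y, x); recovers `points` up to numerics
```

Because both mappings are built from `nn.Parameter` rotation vectors and translations, any distance between `yxs` and observed keypoints is differentiable with respect to the camera poses, which is what the feature errors in `facap/feature_errors` exploit.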
/experiments/config_facap.yaml: -------------------------------------------------------------------------------- 1 | paths: 2 | scan_path: "../camera_refinement_BIM/scan_lowres_002_250/" 3 | save_path: "../camera_refinement_BIM/scan_lowres_002_250/results_gfw/" 4 | 5 | data: 6 | use_bim : False 7 | wall_sparsity: 150 8 | floor_sparsity: 30 9 | min_frame_difference: 1 10 | max_initial_distance: 0.4 11 | floor_percentiles: [ 0.5, 95 ] 12 | depths_scale: 1 13 | 14 | optimization: 15 | num_epoches: 20000 16 | lr: 0.1 17 | momentum: 0.9 18 | fixed_cameras_idx: [ ] 19 | 20 | error: 21 | error_type: point_error 22 | floor_term: True 23 | floor_weight: 0.02 24 | ceil_term: False 25 | ceil_weight: 0.02 26 | wall_term: True 27 | wall_term_type: "point" 28 | wall_weight: 0.0007 29 | include_columns: False 30 | -------------------------------------------------------------------------------- /experiments/config_bim.yaml: -------------------------------------------------------------------------------- 1 | paths: 2 | scan_path: "../camera_refinement_BIM/scan_lowres_002_250/" 3 | save_path: "../camera_refinement_BIM/scan_lowres_002_250/results_bim_noceil_nocol_gfw/" 4 | 5 | data: 6 | use_bim : True 7 | wall_sparsity: 150 8 | floor_sparsity: 30 9 | min_frame_difference: 1 10 | max_initial_distance: 0.4 11 | floor_percentiles: [ 0.5, 95 ] 12 | depths_scale: 1 13 | 14 | optimization: 15 | num_epoches: 20000 16 | lr: 0.1 17 | momentum: 0.9 18 | fixed_cameras_idx: [ ] 19 | 20 | error: 21 | error_type: point_error 22 | floor_term: True 23 | floor_weight: 0.02 24 | ceil_term: False 25 | ceil_weight: 0.02 26 | wall_term: True 27 | wall_term_type: "point" 28 | wall_weight: 0.0007 29 | include_columns: False 30 | -------------------------------------------------------------------------------- /experiments/config.yaml: -------------------------------------------------------------------------------- 1 | paths: 2 | scan_path: "./data/htkr" 3 | save_path: "./data/htkr/results_2d" 4 | 5 | data: 6 | wall_sparsity: 150 7 | floor_sparsity: 30 8 | #Minimum distance between frames that are used for optimization 9 | min_frame_difference: 3 10 | #Filter out matched keypoints with too much initial distance 11 | max_initial_distance: 0.4 12 | #Filter out floor points in lower and upper percentiles regarding Z axis 13 | floor_percentiles: [ 0.5, 95 ] 14 | #The scale at which the depth images are provided 15 | depths_scale: 1000 16 | 17 | optimization: 18 | num_epoches: 25000 19 | lr: 0.1 20 | momentum: 0.9 21 | fixed_cameras_idx: [ ] 22 | 23 | error: 24 | error_type: point_error 25 | floor_term: True 26 | floor_weight: 0.02 27 | wall_term: True 28 | wall_term_type: "segment" 29 | wall_weight: 0.0007 -------------------------------------------------------------------------------- /facap/floorplan_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def nearest_line_distance(florplan_edges, pcd_2d, max_dist=10., border_margin=.1, percentile=.9): 5 | biases = np.array([np.array(i[0]) for i in florplan_edges]) 6 | directions = np.array([np.array(i[1]) - np.array(i[0]) for i in florplan_edges]) 7 | 8 | distances = [] 9 | 10 | for a, b in zip(directions, biases): 11 | c = pcd_2d - b 12 | dot_product = np.sum(c * a, axis=-1) / a.dot(a) 13 | 14 | mask = np.where((dot_product < -border_margin) | (dot_product > 1 + border_margin)) 15 | 16 | dst = np.abs(c[:, 0] * a[1] - c[:, 1] * a[0]) / np.linalg.norm(a) 17 | 18 | dst[mask] = max_dist 19 | dst[dst > max_dist] = 
max_dist
20 |         distances.append(dst)
21 | 
22 |     distances = np.stack(distances)
23 |     distances = np.min(distances, axis=0)
24 |     return distances, np.mean(distances), np.percentile(distances, percentile)  # note: np.percentile expects a percentage in [0, 100]
25 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
2 | 
3 | ENV TZ=Europe/Moscow
4 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
5 | 
6 | RUN apt-get update &&\
7 |     apt-get upgrade -y &&\
8 |     apt-get install -y curl git vim htop wget bzip2 g++ libgl1-mesa-dev libosmesa6-dev patchelf\
9 |     libglfw3 libglew-dev libglib2.0-0 libsm6 unzip xvfb
10 | 
11 | ARG USERNAME=user
12 | RUN apt-get install -y sudo && \
13 |     addgroup --gid 1000 $USERNAME && \
14 |     adduser --uid 1000 --gid 1000 --disabled-password --gecos '' $USERNAME && \
15 |     adduser $USERNAME sudo && \
16 |     echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
17 |     USER=$USERNAME && \
18 |     GROUP=$USERNAME
19 | 
20 | USER $USERNAME:$USERNAME
21 | WORKDIR "/home/$USERNAME"
22 | ENV PATH="/home/$USERNAME/.local/bin:${PATH}"
23 | 
24 | ENV PATH=/home/$USERNAME/miniconda3/bin:${PATH}
25 | 
26 | RUN wget -q "https://repo.continuum.io/miniconda/Miniconda3-py39_4.9.2-Linux-x86_64.sh" -O 'miniconda3.sh' && \
27 |     bash 'miniconda3.sh' -b -p "/home/$USERNAME/miniconda3" && \
28 |     rm 'miniconda3.sh' && \
29 |     conda update -y conda && \
30 |     conda install pip
31 | 
32 | ENV HOME /home/$USERNAME
33 | ADD env.yaml env.yaml
34 | RUN conda env update -f env.yaml --prune
35 | 
36 | RUN pip install open3d==0.10.0 -U
37 | 
38 | 
39 | SHELL ["/bin/bash", "-c"]
--------------------------------------------------------------------------------
/facap/optimization/floor_term.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | 
4 | 
5 | def fit_plane(pcd):  # least-squares plane fit via SVD of the centered covariance
6 |     mean = torch.mean(pcd, dim=0)
7 |     pcd_c = pcd - mean
8 |     x = torch.matmul(pcd_c.T, pcd_c)
9 |     u, s, v = torch.svd(x)
10 |     abc = v[:, -1]
11 |     d = -torch.dot(abc, mean)
12 |     coefs = torch.cat([abc, d.view(1, )])
13 |     return coefs
14 | 
15 | 
16 | class FloorTerm(nn.Module):
17 |     def __init__(self, floor, unproject, distance_function, bim_floor=None):
18 |         super(FloorTerm, self).__init__()
19 |         self.floor = floor
20 |         self.unproject = unproject
21 | 
22 |         self.plane = self.get_initial_plane(bim_floor)
23 |         self.distance_function = distance_function
24 | 
25 |     def get_initial_plane(self, bim_floor):
26 |         with torch.no_grad():
27 |             if bim_floor is None:
28 |                 floor_pcd = self.unproject(self.floor["depths"], self.floor["points"], self.floor["camera_idxs"])
29 |             else:
30 |                 floor_pcd = bim_floor.to('cuda:0')  # note: the device is hard-coded here
31 |                 print('FLOORPCD', floor_pcd.shape)
32 |             normed_coefs = fit_plane(floor_pcd)
33 |         return normed_coefs
34 | 
35 |     def forward(self):
36 |         floor_pcd = self.unproject(self.floor["depths"], self.floor["points"], self.floor["camera_idxs"])
37 |         floor_distances = (torch.sum(floor_pcd * self.plane[:3], dim=-1) + self.plane[3])
38 |         zeros = torch.zeros_like(floor_distances)
39 |         floor_term = self.distance_function(zeros, floor_distances)
40 |         return floor_term
41 | 
--------------------------------------------------------------------------------
/facap/optimization/wall_segment_term.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from torch import nn
4 | 
5 | 
6 | def 
point_segment_distance(pcd_2d, segments): 7 | biases = torch.stack([i[:2] for i in segments]) 8 | directions = torch.stack([i[2:] - i[:2] for i in segments]) 9 | 10 | c = pcd_2d - biases 11 | dot_product = torch.sum(c * directions, dim=-1) / torch.sum(directions * directions, dim=-1) 12 | 13 | par_dist = torch.zeros_like(dot_product) 14 | mask = dot_product < 0 15 | par_dist[mask] = - dot_product[mask] * torch.norm(directions, dim=-1)[mask] 16 | mask = dot_product > 1. 17 | par_dist[dot_product > 1.] = (dot_product[mask] - 1) * torch.norm(directions, dim=-1)[mask] 18 | ort_dst = torch.abs(c[:, 0] * directions[:, 1] - c[:, 1] * directions[:, 0]) / torch.norm(directions, dim=-1) 19 | 20 | return par_dist, ort_dst 21 | 22 | 23 | class WallSegmentTerm(nn.Module): 24 | def __init__(self, wall, unproject, distance_function, floorplan): 25 | super(WallSegmentTerm, self).__init__() 26 | self.wall = wall 27 | self.unproject = unproject 28 | self.distance_function = distance_function 29 | self.register_buffer("floorplan", floorplan) 30 | 31 | def forward(self): 32 | wall_pcd = self.unproject(self.wall["depths"], self.wall["points"], self.wall["camera_idxs"]) 33 | par_dist, ort_dst = point_segment_distance(wall_pcd[:, [0, 1]], self.wall["segments"]) #TODO 01 34 | zeros = torch.zeros_like(par_dist) 35 | 36 | return self.distance_function(zeros, par_dist) + self.distance_function(zeros, ort_dst) 37 | -------------------------------------------------------------------------------- /facap/optimization/wall_term.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | 6 | def nearest_segment_distance(floorplan, pcd_2d): 7 | biases = torch.stack([i[:2] for i in floorplan]) 8 | directions = torch.stack([i[2:] - i[:2] for i in floorplan]) 9 | 10 | distances = [] 11 | 12 | for a, b in zip(directions, biases): 13 | c = pcd_2d - b 14 | dot_product = torch.sum(c * a, dim=-1) / a.dot(a) 15 | 16 | par_dist = torch.zeros_like(dot_product) 17 | par_dist[dot_product < 0] = - dot_product[dot_product < 0] * torch.norm(a) 18 | par_dist[dot_product > 1.] = (dot_product[dot_product > 1.] 
- 1) * torch.norm(a) 19 | ort_dst = torch.abs(c[:, 0] * a[1] - c[:, 1] * a[0]) / torch.norm(a) 20 | distances.append(torch.sqrt(par_dist ** 2 + ort_dst ** 2)) 21 | 22 | distances = torch.stack(distances) 23 | distances = torch.min(distances, dim=0).values 24 | return distances 25 | 26 | 27 | class WallTerm(nn.Module): 28 | def __init__(self, wall, unproject, distance_function, floorplan): 29 | super(WallTerm, self).__init__() 30 | self.wall = wall 31 | self.unproject = unproject 32 | self.distance_function = distance_function 33 | self.register_buffer("floorplan", floorplan) 34 | 35 | def forward(self): 36 | wall_pcd = self.unproject(self.wall["depths"], self.wall["points"], self.wall["camera_idxs"]) 37 | distances = nearest_segment_distance(self.floorplan, wall_pcd[:, [0, 1]]) #TODO 01 38 | zeros = torch.zeros_like(distances) 39 | 40 | return self.distance_function(zeros, distances) 41 | -------------------------------------------------------------------------------- /facap/optimization/parameters.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from facap.data.scan import Camera 6 | from collections import OrderedDict 7 | 8 | 9 | class CameraParameters(nn.Module): 10 | def __init__(self, cameras): 11 | super(CameraParameters, self).__init__() 12 | self.cameras = sorted(list(cameras.keys())) 13 | self.cam2id = dict(zip(self.cameras, range(len(self.cameras)))) 14 | self.rotvecs = nn.Parameter(torch.stack([torch.from_numpy(cameras[i].rotvec) for i in cameras])) 15 | self.translations = nn.Parameter(torch.stack([torch.from_numpy(cameras[i].translation) for i in cameras])) 16 | f = torch.stack([torch.tensor(cameras[i].f) for i in cameras]) 17 | pp = torch.stack([torch.tensor(cameras[i].pp) for i in cameras]) 18 | self.register_buffer("f", f) 19 | self.register_buffer("pp", pp) 20 | self.cached_indexes = OrderedDict() 21 | self.n_cached = 5 22 | 23 | def get_cameras(self): 24 | cameras = {} 25 | 26 | for cam_id in self.cameras: 27 | idx = self.cam2id[cam_id] 28 | rotvec = self.rotvecs[idx].cpu().detach().numpy() 29 | translation = self.translations[idx].cpu().detach().numpy() 30 | f = self.f[idx].cpu().detach().numpy() 31 | pp = self.pp[idx].cpu().detach().numpy() 32 | cameras[cam_id] = Camera(f, pp, rotvec, translation) 33 | 34 | return cameras 35 | 36 | def get_tensors(self, camera_idxs): 37 | idxs = self.get_cached_indexes(camera_idxs) 38 | rotvecs = self.rotvecs[idxs] 39 | translations = self.translations[idxs] 40 | f = self.f[idxs] 41 | pp = self.pp[idxs] 42 | return rotvecs, translations, f, pp 43 | 44 | def get_cached_indexes(self, cameras): 45 | for key in self.cached_indexes: 46 | if key == id(cameras): 47 | return self.cached_indexes[key] 48 | if len(self.cached_indexes) == self.n_cached: 49 | self.cached_indexes.popitem(last=True) 50 | idxs = [self.cam2id[i] for i in cameras] 51 | self.cached_indexes.update({id(cameras): idxs}) 52 | self.cached_indexes.move_to_end(id(cameras), last=False) 53 | return idxs 54 | -------------------------------------------------------------------------------- /facap/feature_errors/ray_error.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def ray_distance(a0, a, b0, b): 5 | c = torch.cross(a, b) 6 | c = c / torch.norm(c, dim=-1, keepdim=True) 7 | dr = torch.unsqueeze(torch.sum(c * (b0 - a0), dim=-1), dim=-1) 8 | a0 += dr * c 9 | 10 | def get_coef(a, b, a0, b0): 11 | return ((b0[:, 1] - a0[:, 1]) * 
a[:, 0] - (b0[:, 0] - a0[:, 0]) * a[:, 1]) / \
12 |                (b[:, 0] * a[:, 1] - a[:, 0] * b[:, 1])
13 | 
14 |     beta = get_coef(a, b, a0, b0)
15 |     alpha = get_coef(b, a, b0, a0)
16 | 
17 |     beta = torch.unsqueeze(beta, dim=-1)
18 |     alpha = torch.unsqueeze(alpha, dim=-1)
19 | 
20 |     return (dr.squeeze(dim=-1),
21 |             alpha.squeeze(dim=-1),
22 |             beta.squeeze(dim=-1))
23 | 
24 | 
25 | def ray_error(left, right, unproject, distance_function, max_depth=3., parallel_eps=1e-2,
26 |               depths_weight=0.1, depths_scale=1000, **kwargs):
27 |     zero_depth = torch.zeros_like(left["depths"])
28 |     ones_depth = torch.ones_like(left["depths"])
29 | 
30 |     a0 = unproject(zero_depth, left["points"], left["camera_idxs"])
31 |     b0 = unproject(zero_depth, right["points"], right["camera_idxs"])
32 | 
33 |     a1 = unproject(ones_depth, left["points"], left["camera_idxs"])
34 |     b1 = unproject(ones_depth, right["points"], right["camera_idxs"])
35 | 
36 |     a = a1 - a0
37 |     b = b1 - b0
38 |     a = a / torch.norm(a, dim=-1, keepdim=True)
39 |     b = b / torch.norm(b, dim=-1, keepdim=True)
40 | 
41 |     dr, left_est_depth, right_est_depth = ray_distance(a0, a, b0, b)
42 | 
43 |     c = torch.cross(a, b)
44 |     norm_c = torch.norm(c, dim=-1)
45 | 
46 |     mask = (left_est_depth > 0) & (right_est_depth > 0) & (left_est_depth < max_depth) \
47 |            & (right_est_depth < max_depth) & (norm_c > parallel_eps)
48 | 
49 |     dr = dr[mask]
50 |     scaled_right_depth = right["depths"] / depths_scale
51 |     scaled_left_depth = left["depths"] / depths_scale
52 | 
53 |     zeros = torch.zeros_like(dr)
54 |     ray_term = distance_function(zeros, dr)
55 |     depths_distance = distance_function(scaled_right_depth[mask], right_est_depth[mask])
56 |     depths_distance += distance_function(scaled_left_depth[mask], left_est_depth[mask])
57 | 
58 |     return ray_term + depths_weight * depths_distance
59 | 
--------------------------------------------------------------------------------
/facap/geometry/torch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def unproject_points_rotvec(depths, yxs, f, pp, rotvec, translation, scale=1):
5 |     """
6 |     Unprojects a given depth map into a point cloud with inverse extrinsics.
7 | 
8 |     Parameters
9 |     ----------
10 |     depths : torch.Tensor
11 |         Array of source depths.
12 |     yxs : torch.Tensor
13 |         Coordinates of the points corresponding to the depths.
14 |     f : torch.Tensor
15 |         Focal lengths with shape (2,) or (N, 2).
16 |     pp : torch.Tensor
17 |         The principal point with shape (2,) or (N, 2).
18 |     rotvec : torch.Tensor
19 |         Rotation vectors.
20 |     translation : torch.Tensor
21 |         Translations.
22 |     scale : float, optional (default=1)
23 |         The scale factor the depths are divided by.
24 | 
25 |     Returns
26 |     -------
27 |     torch.Tensor
28 |         3d coordinates of the given points.
29 | 
30 |     """
31 |     u, v = yxs[:, 0], yxs[:, 1]
32 |     z_cam = torch.divide(depths, scale)
33 |     if isinstance(f, tuple) or len(f.shape) == 1:
34 |         x_cam = z_cam * (v - pp[0]) / f[0]
35 |         y_cam = z_cam * (u - pp[1]) / f[1]
36 |     else:
37 |         x_cam = z_cam * (v - pp[:, 0]) / f[:, 0]
38 |         y_cam = z_cam * (u - pp[:, 1]) / f[:, 1]
39 | 
40 |     coords_cam = torch.stack((x_cam, y_cam, z_cam)).T
41 |     coords_world = rotate(coords_cam, rotvec) + translation
42 |     return coords_world
43 | 
44 | 
45 | def project_points_rotvec(coords_world, f, pp, rotvec, translation):
46 |     """
47 |     Projects a given point cloud to image coordinates with camera extrinsics.
48 | 
49 |     Parameters
50 |     ----------
51 |     coords_world : torch.Tensor
52 |         3d coordinates of points in world space.
53 |     f : torch.Tensor
54 |         Focal lengths with shape (2,) or (N, 2).
55 |     pp : torch.Tensor
56 |         The principal point with shape (2,) or (N, 2).
57 |     rotvec : torch.Tensor
58 |         Rotation vectors.
59 |     translation : torch.Tensor
60 |         Translations.
61 | 
62 |     Returns
63 |     -------
64 |     torch.Tensor
65 |         2d (y, x) image coordinates of the given points.
66 | 
67 |     """
68 |     coords_cam = rotate(coords_world - translation, -rotvec)
69 |     x_cam, y_cam, z_cam = coords_cam[:, 0], coords_cam[:, 1], coords_cam[:, 2]
70 |     if isinstance(f, tuple) or len(f.shape) == 1:
71 |         v = torch.divide(x_cam, z_cam) * f[0] + pp[0]
72 |         u = torch.divide(y_cam, z_cam) * f[1] + pp[1]
73 |     else:
74 |         v = torch.divide(x_cam, z_cam) * f[:, 0] + pp[:, 0]
75 |         u = torch.divide(y_cam, z_cam) * f[:, 1] + pp[:, 1]
76 |     yxs = torch.stack((u, v)).T
77 |     return yxs
78 | 
79 | 
80 | def rotate(points, rotvec):
81 |     """
82 |     Rotates the given point cloud.
83 | 
84 |     Parameters
85 |     ----------
86 | 
87 |     rotvec : torch.Tensor
88 |         Rotation vectors.
89 |     points : torch.Tensor
90 |         3d point cloud.
91 | 
92 |     Returns
93 |     -------
94 |     torch.Tensor
95 |         Rotated point cloud.
96 | 
97 |     """
98 | 
99 |     if len(rotvec.shape) == 1:
100 |         rotvec = rotvec.repeat(len(points)).reshape(-1, 3)
101 |     theta = torch.linalg.norm(rotvec, dim=1).unsqueeze(1)
102 |     v = torch.divide(rotvec, theta)
103 |     v = torch.where(torch.isnan(v), torch.zeros_like(v), v)
104 |     dot = torch.sum(points * v, dim=1).unsqueeze(1)
105 |     cos_theta = torch.cos(theta)
106 |     sin_theta = torch.sin(theta)
107 |     return cos_theta * points + sin_theta * torch.cross(v, points) + dot * (1 - cos_theta) * v  # Rodrigues' rotation formula
108 | 
--------------------------------------------------------------------------------
/facap/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import open3d as o3d
4 | 
5 | from matplotlib import pyplot as plt
6 | from facap.geometry.numpy import unproject_points_rotvec
7 | from facap.geometry.open3d import make_pcd_from_numpy, make_line_set
8 | 
9 | 
10 | def read_npy(path):
11 |     paths = np.load(path, allow_pickle=True)
12 | 
13 |     edges = []
14 | 
15 |     for path in paths:
16 |         ed = list(zip(path[1:].tolist(), path[:-1].tolist()))
17 |         ed = [(tuple(i[0]), tuple(i[1])) for i in ed if i[0] != i[1]]
18 |         edges.extend(ed)
19 |     corners = [tuple(elem) for edge in edges for elem in edge]
20 |     corners = set(corners)
21 | 
22 |     return corners, edges
23 | 
24 | 
25 | def plot_graph(edges, color, name, markersize=20):
26 |     for j, (st, end) in enumerate(edges):
27 |         if j == 0:
28 |             plt.plot([st[0], end[0]], [st[1], end[1]], f"{color}.-", label=name, markersize=markersize)
29 |         else:
30 |             plt.plot([st[0], end[0]], [st[1], end[1]], f"{color}.-", markersize=markersize)
31 | 
32 | 
33 | def dicts_to_torch(dict_list, device):
34 |     for dct in dict_list:
35 |         for key in dct:
36 |             if key != "camera_idxs":
37 |                 dct[key] = torch.from_numpy(dct[key]).to(device).float()
38 | 
39 | 
40 | def dicts_to_numpy(dict_list):
41 |     for dct in dict_list:
42 |         for key in dct:
43 |             if key != "camera_idxs":
44 |                 dct[key] = dct[key].cpu().detach().numpy()
45 | 
46 | 
47 | def visualize_aligned_walls(wall_data, save_path, scale=1):
48 |     segments = [tuple(i.tolist()) for i in wall_data["segments"]]
49 |     #print('seg\n', len(segments), segments)
50 |     unique_segments = list(set(segments))
51 | 
52 |     plt.figure(figsize=(8, 8))
53 | 
54 |     segment_to_color = { i: np.random.rand(3) for i in unique_segments}
55 |     points_colors = [segment_to_color[i] for i in segments]
56 | 
57 |     pcd = unproject_points_rotvec(wall_data["depths"], wall_data["points"],
58 |                                   wall_data["f"], wall_data["pp"],
59 |                                   wall_data["rotvecs"], wall_data["translations"], scale=scale)
60 | 
61 |     for s, c in segment_to_color.items():
62 |         plt.plot([s[0], s[2]], [s[1], s[3]], c=c)
63 |     plt.scatter(pcd[:, 0], pcd[:, 1], c=points_colors) #TODO 2
64 |     plt.savefig(f"{save_path}/alligned_walls.png")
65 | 
66 | 
67 | def visualize_data(data, save_path, suffix="", scale=1):
68 |     left, right, wall, floor, ceiling = data
69 |     left_pcd = unproject_points_rotvec(left["depths"], left["points"], left["f"],
70 |                                        left["pp"], left["rotvecs"], left["translations"], scale=scale)
71 | 
72 |     right_pcd = unproject_points_rotvec(right["depths"], right["points"], right["f"],
73 |                                         right["pp"], right["rotvecs"], right["translations"], scale=scale)
74 | 
75 |     floor_pcd = unproject_points_rotvec(floor["depths"], floor["points"], floor["f"],floor["pp"], floor["rotvecs"], floor["translations"], scale=scale)
76 | 
77 |     wall_pcd = unproject_points_rotvec(wall["depths"], wall["points"], wall["f"],wall["pp"], wall["rotvecs"], wall["translations"], scale=scale)
78 | 
79 |     ceiling_pcd = unproject_points_rotvec(ceiling["depths"], ceiling["points"], ceiling["f"],ceiling["pp"], ceiling["rotvecs"], ceiling["translations"], scale=scale)
80 | 
81 | 
82 |     if "segments" in wall:
83 |         visualize_aligned_walls(wall, save_path, scale=scale)
84 | 
85 | 
86 |     red = np.array([1, 0, 0])
87 |     green = np.array([0, 1, 0])
88 |     blue = np.array([0, 0, 1])
89 |     left_pcd = make_pcd_from_numpy(left_pcd, red)
90 |     right_pcd = make_pcd_from_numpy(right_pcd, green)
91 |     wall_pcd = make_pcd_from_numpy(wall_pcd, red)
92 |     floor_pcd = make_pcd_from_numpy(floor_pcd, blue)
93 |     ceiling_pcd = make_pcd_from_numpy(ceiling_pcd, blue)
94 |     line_set = make_line_set(left_pcd, right_pcd, green)
95 | 
96 | 
97 |     o3d.io.write_line_set(f"{save_path}/keypoints"+suffix+".ply", line_set)
98 |     o3d.io.write_point_cloud(f"{save_path}/wall"+suffix+".ply", wall_pcd)
99 |     o3d.io.write_point_cloud(f"{save_path}/floor"+suffix+".ply", floor_pcd)
100 |     o3d.io.write_point_cloud(f"{save_path}/ceiling"+suffix+".ply", ceiling_pcd)
101 | 
--------------------------------------------------------------------------------
/facap/geometry/open3d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | import open3d as o3d
4 | from open3d import visualization
5 | 
6 | 
7 | def project_points(pcd, extrinsic, f, pp, width, height):
8 |     """
9 |     Projects a given point cloud into a depth map with Open3D.
10 | 
11 |     Parameters
12 |     ----------
13 |     pcd : open3d.geometry.PointCloud
14 |         Source point cloud.
15 |     f : tuple
16 |         Focal lengths (fx, fy).
17 |     pp : tuple
18 |         The principal point (cx, cy).
19 |     extrinsic : numpy.ndarray
20 |         The 4x4 extrinsic matrix.
21 |     width : int
22 |         Width of the map.
23 |     height : int
24 |         Height of the map.
25 | 
26 |     Returns
27 |     -------
28 |     numpy.ndarray
29 |         Depth map.
30 | 31 | """ 32 | camera = o3d.camera.PinholeCameraParameters() 33 | camera.intrinsic.set_intrinsics(width, height, f[0], f[1], pp[0] - 0.5, pp[1] - 0.5) 34 | camera.extrinsic = extrinsic 35 | viewer = visualization.Visualizer() 36 | viewer.create_window(width=width, height=height) 37 | viewer.add_geometry(pcd) 38 | viewer.get_view_control().convert_from_pinhole_camera_parameters(camera) 39 | viewer.poll_events() 40 | viewer.update_renderer() 41 | depth = viewer.capture_depth_float_buffer() 42 | viewer.destroy_window() 43 | depth = (np.asarray(depth).astype(np.float32)).astype(np.uint16) 44 | return depth 45 | 46 | 47 | def unproject_points(depth_map, color_map, extrinsic, f, pp, width, height): 48 | """ 49 | Unprojects a given depth map into a point cloud using Open3D. 50 | 51 | Parameters 52 | ---------- 53 | depth_map : numpy.ndarray 54 | Source depth map. 55 | color_map : numpy.ndarray 56 | Source rgb image. 57 | f : tuple 58 | Focus (fx, fy). 59 | pp : tuple 60 | The principal point (cx, cy). 61 | extrinsic : numpy.ndarray 62 | The 4x4 extrinsic matrix. 63 | width : int 64 | Width of the map. 65 | height : int 66 | Height of the map. 67 | 68 | Returns 69 | ------- 70 | open3d.geometry.PointCloud 71 | The point cloud constructed from the given data. 72 | 73 | """ 74 | 75 | camera = o3d.camera.PinholeCameraIntrinsic() 76 | camera.set_intrinsics(width, height, f[0], f[1], pp[0], pp[1]) 77 | color = o3d.geometry.Image(color_map) 78 | depth = o3d.geometry.Image(depth_map.astype(np.float32)) 79 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(color, depth, 80 | convert_rgb_to_intensity=False, depth_scale=1) 81 | pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, camera, extrinsic) 82 | return pcd 83 | 84 | 85 | def sample_points_from_pcd(pcd, num_points): 86 | new_pcd = o3d.geometry.PointCloud() 87 | xyz = np.asarray(pcd.points) 88 | color = np.asarray(pcd.colors) 89 | 90 | indexes = np.random.randint(0, len(xyz), num_points) 91 | 92 | xyz = xyz[indexes] 93 | color = color[indexes] 94 | 95 | new_pcd.points = o3d.utility.Vector3dVector(xyz) 96 | new_pcd.colors = o3d.utility.Vector3dVector(color) 97 | return new_pcd 98 | 99 | 100 | def make_pcd_from_numpy(xyz, colors): 101 | pcd = o3d.geometry.PointCloud() 102 | pcd.points = o3d.utility.Vector3dVector(xyz) 103 | 104 | if len(colors.shape) == 1: 105 | colors = np.ones_like(xyz) * colors 106 | pcd.colors = o3d.utility.Vector3dVector(colors) 107 | pcd.estimate_normals() 108 | return pcd 109 | 110 | 111 | def make_line_set(pcd1, pcd2, colors=np.array([1, 0, 0]), num_points=None): 112 | points1 = np.asarray(pcd1.points) 113 | points2 = np.asarray(pcd2.points) 114 | 115 | if num_points is not None: 116 | indexes = np.random.randint(0, len(points1), num_points) 117 | 118 | points1 = points1[indexes] 119 | points2 = points2[indexes] 120 | 121 | lines = o3d.geometry.LineSet() 122 | points = np.r_[points1, points2] 123 | lines.points = o3d.utility.Vector3dVector(points) 124 | 125 | size = len(points) // 2 126 | l = np.c_[np.arange(size), np.arange(size) + size] 127 | lines.lines = o3d.utility.Vector2iVector(l) 128 | if len(colors.shape) == 1: 129 | colors = np.ones((size, 3)) * colors 130 | lines.colors = o3d.utility.Vector3dVector(colors) 131 | 132 | return lines 133 | -------------------------------------------------------------------------------- /facap/geometry/numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def project_points(pcd, f, pp, extrinsic, 
height, width, scale=1):
5 |     """
6 |     Projects a given point cloud to 2d according to the camera's pose.
7 | 
8 |     Parameters
9 |     ----------
10 |     pcd : numpy.ndarray
11 |         Source point cloud.
12 |     f : tuple
13 |         Focal lengths (fx, fy).
14 |     pp : tuple
15 |         The principal point (cx, cy).
16 |     extrinsic : numpy.ndarray
17 |         The 4x4 extrinsic matrix.
18 |     scale : float, optional (default=1)
19 |         The scale for the map.
20 |     width : int
21 |         Width of the map.
22 |     height : int
23 |         Height of the map.
24 | 
25 |     Returns
26 |     -------
27 |     numpy.ndarray
28 |         2d depth map and the indices of the valid points.
29 | 
30 |     """
31 | 
32 |     depth_map = np.zeros((height, width))
33 |     intrinsic = np.array([[f[0], 0, pp[0]],
34 |                           [0, f[1], pp[1]],
35 |                           [0, 0, 1]])
36 |     proj_mat = np.dot(intrinsic, extrinsic[:3])
37 |     pcd_xyz1 = np.pad(pcd, [[0, 0], [0, 1]], 'constant', constant_values=1).T
38 |     p_uv_1 = np.dot(proj_mat, pcd_xyz1)
39 |     p_uv = np.divide(p_uv_1, p_uv_1[2])[:2]
40 |     pos_x = np.array(list(map(lambda x: round(x), p_uv[0])))
41 |     pos_y = np.array(list(map(lambda y: round(y), p_uv[1])))
42 |     valid_ids = []
43 |     for i, (x, y) in enumerate(zip(pos_x, pos_y)):
44 |         if x < 0 or y < 0 or x >= width or y >= height:
45 |             continue
46 |         if p_uv_1[2][i] < 0 or p_uv_1[2][i] > 3.:
47 |             continue
48 |         valid_ids.append(i)
49 |     valid_ids = np.array(valid_ids)
50 |     if len(valid_ids) > 0:
51 |         depth_map[pos_y[valid_ids], pos_x[valid_ids]] = np.maximum(0, p_uv_1[2][valid_ids]) * (
52 |                 p_uv_1[2][valid_ids] < 3.)
53 |     depth_map *= scale
54 |     return depth_map, valid_ids
55 | 
56 | 
57 | def unproject_points(depth_map, coords_2d, f, pp, inv_extrinsic, scale=1):
58 |     """
59 |     Unprojects a given depth map into a point cloud with inverse extrinsics.
60 | 
61 |     Parameters
62 |     ----------
63 |     depth_map : numpy.ndarray
64 |         Source depth map.
65 |     coords_2d : numpy.ndarray
66 |         Coordinates of the points used on the map.
67 |     f : tuple
68 |         Focal lengths (fx, fy).
69 |     pp : tuple
70 |         The principal point (cx, cy).
71 |     inv_extrinsic : numpy.ndarray
72 |         The 4x4 matrix with the pose of the camera.
73 |     scale : float, optional (default=1)
74 |         The scale for the map.
75 | 
76 |     Returns
77 |     -------
78 |     numpy.ndarray
79 |         3d coordinates of the given points.
80 | 
81 |     """
82 |     u, v = np.array(list(zip(coords_2d)))[:, 0, :]
83 |     z_cam = depth_map / scale
84 |     if isinstance(f, tuple) or len(f.shape) == 1:
85 |         x_cam = z_cam * (v - pp[0]) / f[0]
86 |         y_cam = z_cam * (u - pp[1]) / f[1]
87 |     else:
88 |         x_cam = z_cam * (v - pp[:, 0]) / f[:, 0]
89 |         y_cam = z_cam * (u - pp[:, 1]) / f[:, 1]
90 |     # x_cam = z_cam * (v - pp[0]) / f[0]
91 |     # y_cam = z_cam * (u - pp[1]) / f[1]
92 | 
93 |     coords_cam = np.stack((x_cam, y_cam, z_cam, np.ones_like(z_cam)))
94 |     coords_world = inv_extrinsic @ coords_cam.T[..., None]
95 | 
96 |     return coords_world[:, :3, 0]
97 | 
98 | 
99 | def unproject_points_rotvec(depths, yxs, f, pp, rotvec, translation, scale=1):
100 |     """
101 |     Unprojects a given depth map into a point cloud with inverse extrinsics.
102 | 
103 |     Parameters
104 |     ----------
105 |     depths : numpy.ndarray
106 |         Array of source depths.
107 |     yxs : numpy.ndarray
108 |         Coordinates of the points corresponding to the depths.
109 |     f : tuple
110 |         Focal lengths (fx, fy).
111 |     pp : tuple
112 |         The principal point (cx, cy).
113 |     rotvec : numpy.ndarray
114 |         Rotation vectors.
115 |     translation : numpy.ndarray
116 |         Translations.
117 |     scale : float, optional (default=1)
118 |         The scale for the map.
119 | 
120 |     Returns
121 |     -------
122 |     numpy.ndarray
123 |         3d coordinates of the given points.
124 | 125 | """ 126 | 127 | u, v = yxs[:, 0], yxs[:, 1] 128 | z_cam = depths / scale 129 | if isinstance(f, tuple) or len(f.shape) == 1: 130 | x_cam = z_cam * (v - pp[0]) / f[0] 131 | y_cam = z_cam * (u - pp[1]) / f[1] 132 | else: 133 | x_cam = z_cam * (v - pp[:, 0]) / f[:, 0] 134 | y_cam = z_cam * (u - pp[:, 1]) / f[:, 1] 135 | # x_cam = z_cam * (v - pp[0]) / f[0] 136 | # y_cam = z_cam * (u - pp[1]) / f[1] 137 | 138 | coords_cam = np.stack((x_cam, y_cam, z_cam)).T 139 | if len(rotvec.shape) == 1: 140 | coords_world = rotate(coords_cam, rotvec[None]) + translation 141 | else: 142 | coords_world = rotate(coords_cam, rotvec) + translation 143 | 144 | return coords_world 145 | 146 | 147 | def rotate(points, rotvec): 148 | """ 149 | Rotates the given point cloud. 150 | 151 | Parameters 152 | ---------- 153 | 154 | rotvec : numpy.ndarray 155 | Rotation vectors. 156 | points : numpy.ndarray 157 | 3d point cloud. 158 | 159 | Returns 160 | ------- 161 | numpy.ndarray 162 | Rotated point cloud. 163 | 164 | """ 165 | 166 | theta = np.linalg.norm(rotvec, axis=1)[:, np.newaxis] 167 | with np.errstate(invalid='ignore'): 168 | v = rotvec / theta 169 | v = np.nan_to_num(v) 170 | dot = np.sum(points * v, axis=1)[:, np.newaxis] 171 | cos_theta = np.cos(theta) 172 | sin_theta = np.sin(theta) 173 | return cos_theta * points + sin_theta * np.cross(v, points) + dot * (1 - cos_theta) * v 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
7 | BIM-based AI-supported LiDAR-Camera Pose Refinement. 8 |
9 | 
10 | 
11 | 
12 | 
13 | ArXiv Paper •
14 | BIM model Data •
15 | Construction Site Semantic Segmentation Data
16 | 
17 | 
18 | This repository will contain the implementation of BIM-based AI-supported LiDAR-Camera Pose Refinement [paper](https://mediatum.ub.tum.de/node?id=1748534).
19 | This project is an extension of the [FACaP](https://github.com/SamsungLabs/FACaP) project.
20 | 
21 | ## Introduction
22 | 
23 | This paper introduces a method conceived to overcome the challenge of integrating mobile 3D sparse LiDAR data and camera measurements with pre-existing building information models (BIM models). This enhancement is crucial for fast and accurate mapping conducted with affordable sensors within indoor environments.
24 | Our proposed framework, termed BIMCaP, presents a novel approach for automatic sensor pose refinement, leveraging a reference 3D BIM model.
25 | Central to our methodology is the application of a bundle adjustment technique, which seamlessly aligns semantically enhanced measurements from the real world with a BIM model.
26 | Through experimentation on real-world open-access data, we demonstrate the superior accuracy of our framework, surpassing current state-of-the-art methods.
27 | BIMCaP holds significant promise for enhancing the efficiency and cost-effectiveness of 3D mapping methodologies.
28 | This advancement carries substantial advantages across various domains, such as construction site management and emergency response, where up-to-date digital maps facilitate better decision-making and enhanced productivity.
29 | 
30 | 
31 | 
32 | We enhance the original FACaP pipeline by leveraging a 3D BIM model, fusing sparse LiDAR and camera sensors, and applying semantic segmentation in challenging construction site environments.
33 | 
34 | 
35 | 
36 | 
37 | 
38 | ## Installation
39 | 
40 | We recommend using the `Dockerfile` to build a container for the project.
41 | All required libraries are described in the environment file `env.yaml`. In the final version
42 | we drop all of the `pytorch3d` dependencies, so you do not need to install them.
43 | 
44 | ## Data structure
45 | All scans should be preprocessed into the following structure:
46 | ```
47 | scan
48 | │   floorplan.npy
49 | |   floorplane.ply
50 | |   ceiling.ply
51 | │
52 | └───db
53 | │   └───sparse
54 | │       └───0
55 | |           |   cameras.bin
56 | |           |   images.bin
57 | |           |   points3D.bin
58 | │
59 | └───arcore
60 | │   │   cam_params.txt
61 | │   │   ...
62 | │   │   depth-0001.npy
63 | │   │   ...
64 | │   │   frame-0001.png
65 | │   │   ...
66 | │   │   pose-0001.txt
67 | │   │   ...
68 | │   │
69 | │
70 | └───segmentation
71 | │   │   frame-0001_wall.png
72 | │   │   ...
73 | │   │   frame-0001_floor.png
74 | │   │   ...
75 | │   │   frame-0001_ceiling.png
76 | │   │   ...
77 | │   │   frame-0001_columns.png
78 | │
79 | 
80 | ```
81 | 
82 | Here:
83 | - `floorplan.npy` is an array with the shape `n x 4`. Each element is a segment of the floorplan.
84 | - `floorplane.ply` is a pointcloud of the floor in the BIM model.
85 | - `ceiling.ply` is a pointcloud of the ceiling in the BIM model.
86 | - `db` features a database in COLMAP format, which is used to match covisible points. This can be computed from the image frames using the [COLMAP](https://colmap.github.io/cli.html) automatic reconstructor.
87 | - `cam_params.txt` intrinsics of the corresponding camera (w, h, f1, f2, p1, p2).
88 | - `pose-0001.txt` extrinsic matrix of the corresponding camera.
89 | - `depth-0001.npy` depth map.
90 | - `frame-0001.png` RGB frame.
91 | - `frame-0001_wall.png` mask of walls for the corresponding frame.
92 | - `frame-0001_floor.png` mask of the floor for the corresponding frame.
93 | - `frame-0001_ceiling.png` mask of the ceiling for the corresponding frame.
94 | - `frame-0001_columns.png` rotated mask of the columns for the corresponding frame.
95 | 
96 | 
97 | For more details please see the file `facap/data/scan.py`.
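
A quick way to sanity-check a preprocessed scan is to load the floorplan and one frame's files directly. An illustrative snippet (the paths simply follow the layout above):

```python
import numpy as np

floorplan = np.load("scan/floorplan.npy")  # shape (n, 4): the two 2d endpoints of each wall segment
print(f"{len(floorplan)} wall segments")

pose = np.loadtxt("scan/arcore/pose-0001.txt")         # 4x4 extrinsic matrix
depth = np.load("scan/arcore/depth-0001.npy")          # per-pixel depth map
cam_params = np.loadtxt("scan/arcore/cam_params.txt")  # (w, h, f1, f2, p1, p2)
print(pose.shape, depth.shape, cam_params.shape)
```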
98 | 
99 | ## Usage
100 | 
101 | To run an experiment, create a config file and run:
102 | 
103 | ```bash
104 | python scripts/run_experiment.py --config path_to_config --device "cuda:0"
105 | ```
106 | 
107 | An example config can be found at `experiments/config.yaml`.
108 | 
109 | ## Citation
110 | 
111 | ```BibTeX
112 | 
113 | @inproceedings{Vega:2024:BIMCaP,
114 |   author = {Vega-Torres, Miguel A. and Ribic, Anna and García de Soto, Borja and Borrmann, André},
115 |   title = {BIMCaP: BIM-based AI-supported LiDAR-Camera Pose Refinement},
116 |   booktitle = { Proc. of the 31st Int. Conference on Intelligent Computing in Engineering (EG-ICE)},
117 |   year = {2024},
118 |   month = {Jul},
119 |   doi = {https://doi.org/10.48550/arXiv.2412.03434},
120 |   keywords = {Construction Robotics; SLAM; BIM; Bundle Adjustment; Long-term Mapping},
121 |   url = {https://github.com/MigVega/BIMCaP}
122 | }
123 | 
124 | ```
125 | 
126 | ## Acknowledgements
127 | This project is an extension of [FACaP](https://github.com/SamsungLabs/FACaP) by Sokolova, Anna and Nikitin, Filipp and Vorontsova, Anna and Konushin, Anton, which is licensed under the MIT License.
128 | 
129 | ## License
130 | For academic usage, the code is released under the [GPLv3 license](https://www.gnu.org/licenses/gpl-3.0.en.html). For any commercial purpose, please contact the authors.
131 | 
--------------------------------------------------------------------------------
/facap/geometry/allign_walls.py:
--------------------------------------------------------------------------------
1 | from copy import deepcopy
2 | 
3 | import numpy as np
4 | import open3d as o3d
5 | from scipy.signal import find_peaks
6 | from tqdm import tqdm
7 | 
8 | from facap.geometry.numpy import unproject_points_rotvec
9 | from facap.geometry.open3d import make_pcd_from_numpy
10 | 
11 | 
12 | def find_nearest_segment(floorplan, wall):
13 |     biases = np.stack([i[:2] for i in floorplan])
14 |     directions = np.stack([i[2:] - i[:2] for i in floorplan])
15 | 
16 |     distances = []
17 | 
18 |     for a, b in zip(directions, biases):
19 |         c = wall - b
20 |         dot_product = np.sum(c * a, axis=-1) / a.dot(a)
21 | 
22 |         par_dist = np.zeros_like(dot_product)
23 |         par_dist[dot_product < 0] = - dot_product[dot_product < 0] * np.linalg.norm(a)
24 |         par_dist[dot_product > 1.] = (dot_product[dot_product > 1.] 
- 1) * np.linalg.norm(a) 25 | ort_dst = np.abs(c[:, 0] * a[1] - c[:, 1] * a[0]) / np.linalg.norm(a) 26 | distances.append(np.sqrt(par_dist ** 2 + ort_dst ** 2)) 27 | 28 | distances = np.stack(distances) 29 | distances = np.mean(distances, axis=1) 30 | segment_idx = np.argmin(distances, axis=0) 31 | 32 | return segment_idx, distances[segment_idx] 33 | 34 | 35 | def get_distinct_walls(pcd, indexes, alpha, 36 | max_vertical_component=.3, 37 | min_num_points=500, 38 | num_ransac_tries=20000, 39 | max_collinearity_rate=.2): 40 | walls = [] 41 | cur_pcd = deepcopy(pcd) 42 | 43 | for i in tqdm(range(num_ransac_tries)): 44 | if len(cur_pcd.points) >= min_num_points: 45 | plane_model, inliers = cur_pcd.segment_plane(distance_threshold=0.01, 46 | ransac_n=10, 47 | num_iterations=100) 48 | a, b, c, _ = plane_model 49 | 50 | if (b <= max_vertical_component) and (abs(alpha - np.arctan(c / a)) <= max_collinearity_rate): 51 | wall_pcd = cur_pcd.select_by_index(inliers) 52 | wall = {"plane_model": plane_model, 53 | "pcd": wall_pcd, 54 | "indexes": indexes[inliers]} 55 | 56 | cur_pcd = cur_pcd.select_by_index(inliers, invert=True) 57 | indexes = np.delete(indexes, inliers) 58 | walls.append(wall) 59 | return walls 60 | 61 | 62 | def join_walls(walls): 63 | wall = o3d.geometry.PointCloud() 64 | plane_models = [] 65 | indexes = [] 66 | for w in walls: 67 | wall += w["pcd"] 68 | plane_models.append(w["plane_model"]) 69 | indexes.append(w["indexes"]) 70 | plane_model = np.array(plane_models).mean(axis=0).tolist() 71 | indexes = np.concatenate(indexes) 72 | wall = {"pcd": wall, "plane_model": plane_model, "indexes": indexes} 73 | return wall 74 | 75 | 76 | def unite_walls(walls, iou_thr=0.8, colin_thr=0.8): 77 | united_walls = {} 78 | for i, wall in tqdm(enumerate(walls)): 79 | new_id = -1 80 | for wall_id in united_walls: 81 | for w in united_walls[wall_id]: 82 | dist_1 = np.array(wall["pcd"].compute_point_cloud_distance(w["pcd"])) 83 | dist_2 = np.array(w["pcd"].compute_point_cloud_distance(wall["pcd"])) 84 | plane_model_1 = wall["plane_model"] 85 | plane_model_2 = w["plane_model"] 86 | if ((dist_1 < .2).mean() > iou_thr or (dist_2 < .2).mean() > iou_thr) and abs( 87 | plane_model_1[:3].dot(plane_model_2[:3])) > colin_thr: 88 | new_id = wall_id 89 | 90 | if len(united_walls) == 0: 91 | new_id = 0 92 | elif new_id == -1: 93 | new_id = max(list(united_walls.keys())) + 1 94 | if new_id not in united_walls: 95 | united_walls[new_id] = [] 96 | united_walls[new_id].append(wall) 97 | 98 | walls = [] 99 | for wall_id in united_walls: 100 | wall = join_walls(united_walls[wall_id]) 101 | walls.append(wall) 102 | return walls 103 | 104 | #TODO look here 105 | def reorder_wall(wall, segment2wall, floorplan): 106 | indexes = [] 107 | segments = [] 108 | print('wallreoder',wall) 109 | for segment_id, w in segment2wall.items(): 110 | indexes.append(w["indexes"]) 111 | segments.append(np.stack([floorplan[segment_id]] * len(w["indexes"]))) 112 | indexes = np.concatenate(indexes) 113 | segments = np.concatenate(segments) 114 | 115 | for key in wall: 116 | print('keyindex',key, indexes) 117 | wall[key] = wall[key][indexes] 118 | wall["segments"] = segments 119 | return wall 120 | 121 | 122 | def get_aligned_walls(walls, floorplan): 123 | plane2walls = {} 124 | for i, w in enumerate(walls): 125 | wall_pcd = np.asarray(w["pcd"].points)[:, [0, 1]] #TODO 0,2 126 | idx, distance = find_nearest_segment(floorplan, wall_pcd) 127 | if idx not in plane2walls: 128 | plane2walls[idx] = [w, ] 129 | else: 130 | plane2walls[idx].append(w) 131 
| 132 | for segment_id in plane2walls: 133 | plane2walls[segment_id] = join_walls(plane2walls[segment_id]) 134 | return plane2walls 135 | 136 | 137 | def align_walls(wall_data, floorplan, scale=1): 138 | walls_pcd = unproject_points_rotvec(wall_data["depths"], wall_data["points"], wall_data["f"], wall_data["pp"], 139 | wall_data["rotvecs"], wall_data["translations"], scale=scale) 140 | walls_pcd = make_pcd_from_numpy(walls_pcd, np.array([0, 1, 0])) 141 | 142 | indexes = np.arange(len(walls_pcd.points)) 143 | normals = np.asarray(walls_pcd.normals) 144 | vertical_component_condition = np.abs(normals[..., 2]) < 0.3 # TODO normal 1 145 | norm_proj = normals[..., [0, 1]][vertical_component_condition] #TODO normals 2 146 | 147 | angles = np.arctan(norm_proj[..., 1] / (1e-10 + norm_proj[..., 0])) 148 | counts, bins = np.histogram(angles, bins=360) 149 | peaks = find_peaks(counts, height=300, distance=50, prominence=30) 150 | 151 | walls = [] 152 | for peak in peaks[0]: 153 | alpha = (bins[peak] + bins[peak + 1]) / 2 154 | 155 | angle_condition = np.abs((np.arctan(normals[..., 1] / (normals[..., 0] + 1e-10))) - alpha) < .1 #TODO normals 2 156 | condition = vertical_component_condition & angle_condition 157 | mask = np.where(condition)[0] 158 | filtered_by_normals = walls_pcd.select_by_index(mask) 159 | filtered_indexes = indexes[mask] 160 | walls.extend(get_distinct_walls(filtered_by_normals, filtered_indexes, alpha)) 161 | 162 | walls = unite_walls(walls, iou_thr=0.7) 163 | plane2walls = get_aligned_walls(walls, floorplan) 164 | #print('plane2walls', len(plane2walls), plane2walls) 165 | wall_data = reorder_wall(wall_data, plane2walls, floorplan) 166 | return wall_data 167 | -------------------------------------------------------------------------------- /scripts/run_experiment.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import os 4 | import open3d as o3d 5 | import numpy as np 6 | 7 | import yaml 8 | from torch import nn 9 | from torch import optim 10 | import torch.nn.functional as F 11 | from torch.linalg import norm 12 | 13 | from facap import feature_errors 14 | from facap.data.scan import Scan 15 | from facap.optimization import Project, Unproject, CameraParameters, FloorTerm, WallTerm, WallSegmentTerm 16 | from facap.utils import dicts_to_torch, visualize_data 17 | from facap.geometry.allign_walls import align_walls 18 | from scipy.spatial.transform import Rotation as R 19 | 20 | #todo get floorplan from BIM 21 | 22 | def compute_pose_rmse(translations_gt, rotations_gt, translations_est, rotations_est): 23 | # Compute RMSE for translations 24 | translation_errors = np.linalg.norm(translations_gt - translations_est, axis=1) 25 | translation_err = np.mean(translation_errors) 26 | 27 | # Compute RMSE for rotations 28 | degree_errors = [] 29 | for rotvec_gt, rotvec_est in zip(rotations_gt, rotations_est): 30 | rotation_gt = R.from_rotvec(rotvec_gt) 31 | rotation_est = R.from_rotvec(rotvec_est) 32 | 33 | cosine_similarity = np.trace(np.dot(np.transpose(rotation_est.as_matrix()), rotation_gt.as_matrix())) 34 | angular_distance_radians = np.arccos((cosine_similarity - 1) / 2) 35 | 36 | # Convert angular distance from radians to degrees 37 | angular_distance_degrees = angular_distance_radians * (180 / np.pi) 38 | 39 | angular_distance_degrees = min(angular_distance_degrees, 180-angular_distance_degrees) 40 | 41 | print("Angular distance between estimated and ground truth rotations:", angular_distance_degrees, 
"degrees") 42 | 43 | degree_errors.append(angular_distance_degrees) 44 | 45 | rotation_err = np.mean(degree_errors) 46 | 47 | return translation_err, rotation_err 48 | 49 | 50 | if __name__ == '__main__': 51 | print('Started experiment') 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument("--config", help="YAML configuration file") 54 | parser.add_argument("--device", default='cuda:0', help="Device to run") 55 | args = parser.parse_args() 56 | with open(args.config, 'r') as ymlfile: 57 | cfg = yaml.load(ymlfile, Loader=yaml.FullLoader) 58 | 59 | #Scan should have camera poses 60 | scan_path = cfg["paths"]["scan_path"] 61 | scan = Scan(scan_path, scale=cfg["data"]["depths_scale"])# , cut_frames=70) 62 | 63 | 64 | save_path = cfg["paths"]["save_path"] 65 | os.makedirs(save_path, exist_ok=True) 66 | 67 | mesh, pcd = scan.make_mesh_pcd() 68 | o3d.io.write_triangle_mesh(f"{save_path}/source_mesh.ply", mesh) 69 | o3d.io.write_point_cloud(f"{save_path}/source_pcd.ply", pcd) 70 | 71 | print('Written Source Mesh and PointCloud') 72 | 73 | #Extract scan data 74 | data = scan.generate_ba_data(min_frame_difference=cfg["data"]["min_frame_difference"], 75 | max_initial_distance=cfg["data"]["max_initial_distance"], 76 | floor_percentiles=cfg["data"]["floor_percentiles"], 77 | wall_sparsity=cfg["data"]["wall_sparsity"], 78 | floor_sparsity=cfg["data"]["floor_sparsity"], 79 | include_columns=cfg["error"]["include_columns"] 80 | ) 81 | 82 | print('Generated data') 83 | if "wall_term_type" in cfg["error"] and cfg["error"]["wall_term_type"] == "segment": 84 | floorplan = torch.from_numpy(np.load(f"{scan_path}/floorplan.npy")) 85 | alligned_walls = align_walls(data[2], floorplan, scale=scan.scale) 86 | data = (data[0], data[1], alligned_walls, data[3], data[4]) 87 | 88 | visualize_data(data, save_path=save_path, scale=scan.scale) 89 | dicts_to_torch(data, args.device) 90 | 91 | #Extract left? 
92 |     left, right, wall, floor, ceiling = data
93 | 
94 |     # Get camera parameters
95 |     camera_parameters = CameraParameters(scan.cameras).to(args.device).float()
96 |     unproject = Unproject(camera_parameters, scale=scan.scale)
97 |     project = Project(camera_parameters)
98 |     cost_function = nn.MSELoss()
99 | 
100 |     use_bim = cfg["data"]["use_bim"]
101 | 
102 |     if cfg["error"]["floor_term"]:
103 |         floor_plane = None
104 |         if use_bim:
105 |             floor_plane = torch.Tensor(np.asarray(o3d.io.read_point_cloud(f"{scan_path}/floorplane.ply").points).astype(np.float32))
106 |         floor_function = FloorTerm(floor, unproject, cost_function, floor_plane)
107 | 
108 |     if cfg["error"]["ceil_term"]:
109 |         ceil_plane = None
110 |         if use_bim:
111 |             ceil_plane = torch.Tensor(np.asarray(o3d.io.read_point_cloud(f"{scan_path}/ceiling.ply").points).astype(np.float32))
112 |         ceil_function = FloorTerm(ceiling, unproject, cost_function, ceil_plane)
113 | 
114 |     if cfg["error"]["wall_term"]:
115 |         floorplan = torch.from_numpy(np.load(f"{scan_path}/floorplan.npy"))
116 |         if cfg["error"]["wall_term_type"] == "point":
117 |             wall_function = WallTerm(wall, unproject, cost_function, floorplan).to(args.device).float()
118 |         else:
119 |             wall_function = WallSegmentTerm(wall, unproject, cost_function, floorplan).to(args.device).float()
120 | 
121 |     params = []
122 | 
123 |     # Add the camera parameters to the set of optimized variables
124 |     fixed_cameras = [scan._frames[i] for i in cfg["optimization"]["fixed_cameras_idx"]]
125 |     for name, param in camera_parameters.named_parameters():
126 |         if name.split(".")[-1] not in fixed_cameras:
127 |             params.append(param)
128 | 
129 |     optimizer = optim.SGD(params, lr=cfg["optimization"]["lr"], momentum=cfg["optimization"]["momentum"])
130 | 
131 |     torch.save(camera_parameters.state_dict(), f"{save_path}/source_cameras.pth")
132 | 
133 |     scan_gt = Scan("../camera_refinement_BIM/scan_full/", scale=1)#, cut_frames=70)
134 | 
135 |     gt_mesh, gt_pcd = scan_gt.make_mesh_pcd()
136 |     o3d.io.write_triangle_mesh(f"{save_path}/gt_mesh.ply", gt_mesh)
137 |     o3d.io.write_point_cloud(f"{save_path}/gt_pcd.ply", gt_pcd)
138 | 
139 |     print('Written gt mesh and pointcloud')
140 | 
141 |     camera_parameters_gt = CameraParameters(scan_gt.cameras).to(args.device).float()
142 |     torch.save(camera_parameters_gt.state_dict(), f"{save_path}/gt_cameras.pth")
143 |     rotvec_gt = camera_parameters_gt.state_dict()['rotvecs']
144 |     trans_gt = camera_parameters_gt.state_dict()['translations']
145 | 
146 |     for epoch in range(cfg["optimization"]["num_epoches"]):
147 | 
148 | 
149 |         if epoch % 50 == 0:
150 |             print(f"Epoch {epoch}")
151 |             rotvec_est = camera_parameters.state_dict()['rotvecs']
152 |             trans_est = camera_parameters.state_dict()['translations']
153 |             translation_rmse, rotation_rmse = compute_pose_rmse(trans_gt.cpu().detach().numpy(),
154 |                                                                 rotvec_gt.cpu().detach().numpy(),
155 |                                                                 trans_est.cpu().detach().numpy(),
156 |                                                                 rotvec_est.cpu().detach().numpy())
157 |             print("Translation Err:", translation_rmse)
158 |             print("Rotation Err:", rotation_rmse)
159 | 
160 | 
161 |         optimizer.zero_grad()
162 |         error_args = {"unproject": unproject,
163 |                       "project": project,
164 |                       "scale": scan.scale,
165 |                       "distance_function": cost_function,
166 |                       **cfg["error"]}
167 |         ba_function = getattr(feature_errors, cfg["error"]["error_type"])
168 |         ba_term = ba_function(left, right, **error_args)
169 | 
170 |         floor_term = 0.
171 |         wall_term = 0.
172 |         ceil_term = 0.
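        # Total objective assembled below:
        #   loss = ba_term + wall_term + floor_term + ceil_term,
        # where each geometric term already includes its weight from the config
        # (e.g. floor_term = floor_function() * floor_weight) and stays 0. when
        # the corresponding flag in cfg["error"] is disabled.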
173 | print(f"The value of the loss function at iteration {epoch}") 174 | print(f"\t\t feature-based BA term - {float(ba_term)}") 175 | 176 | if cfg["error"]["floor_term"]: 177 | floor_term = floor_function() * cfg["error"]["floor_weight"] 178 | print(f"\t\t floor term - {float(floor_term)}") 179 | 180 | if cfg["error"]["ceil_term"]: 181 | 182 | ceil_term = ceil_function() * cfg["error"]["ceil_weight"] 183 | print(f"\t\t ceil term - {float(ceil_term)}") 184 | 185 | if cfg["error"]["wall_term"]: 186 | wall_term = wall_function() * cfg["error"]["wall_weight"] 187 | print(f"\t\t wall term - {float(wall_term)}") 188 | 189 | loss = ba_term + wall_term + floor_term + ceil_term 190 | 191 | loss.backward() 192 | 193 | for name, param in camera_parameters.named_parameters(): 194 | if param.grad is not None and torch.isnan(param.grad).any(): 195 | param.grad[torch.isnan(param.grad)] = 0.0 196 | optimizer.step() 197 | 198 | 199 | torch.save(camera_parameters.state_dict(), f"{save_path}/cameras.pth") 200 | cameras = camera_parameters.get_cameras() 201 | scan.set_cameras(cameras) 202 | 203 | p_mesh, p_pcd = scan.make_mesh_pcd() 204 | o3d.io.write_triangle_mesh(f"{save_path}/processed_mesh.ply", p_mesh) 205 | o3d.io.write_point_cloud(f"{save_path}/processed_pcd.ply", p_pcd) 206 | print('Saved processed mesh and point cloud') 207 | 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /facap/data/scan.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from glob import glob 3 | from open3d.pipelines import integration 4 | 5 | import cv2 6 | import numpy as np 7 | import open3d as o3d 8 | from scipy.spatial.transform import Rotation as R 9 | 10 | from facap.geometry.open3d import unproject_points, sample_points_from_pcd 11 | from facap.geometry.numpy import unproject_points_rotvec 12 | from facap.colmap_scripts.read_write_model import read_model 13 | import matplotlib.pyplot as plt 14 | 15 | 16 | def read_data(scan_path, frame_id, include_columns=False): 17 | color = cv2.imread(f'{scan_path}/arcore/frame-{frame_id}.png') 18 | wall = cv2.imread(f'{scan_path}/segmentation/frame-{frame_id}_wall.png', cv2.IMREAD_GRAYSCALE) 19 | wall = np.rot90(wall, k=1) 20 | floor = cv2.imread(f'{scan_path}/segmentation/frame-{frame_id}_floor.png', cv2.IMREAD_GRAYSCALE) 21 | floor = np.rot90(floor, k=1) 22 | ceiling = cv2.imread(f'{scan_path}/segmentation/frame-{frame_id}_ceiling.png', cv2.IMREAD_GRAYSCALE) 23 | ceiling = np.rot90(ceiling, k=1) 24 | columns = cv2.imread(f'{scan_path}/segmentation/frame-{frame_id}_columns.png', cv2.IMREAD_GRAYSCALE) 25 | columns = np.rot90(columns, k=1) 26 | 27 | if include_columns: 28 | print('include_columns:', include_columns) 29 | wall = cv2.bitwise_or(wall, columns) 30 | 31 | depth = np.load(f'{scan_path}/arcore/depth-{frame_id}.npy') 32 | 33 | pose = np.loadtxt(f'{scan_path}/arcore/pose-{frame_id}.txt') 34 | 35 | depth = np.ascontiguousarray(np.rot90(depth, k=1)) 36 | color = np.ascontiguousarray(np.rot90(color, k=1)) 37 | 38 | rotation_matrix = np.array([[0, 0, 1], 39 | [0, 1, 0], 40 | [-1, 0, 0]]) 41 | 42 | # Extract the original translation part 43 | translation = pose[:3, 3] 44 | new_pose = np.eye(4) 45 | new_pose[:3, :3] = np.dot(pose[:3, :3], rotation_matrix) 46 | new_pose[:3, 3] = translation 47 | 48 | camera_params = np.loadtxt(f'{scan_path}/arcore/cam_params.txt')
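# The color and depth maps are rotated by 90 degrees above, so the intrinsics in
# cam_params.txt (presumably stored for the unrotated orientation) are reordered to match:
# width/height, fx/fy, and cx/cy are swapped pairwise below.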
camera_params[[0, 1]] = camera_params[[1, 0]] 50 | camera_params[[2, 3]] = camera_params[[3, 2]] 51 | camera_params[[4, 5]] = camera_params[[5, 4]] 52 | 53 | return color, wall, floor, ceiling, columns, depth, new_pose, camera_params 54 | 55 | 56 | def get_yxds(scan_path, frame_ids, max_depth=3000, min_depth=0): 57 | yxds = {} 58 | for frame_id in frame_ids: 59 | color, _, _, _, _, depth, _, _ = read_data(scan_path, frame_id) 60 | depth_mask = np.where((depth > min_depth) * (depth < max_depth)) 61 | yxds[frame_id] = get_index_value_dict(depth, depth_mask) 62 | return yxds 63 | 64 | 65 | def get_segmentation(scan_path, frame_ids, part="floor", sparsity=30, max_depth=10, min_depth=0, include_columns=False): 66 | result = {} 67 | for frame_id in frame_ids: 68 | color, wall, floor, ceiling, columns, depth, _, _ = read_data(scan_path, frame_id, include_columns=include_columns) 69 | if part == "floor": 70 | parts = floor 71 | elif part == "wall": 72 | parts = wall 73 | elif part == "ceiling": 74 | parts = ceiling 75 | else: 76 | parts = columns 77 | 78 | part_mask = np.where((depth > min_depth) * (depth < max_depth) * (parts > 0)) 79 | part_dict = get_index_value_dict(depth, part_mask, sparsity=sparsity) 80 | result[frame_id] = list(part_dict.items()) 81 | return result 82 | 83 | 84 | def read_features(scan_path, yxds, frame_ids, min_freq=2): 85 | scan_path = f"{scan_path}/db" 86 | cameras, images, points_3d = read_model(f'{scan_path}/sparse/0/') 87 | result = {} 88 | for point in points_3d: 89 | result[point] = {} 90 | 91 | for img_id in points_3d[point].image_ids: 92 | img = images[img_id] 93 | xy = tuple((img.xys[img.point3D_ids == point] + 0.5).astype(int)[0]) 94 | yx = (xy[1], xy[0]) 95 | frame_id = img.name[6:-4] 96 | if frame_id in frame_ids: 97 | if yx in yxds[frame_id]: 98 | d = yxds[frame_id][yx] 99 | result[point][frame_id] = (yx, d) 100 | filtered_result = {} 101 | for point in result: 102 | if len(result[point]) > min_freq: 103 | filtered_result[point] = result[point] 104 | return filtered_result 105 | 106 | 107 | def get_index_value_dict(array_2d, mask, sparsity=1): 108 | y, x = np.array(mask).astype(int) 109 | d = array_2d[mask].astype(float) 110 | x, y, d = x[::sparsity], y[::sparsity], d[::sparsity] 111 | yxd = dict(zip(list(zip(y, x)), d)) 112 | return yxd 113 | 114 | 115 | class Camera: 116 | def __init__(self, f, pp, rotvec, translation): 117 | self.f = f 118 | self.pp = pp 119 | self.rotvec = rotvec.astype(float) 120 | self.translation = translation.astype(float) 121 | 122 | @classmethod 123 | def read_camera(cls, scan_path, frame_id): 124 | pose = np.loadtxt(f'{scan_path}/arcore/pose-{frame_id}.txt') 125 | rotation_matrix = np.array([[0, 0, 1], 126 | [0, 1, 0], 127 | [-1, 0, 0]]) 128 | 129 | # Extract the original translation part 130 | translation = pose[:3, 3] 131 | new_pose = np.eye(4) 132 | new_pose[:3, :3] = np.dot(pose[:3, :3], rotation_matrix) 133 | new_pose[:3, 3] = translation 134 | pose = new_pose 135 | 136 | camera_params = np.loadtxt(f'{scan_path}/arcore/cam_params.txt') 137 | camera_params[[0, 1]] = camera_params[[1, 0]] 138 | camera_params[[2, 3]] = camera_params[[3, 2]] 139 | camera_params[[4, 5]] = camera_params[[5, 4]] 140 | 141 | rotvec = R.from_matrix(pose[:3, :3]).as_rotvec() 142 | translation = pose[:3, 3] 143 | f = (camera_params[2], camera_params[3]) 144 | pp = (camera_params[4], camera_params[5]) 145 | camera = cls(f, pp,
rotvec, translation) 146 | return camera 147 | 148 | 149 | class Scan: 150 | def __init__(self, scan_path, sparsity=1, cut_frames=None, scale=1): 151 | frames = sorted(glob(f"{scan_path}/segmentation/frame*_floor*")) 152 | frame_ids = [i.split("/")[-1][6:-10] for i in frames] 153 | frame_ids = frame_ids[::sparsity] 154 | 155 | if cut_frames is not None: 156 | frame_ids = frame_ids[:cut_frames] 157 | self.scan_path = scan_path 158 | self._frames = frame_ids 159 | self.scale = scale 160 | self.cameras = {frame_id: Camera.read_camera(scan_path, frame_id) for frame_id in frame_ids} 161 | self.include_columns = False 162 | 163 | def get_data(self, cam_id): 164 | color, wall, floor, ceiling, columns, depth, pose, camera_params = read_data(self.scan_path, cam_id, include_columns=self.include_columns) 165 | return color, wall, floor, depth, pose, camera_params 166 | 167 | def make_pcd(self, num_points=9000000): 168 | pcds = [] 169 | for frame_id in self._frames: 170 | color_map, _, _, depth_map, _, _ = self.get_data(frame_id) 171 | camera = self.cameras[frame_id] 172 | extrinsic = np.eye(4) 173 | extrinsic[:3, :3] = R.from_rotvec(camera.rotvec).as_matrix() 174 | extrinsic[:3, 3] = camera.translation 175 | 176 | pcd = unproject_points(depth_map, color_map, np.linalg.inv(extrinsic), 177 | camera.f, camera.pp, *depth_map.shape, scale=self.scale) 178 | pcds.append(pcd) 179 | 180 | pcd_combined = o3d.geometry.PointCloud() 181 | 182 | for pcd in pcds: 183 | pcd_combined += pcd 184 | 185 | if num_points is not None: 186 | pcd_combined = sample_points_from_pcd(pcd_combined, num_points) 187 | 188 | return pcd_combined 189 | 190 | def make_mesh_pcd(self, vox_length=0.05): 191 | 192 | volume = integration.ScalableTSDFVolume( 193 | voxel_length=vox_length, 194 | sdf_trunc=vox_length * 4, 195 | color_type=integration.TSDFVolumeColorType.RGB8) 196 | 197 | camera = o3d.camera.PinholeCameraIntrinsic() 198 | # Note: the raw pose from get_data cannot be used directly here; the extrinsics are rebuilt below from each camera's rotvec and translation 199 | for cam_id in self._frames: 200 | color_map, wall, floor, depth_map, pose, camera_params = self.get_data(cam_id) 201 | depth_map = depth_map.astype(np.float32) 202 | 203 | camera.set_intrinsics(int(camera_params[0]), int(camera_params[1]), *camera_params[2:]) 204 | color = o3d.geometry.Image(color_map) 205 | depth = o3d.geometry.Image(depth_map) 206 | 207 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( 208 | color, depth, depth_scale=1, depth_trunc=8, convert_rgb_to_intensity=False)  # depth_scale=1 and depth_trunc=8 assume metric depth maps 209 | 210 | rotvec = self.cameras[cam_id].rotvec 211 | translation = self.cameras[cam_id].translation 212 | 213 | rotation_matrix = R.from_rotvec(rotvec).as_matrix() 214 | pose_matrix = np.eye(4) # Initialize 4x4 identity matrix 215 | pose_matrix[:3, :3] = rotation_matrix # Assign rotation 216 | pose_matrix[:3, 3] = translation 217 | 218 | volume.integrate(rgbd, camera, np.linalg.inv(pose_matrix)) 219 | 220 | mesh = volume.extract_triangle_mesh() 221 | mesh.compute_vertex_normals() 222 | 223 | pcd = volume.extract_point_cloud() 224 | return mesh, pcd 225 | 226 | def set_cameras(self, cameras): 227 | self.cameras = cameras 228 | 229 | def generate_ba_data(self, min_frame_difference=3, 230 | floor_percentiles=(2, 90), 231 | max_initial_distance=0.4, 232 | wall_sparsity=30, 233 | floor_sparsity=30, 234 | include_columns=False): 235 | self.include_columns = include_columns 236 | left = {"points": [], 237 | "depths": [], 238 | "camera_idxs": [], 239 | "rotvecs": [], 240 | "translations": [], 241 | "f": [], 242 | "pp": []} 243 | right
= deepcopy(left) 244 | floor = deepcopy(left) 245 | wall = deepcopy(left) 246 | ceiling = deepcopy(left) 247 | 248 | yxds = get_yxds(self.scan_path, self._frames) 249 | features = read_features(self.scan_path, yxds, self._frames) 250 | 251 | for point_id in features: 252 | point = features[point_id] 253 | cams = list(point.keys()) 254 | cam_params = [self.cameras[i] for i in cams] 255 | rotvecs = [i.rotvec for i in cam_params] 256 | translations = [i.translation for i in cam_params] 257 | 258 | for i, cam_i in enumerate(cams): 259 | for j in range(i + 1, len(cams)): 260 | if abs(int(cam_i) - int(cams[j])) >= min_frame_difference: 261 | for part, idx, cam_idx in zip([left, right], [i, j], [cam_i, cams[j]]): 262 | part["points"].append(point[cam_idx][0]) 263 | part["depths"].append(point[cam_idx][1]) 264 | part["rotvecs"].append(rotvecs[idx]) 265 | part["translations"].append(translations[idx]) 266 | part["f"].append(cam_params[idx].f) 267 | part["pp"].append(cam_params[idx].pp) 268 | part["camera_idxs"].append(cam_idx) 269 | del yxds 270 | wall_data = get_segmentation(self.scan_path, self._frames, part="wall", sparsity=wall_sparsity, include_columns=self.include_columns) 271 | floor_data = get_segmentation(self.scan_path, self._frames, part="floor", sparsity=floor_sparsity, include_columns=self.include_columns) 272 | ceiling_data = get_segmentation(self.scan_path, self._frames, part="ceiling", sparsity=floor_sparsity, include_columns=self.include_columns) 273 | 274 | for source, target in zip([wall_data, floor_data, ceiling_data], [wall, floor, ceiling]): 275 | for cam_id in source: 276 | camera = self.cameras[cam_id] 277 | rotvec = camera.rotvec 278 | translation = camera.translation 279 | f = camera.f 280 | pp = camera.pp 281 | for point in source[cam_id]: 282 | target["points"].append(point[0]) 283 | target["depths"].append(point[1]) 284 | target["rotvecs"].append(rotvec) 285 | target["translations"].append(translation) 286 | target["f"].append(f) 287 | target["pp"].append(pp) 288 | target["camera_idxs"].append(cam_id) 289 | del wall_data, floor_data, ceiling_data 290 | 291 | for part in [left, right, wall, floor, ceiling]: 292 | for key in part: 293 | part[key] = np.array(part[key]) 294 | 295 | def apply_mask(dct, mask): 296 | for key in dct: 297 | dct[key] = dct[key][mask] 298 | 299 | def unproject(part): 300 | return unproject_points_rotvec(part["depths"], part["points"], part["f"], 301 | part["pp"], part["rotvecs"], part["translations"], scale=self.scale) 302 | 303 | keypoint_mask = np.linalg.norm(unproject(left) - unproject(right), axis=-1) < max_initial_distance 304 | 305 | apply_mask(left, keypoint_mask) 306 | apply_mask(right, keypoint_mask) 307 | 308 | floor_pcd_vert = unproject(floor)[:, 2] 309 | floor_mask = (floor_pcd_vert > np.percentile(floor_pcd_vert, floor_percentiles[0])) & \ 310 | (floor_pcd_vert < np.percentile(floor_pcd_vert, floor_percentiles[1])) 311 | apply_mask(floor, floor_mask) 312 | 313 | ceiling_pcd_vert = unproject(ceiling)[:, 2] 314 | ceiling_mask = (ceiling_pcd_vert > np.percentile(ceiling_pcd_vert, floor_percentiles[0])) & \ 315 | (ceiling_pcd_vert < np.percentile(ceiling_pcd_vert, floor_percentiles[1])) 316 | apply_mask(ceiling, ceiling_mask) 317 | 318 | return left, right, wall, floor, ceiling 319 | -------------------------------------------------------------------------------- /facap/colmap_scripts/read_write_model.py:
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2022, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Author: Johannes L. 
Schoenberger (jsch-at-demuc-dot-de) 31 | 32 | import os 33 | import collections 34 | import numpy as np 35 | import struct 36 | import argparse 37 | 38 | 39 | CameraModel = collections.namedtuple( 40 | "CameraModel", ["model_id", "model_name", "num_params"]) 41 | Camera = collections.namedtuple( 42 | "Camera", ["id", "model", "width", "height", "params"]) 43 | BaseImage = collections.namedtuple( 44 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 45 | Point3D = collections.namedtuple( 46 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 47 | 48 | 49 | class Image(BaseImage): 50 | def qvec2rotmat(self): 51 | return qvec2rotmat(self.qvec) 52 | 53 | 54 | CAMERA_MODELS = { 55 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 56 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 57 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 58 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 59 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 60 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 61 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 62 | CameraModel(model_id=7, model_name="FOV", num_params=5), 63 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 64 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 65 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 66 | } 67 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 68 | for camera_model in CAMERA_MODELS]) 69 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 70 | for camera_model in CAMERA_MODELS]) 71 | 72 | 73 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 74 | """Read and unpack the next bytes from a binary file. 75 | :param fid: 76 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 77 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 78 | :param endian_character: Any of {@, =, <, >, !} 79 | :return: Tuple of read and unpacked values. 80 | """ 81 | data = fid.read(num_bytes) 82 | return struct.unpack(endian_character + format_char_sequence, data) 83 | 84 | 85 | def write_next_bytes(fid, data, format_char_sequence, endian_character="<"): 86 | """pack and write to a binary file. 87 | :param fid: 88 | :param data: data to send, if multiple elements are sent at the same time, 89 | they should be encapsuled either in a list or a tuple 90 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 
91 | should be the same length as the data list or tuple 92 | :param endian_character: Any of {@, =, <, >, !} 93 | """ 94 | if isinstance(data, (list, tuple)): 95 | bytes = struct.pack(endian_character + format_char_sequence, *data) 96 | else: 97 | bytes = struct.pack(endian_character + format_char_sequence, data) 98 | fid.write(bytes) 99 | 100 | 101 | def read_cameras_text(path): 102 | """ 103 | see: src/base/reconstruction.cc 104 | void Reconstruction::WriteCamerasText(const std::string& path) 105 | void Reconstruction::ReadCamerasText(const std::string& path) 106 | """ 107 | cameras = {} 108 | with open(path, "r") as fid: 109 | while True: 110 | line = fid.readline() 111 | if not line: 112 | break 113 | line = line.strip() 114 | if len(line) > 0 and line[0] != "#": 115 | elems = line.split() 116 | camera_id = int(elems[0]) 117 | model = elems[1] 118 | width = int(elems[2]) 119 | height = int(elems[3]) 120 | params = np.array(tuple(map(float, elems[4:]))) 121 | cameras[camera_id] = Camera(id=camera_id, model=model, 122 | width=width, height=height, 123 | params=params) 124 | return cameras 125 | 126 | 127 | def read_cameras_binary(path_to_model_file): 128 | """ 129 | see: src/base/reconstruction.cc 130 | void Reconstruction::WriteCamerasBinary(const std::string& path) 131 | void Reconstruction::ReadCamerasBinary(const std::string& path) 132 | """ 133 | cameras = {} 134 | with open(path_to_model_file, "rb") as fid: 135 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 136 | for _ in range(num_cameras): 137 | camera_properties = read_next_bytes( 138 | fid, num_bytes=24, format_char_sequence="iiQQ") 139 | camera_id = camera_properties[0] 140 | model_id = camera_properties[1] 141 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 142 | width = camera_properties[2] 143 | height = camera_properties[3] 144 | num_params = CAMERA_MODEL_IDS[model_id].num_params 145 | params = read_next_bytes(fid, num_bytes=8*num_params, 146 | format_char_sequence="d"*num_params) 147 | cameras[camera_id] = Camera(id=camera_id, 148 | model=model_name, 149 | width=width, 150 | height=height, 151 | params=np.array(params)) 152 | assert len(cameras) == num_cameras 153 | return cameras 154 | 155 | 156 | def write_cameras_text(cameras, path): 157 | """ 158 | see: src/base/reconstruction.cc 159 | void Reconstruction::WriteCamerasText(const std::string& path) 160 | void Reconstruction::ReadCamerasText(const std::string& path) 161 | """ 162 | HEADER = "# Camera list with one line of data per camera:\n" + \ 163 | "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + \ 164 | "# Number of cameras: {}\n".format(len(cameras)) 165 | with open(path, "w") as fid: 166 | fid.write(HEADER) 167 | for _, cam in cameras.items(): 168 | to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params] 169 | line = " ".join([str(elem) for elem in to_write]) 170 | fid.write(line + "\n") 171 | 172 | 173 | def write_cameras_binary(cameras, path_to_model_file): 174 | """ 175 | see: src/base/reconstruction.cc 176 | void Reconstruction::WriteCamerasBinary(const std::string& path) 177 | void Reconstruction::ReadCamerasBinary(const std::string& path) 178 | """ 179 | with open(path_to_model_file, "wb") as fid: 180 | write_next_bytes(fid, len(cameras), "Q") 181 | for _, cam in cameras.items(): 182 | model_id = CAMERA_MODEL_NAMES[cam.model].model_id 183 | camera_properties = [cam.id, 184 | model_id, 185 | cam.width, 186 | cam.height] 187 | write_next_bytes(fid, camera_properties, "iiQQ") 188 | for p in cam.params: 189 | write_next_bytes(fid, 
float(p), "d") 190 | return cameras 191 | 192 | 193 | def read_images_text(path): 194 | """ 195 | see: src/base/reconstruction.cc 196 | void Reconstruction::ReadImagesText(const std::string& path) 197 | void Reconstruction::WriteImagesText(const std::string& path) 198 | """ 199 | images = {} 200 | with open(path, "r") as fid: 201 | while True: 202 | line = fid.readline() 203 | if not line: 204 | break 205 | line = line.strip() 206 | if len(line) > 0 and line[0] != "#": 207 | elems = line.split() 208 | image_id = int(elems[0]) 209 | qvec = np.array(tuple(map(float, elems[1:5]))) 210 | tvec = np.array(tuple(map(float, elems[5:8]))) 211 | camera_id = int(elems[8]) 212 | image_name = elems[9] 213 | elems = fid.readline().split() 214 | xys = np.column_stack([tuple(map(float, elems[0::3])), 215 | tuple(map(float, elems[1::3]))]) 216 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 217 | images[image_id] = Image( 218 | id=image_id, qvec=qvec, tvec=tvec, 219 | camera_id=camera_id, name=image_name, 220 | xys=xys, point3D_ids=point3D_ids) 221 | return images 222 | 223 | 224 | def read_images_binary(path_to_model_file): 225 | """ 226 | see: src/base/reconstruction.cc 227 | void Reconstruction::ReadImagesBinary(const std::string& path) 228 | void Reconstruction::WriteImagesBinary(const std::string& path) 229 | """ 230 | images = {} 231 | with open(path_to_model_file, "rb") as fid: 232 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 233 | for _ in range(num_reg_images): 234 | binary_image_properties = read_next_bytes( 235 | fid, num_bytes=64, format_char_sequence="idddddddi") 236 | image_id = binary_image_properties[0] 237 | qvec = np.array(binary_image_properties[1:5]) 238 | tvec = np.array(binary_image_properties[5:8]) 239 | camera_id = binary_image_properties[8] 240 | image_name = "" 241 | current_char = read_next_bytes(fid, 1, "c")[0] 242 | while current_char != b"\x00": # look for the ASCII 0 entry 243 | image_name += current_char.decode("utf-8") 244 | current_char = read_next_bytes(fid, 1, "c")[0] 245 | num_points2D = read_next_bytes(fid, num_bytes=8, 246 | format_char_sequence="Q")[0] 247 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 248 | format_char_sequence="ddq"*num_points2D) 249 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 250 | tuple(map(float, x_y_id_s[1::3]))]) 251 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 252 | images[image_id] = Image( 253 | id=image_id, qvec=qvec, tvec=tvec, 254 | camera_id=camera_id, name=image_name, 255 | xys=xys, point3D_ids=point3D_ids) 256 | return images 257 | 258 | 259 | def write_images_text(images, path): 260 | """ 261 | see: src/base/reconstruction.cc 262 | void Reconstruction::ReadImagesText(const std::string& path) 263 | void Reconstruction::WriteImagesText(const std::string& path) 264 | """ 265 | if len(images) == 0: 266 | mean_observations = 0 267 | else: 268 | mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images) 269 | HEADER = "# Image list with two lines of data per image:\n" + \ 270 | "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \ 271 | "# POINTS2D[] as (X, Y, POINT3D_ID)\n" + \ 272 | "# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations) 273 | 274 | with open(path, "w") as fid: 275 | fid.write(HEADER) 276 | for _, img in images.items(): 277 | image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] 278 | first_line = " ".join(map(str, image_header)) 279 | fid.write(first_line + "\n") 280 
| 281 | points_strings = [] 282 | for xy, point3D_id in zip(img.xys, img.point3D_ids): 283 | points_strings.append(" ".join(map(str, [*xy, point3D_id]))) 284 | fid.write(" ".join(points_strings) + "\n") 285 | 286 | 287 | def write_images_binary(images, path_to_model_file): 288 | """ 289 | see: src/base/reconstruction.cc 290 | void Reconstruction::ReadImagesBinary(const std::string& path) 291 | void Reconstruction::WriteImagesBinary(const std::string& path) 292 | """ 293 | with open(path_to_model_file, "wb") as fid: 294 | write_next_bytes(fid, len(images), "Q") 295 | for _, img in images.items(): 296 | write_next_bytes(fid, img.id, "i") 297 | write_next_bytes(fid, img.qvec.tolist(), "dddd") 298 | write_next_bytes(fid, img.tvec.tolist(), "ddd") 299 | write_next_bytes(fid, img.camera_id, "i") 300 | for char in img.name: 301 | write_next_bytes(fid, char.encode("utf-8"), "c") 302 | write_next_bytes(fid, b"\x00", "c") 303 | write_next_bytes(fid, len(img.point3D_ids), "Q") 304 | for xy, p3d_id in zip(img.xys, img.point3D_ids): 305 | write_next_bytes(fid, [*xy, p3d_id], "ddq") 306 | 307 | 308 | def read_points3D_text(path): 309 | """ 310 | see: src/base/reconstruction.cc 311 | void Reconstruction::ReadPoints3DText(const std::string& path) 312 | void Reconstruction::WritePoints3DText(const std::string& path) 313 | """ 314 | points3D = {} 315 | with open(path, "r") as fid: 316 | while True: 317 | line = fid.readline() 318 | if not line: 319 | break 320 | line = line.strip() 321 | if len(line) > 0 and line[0] != "#": 322 | elems = line.split() 323 | point3D_id = int(elems[0]) 324 | xyz = np.array(tuple(map(float, elems[1:4]))) 325 | rgb = np.array(tuple(map(int, elems[4:7]))) 326 | error = float(elems[7]) 327 | image_ids = np.array(tuple(map(int, elems[8::2]))) 328 | point2D_idxs = np.array(tuple(map(int, elems[9::2]))) 329 | points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb, 330 | error=error, image_ids=image_ids, 331 | point2D_idxs=point2D_idxs) 332 | return points3D 333 | 334 | 335 | def read_points3D_binary(path_to_model_file): 336 | """ 337 | see: src/base/reconstruction.cc 338 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 339 | void Reconstruction::WritePoints3DBinary(const std::string& path) 340 | """ 341 | points3D = {} 342 | with open(path_to_model_file, "rb") as fid: 343 | num_points = read_next_bytes(fid, 8, "Q")[0] 344 | for _ in range(num_points): 345 | binary_point_line_properties = read_next_bytes( 346 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 347 | point3D_id = binary_point_line_properties[0] 348 | xyz = np.array(binary_point_line_properties[1:4]) 349 | rgb = np.array(binary_point_line_properties[4:7]) 350 | error = np.array(binary_point_line_properties[7]) 351 | track_length = read_next_bytes( 352 | fid, num_bytes=8, format_char_sequence="Q")[0] 353 | track_elems = read_next_bytes( 354 | fid, num_bytes=8*track_length, 355 | format_char_sequence="ii"*track_length) 356 | image_ids = np.array(tuple(map(int, track_elems[0::2]))) 357 | point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) 358 | points3D[point3D_id] = Point3D( 359 | id=point3D_id, xyz=xyz, rgb=rgb, 360 | error=error, image_ids=image_ids, 361 | point2D_idxs=point2D_idxs) 362 | return points3D 363 | 364 | 365 | def write_points3D_text(points3D, path): 366 | """ 367 | see: src/base/reconstruction.cc 368 | void Reconstruction::ReadPoints3DText(const std::string& path) 369 | void Reconstruction::WritePoints3DText(const std::string& path) 370 | """ 371 | if len(points3D) == 0: 
372 | mean_track_length = 0 373 | else: 374 | mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D) 375 | HEADER = "# 3D point list with one line of data per point:\n" + \ 376 | "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \ 377 | "# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length) 378 | 379 | with open(path, "w") as fid: 380 | fid.write(HEADER) 381 | for _, pt in points3D.items(): 382 | point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error] 383 | fid.write(" ".join(map(str, point_header)) + " ") 384 | track_strings = [] 385 | for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs): 386 | track_strings.append(" ".join(map(str, [image_id, point2D]))) 387 | fid.write(" ".join(track_strings) + "\n") 388 | 389 | 390 | def write_points3D_binary(points3D, path_to_model_file): 391 | """ 392 | see: src/base/reconstruction.cc 393 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 394 | void Reconstruction::WritePoints3DBinary(const std::string& path) 395 | """ 396 | with open(path_to_model_file, "wb") as fid: 397 | write_next_bytes(fid, len(points3D), "Q") 398 | for _, pt in points3D.items(): 399 | write_next_bytes(fid, pt.id, "Q") 400 | write_next_bytes(fid, pt.xyz.tolist(), "ddd") 401 | write_next_bytes(fid, pt.rgb.tolist(), "BBB") 402 | write_next_bytes(fid, pt.error, "d") 403 | track_length = pt.image_ids.shape[0] 404 | write_next_bytes(fid, track_length, "Q") 405 | for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs): 406 | write_next_bytes(fid, [image_id, point2D_id], "ii") 407 | 408 | 409 | def detect_model_format(path, ext): 410 | if os.path.isfile(os.path.join(path, "cameras" + ext)) and \ 411 | os.path.isfile(os.path.join(path, "images" + ext)) and \ 412 | os.path.isfile(os.path.join(path, "points3D" + ext)): 413 | print("Detected model format: '" + ext + "'") 414 | return True 415 | 416 | return False 417 | 418 | 419 | def read_model(path, ext=""): 420 | # try to detect the extension automatically 421 | if ext == "": 422 | if detect_model_format(path, ".bin"): 423 | ext = ".bin" 424 | elif detect_model_format(path, ".txt"): 425 | ext = ".txt" 426 | else: 427 | print("Provide model format: '.bin' or '.txt'") 428 | return 429 | 430 | if ext == ".txt": 431 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 432 | images = read_images_text(os.path.join(path, "images" + ext)) 433 | points3D = read_points3D_text(os.path.join(path, "points3D") + ext) 434 | else: 435 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 436 | images = read_images_binary(os.path.join(path, "images" + ext)) 437 | points3D = read_points3D_binary(os.path.join(path, "points3D") + ext) 438 | print('COLMAP points:', len(points3D)) 439 | return cameras, images, points3D 440 | 441 | 442 | def write_model(cameras, images, points3D, path, ext=".bin"): 443 | if ext == ".txt": 444 | write_cameras_text(cameras, os.path.join(path, "cameras" + ext)) 445 | write_images_text(images, os.path.join(path, "images" + ext)) 446 | write_points3D_text(points3D, os.path.join(path, "points3D") + ext) 447 | else: 448 | write_cameras_binary(cameras, os.path.join(path, "cameras" + ext)) 449 | write_images_binary(images, os.path.join(path, "images" + ext)) 450 | write_points3D_binary(points3D, os.path.join(path, "points3D") + ext) 451 | return cameras, images, points3D 452 | 453 | 454 | def qvec2rotmat(qvec): 455 | return np.array([ 456 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 457 | 2
* qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 458 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 459 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 460 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 461 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 462 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 463 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 464 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 465 | 466 | 467 | def rotmat2qvec(R): 468 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 469 | K = np.array([ 470 | [Rxx - Ryy - Rzz, 0, 0, 0], 471 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 472 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 473 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 474 | eigvals, eigvecs = np.linalg.eigh(K) 475 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 476 | if qvec[0] < 0: 477 | qvec *= -1 478 | return qvec 479 | 480 | 481 | def main(): 482 | parser = argparse.ArgumentParser(description="Read and write COLMAP binary and text models") 483 | parser.add_argument("--input_model", help="path to input model folder") 484 | parser.add_argument("--input_format", choices=[".bin", ".txt"], 485 | help="input model format", default="") 486 | parser.add_argument("--output_model", 487 | help="path to output model folder") 488 | parser.add_argument("--output_format", choices=[".bin", ".txt"], 489 | help="output model format", default=".txt") 490 | args = parser.parse_args() 491 | 492 | cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format) 493 | 494 | print("num_cameras:", len(cameras)) 495 | print("num_images:", len(images)) 496 | print("num_points3D:", len(points3D)) 497 | 498 | if args.output_model is not None: 499 | write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format) 500 | 501 | 502 | if __name__ == "__main__": 503 | main() 504 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc.