├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── add_video_to_db.py ├── add_video_to_model.py ├── anafi_metadata.py ├── build_pcl_util.sh ├── bundle_adjustment.py ├── cli_utils.py ├── colmap_util ├── database.py └── read_model.py ├── construct_evaluation_metadata.py ├── convert_dataset.py ├── convert_euroc.py ├── dxf_to_ply.py ├── edit_exif.py ├── evaluation_toolkit ├── README.md ├── evaluation_toolkit │ ├── __init__.py │ ├── depth_evaluation.py │ └── inference_toolkit.py └── setup.py ├── extract_pictures_from_model.py ├── extract_video_with_gps.py ├── filter_colmap_model.py ├── generate_sky_masks.py ├── images ├── drone1.jpg ├── drone2.jpg ├── example_viz.jpg ├── optimal_sample1.jpg ├── optimal_sample2.jpg ├── photog1.jpg ├── photog2.jpg ├── piloting1.jpg ├── piloting2.jpg ├── plan1.jpg ├── plan2.jpg ├── plan3.jpg ├── plan4.jpg ├── plot │ ├── GTwise_depth_diff_DepthNet.jpg │ ├── GTwise_depth_diff_SfmLearner.jpg │ ├── depth_distrib.jpg │ ├── est_error_quantiles.jpg │ ├── fpv_error_quantiles.jpg │ ├── global_depth_diff.jpg │ └── gt_error_quantiles.jpg ├── pointcloud1.jpg ├── pointcloud2.jpg ├── result1.jpg ├── result2.jpg ├── result3.jpg └── splats.jpg ├── install_dependencies.sh ├── las2ply.py ├── main_pipeline.py ├── main_pipeline_no_lidar.py ├── meshlab_xml_writer.py ├── model ├── ENet ├── ENet_summary.txt └── enet.py ├── pcl_util ├── CMakeLists.txt ├── cloud_registrator.cpp ├── create_vis_file.cpp ├── mesh_triangulator.cpp ├── normals_transfer.cpp ├── pointcloud_sor.cpp ├── pointcloud_subsampler.cpp └── pointcloud_subsampler.h ├── picture_localization.py ├── prepare_images.py ├── prepare_workspace.py ├── requirements.txt ├── resize_colmap_cameras.py ├── split_dataset.py ├── video_localization.py ├── videos_to_colmap.py └── wrappers ├── __init__.py ├── cloudcompare.py ├── colmap.py ├── default_wrapper.py ├── eth3d.py ├── ffmpeg.py ├── pcl_util.py └── pdraw.py /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cudagl:10.2-devel-ubuntu18.04 2 | 3 | WORKDIR /parrot-photogrammetry 4 | COPY . /parrot-photogrammetry 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | 8 | RUN apt update \ 9 | && apt install -y git \ 10 | repo \ 11 | ffmpeg \ 12 | python3 \ 13 | python3-pip \ 14 | python \ 15 | gosu \ 16 | cmake \ 17 | build-essential \ 18 | pkg-config \ 19 | libboost-all-dev \ 20 | libeigen3-dev \ 21 | libsuitesparse-dev \ 22 | libfreeimage-dev \ 23 | libgoogle-glog-dev \ 24 | libgflags-dev \ 25 | libglew-dev \ 26 | qtbase5-dev \ 27 | libqt5opengl5-dev \ 28 | libcgal-dev \ 29 | libvtk6-dev \ 30 | libflann-dev \ 31 | cmake libgmp-dev \ 32 | libgoogle-glog-dev \ 33 | libqwt-qt5-dev \ 34 | libpcl-dev \ 35 | libproj-dev \ 36 | libcgal-qt5-dev \ 37 | libatlas-base-dev \ 38 | libsuitesparse-dev \ 39 | zlib1g-dev \ 40 | libglfw3-dev \ 41 | libsdl2-dev rsync 42 | 43 | RUN git clone https://github.com/laurentkneip/opengv.git 44 | RUN cd opengv \ 45 | && mkdir build \ 46 | && cd build \ 47 | && cmake .. \ 48 | && make -j8 \ 49 | && make install \ 50 | && cd ../../ 51 | 52 | RUN git clone https://github.com/opencv/opencv.git 53 | RUN cd opencv \ 54 | && git checkout 4.1.2 \ 55 | && mkdir build \ 56 | && cd build \ 57 | && cmake -D WITH_CUDA=OFF .. 
\ 58 | && make -j8 \ 59 | && make install \ 60 | && cd ../../ 61 | 62 | RUN git clone https://github.com/ETH3D/dataset-pipeline 63 | RUN cd dataset-pipeline \ 64 | && mkdir -p build \ 65 | && cd build \ 66 | && cmake .. \ 67 | && make -j8 \ 68 | && cd ../../ 69 | 70 | RUN git clone https://ceres-solver.googlesource.com/ceres-solver 71 | RUN cd ceres-solver \ 72 | && git checkout $(git describe --tags) \ 73 | && mkdir build \ 74 | && cd build \ 75 | && cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF \ 76 | && make -j8 \ 77 | && make install \ 78 | && cd ../../ 79 | 80 | RUN git clone https://github.com/colmap/colmap.git 81 | RUN cd colmap \ 82 | && git checkout dev \ 83 | && mkdir build \ 84 | && cd build \ 85 | && cmake .. \ 86 | && make -j8 \ 87 | && make install \ 88 | && cd ../../ 89 | 90 | RUN mkdir -p groundsdk \ 91 | && cd groundsdk \ 92 | && repo init -u https://github.com/Parrot-Developers/groundsdk-manifest -m release.xml \ 93 | && repo sync 94 | RUN mkdir -p /.config && chmod 777 -R /.config && chmod 777 -R groundsdk/ && cd groundsdk/ && yes "y" | gosu 1000:1 ./build.sh -p pdraw-linux -t build -j8 \ 95 | && cd ../ 96 | 97 | RUN pip3 install -r requirements.txt 98 | 99 | RUN /parrot-photogrammetry/build_pcl_util.sh 100 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Clément Pinard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /add_video_to_db.py: -------------------------------------------------------------------------------- 1 | from colmap_util import database as db 2 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 3 | from path import Path 4 | import pandas as pd 5 | import numpy as np 6 | from sqlite3 import IntegrityError 7 | from tqdm import tqdm 8 | parser = ArgumentParser(description='Add video frames to a colmap db file. 
' 9 | 'Needs original colmap db file to link the frames to the right cameras', 10 | formatter_class=ArgumentDefaultsHelpFormatter) 11 | 12 | parser.add_argument('--frame_list', metavar='PATH', 13 | help='path to list with relative path to images', type=Path, default=None) 14 | parser.add_argument('--metadata', metavar='PATH', required=True, 15 | help='path to metadata csv file', type=Path) 16 | parser.add_argument('--database', metavar='DB', required=True, 17 | help='path to colmap database file, to get the image ids right') 18 | 19 | 20 | def add_to_db(db_path, metadata, frame_list_path, **env): 21 | database = db.COLMAPDatabase.connect(db_path) 22 | 23 | frame_list = [] 24 | frame_ids = [] 25 | if frame_list_path is not None: 26 | with open(frame_list_path, "r") as f: 27 | frame_list = [line[:-1] for line in f.readlines()] 28 | metadata = metadata[metadata["image_path"].isin(frame_list)] 29 | 30 | for _, row in tqdm(metadata.iterrows(), total=len(metadata)): 31 | image_path = row["image_path"] 32 | camera_id = row["camera_id"] 33 | if "location_valid" in row.keys() and row["location_valid"]: 34 | frame_gps = row[["location_longitude", "location_latitude", "location_altitude"]] 35 | else: 36 | frame_gps = np.full(3, np.NaN) 37 | try: 38 | frame_ids.append(database.add_image(image_path, int(camera_id), prior_t=frame_gps, image_id=row["db_id"])) 39 | except IntegrityError: 40 | sql_string = "SELECT camera_id, image_id FROM images WHERE name='{}'".format(image_path) 41 | sql_output = next(database.execute(sql_string)) 42 | existing_camera_id = sql_output[0] 43 | assert(existing_camera_id == camera_id) 44 | frame_ids.append(sql_output[1]) 45 | database.commit() 46 | database.close() 47 | return frame_ids 48 | 49 | 50 | def get_frame_without_features(db_path): 51 | database = db.COLMAPDatabase.connect(db_path) 52 | first_string = "SELECT image_id FROM descriptors WHERE cols=0" 53 | descriptors = list(database.execute(first_string)) 54 | for d in descriptors: 55 | second_string = "SELECT name FROM images WHERE image_id={}".format(d) 56 | row = list(database.execute(second_string))[0] 57 | print(row) 58 | 59 | database.close() 60 | 61 | 62 | def main(): 63 | args = parser.parse_args() 64 | add_to_db(args.database, pd.read_csv(args.metadata), args.frame_list) 65 | 66 | return 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /add_video_to_model.py: -------------------------------------------------------------------------------- 1 | from colmap_util import read_model as rm 2 | from colmap_util.database import COLMAPDatabase 3 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 4 | from path import Path 5 | import pandas as pd 6 | import numpy as np 7 | from pyproj import Proj 8 | 9 | parser = ArgumentParser(description='Add GPS localized video to colmap model (Note : Localization is not precise enough)', 10 | formatter_class=ArgumentDefaultsHelpFormatter) 11 | 12 | parser.add_argument('--video_list', metavar='PATH', 13 | help='path to list with relative path to images', type=Path) 14 | parser.add_argument('--metadata', metavar='PATH', 15 | help='path to metadata csv file', type=Path) 16 | parser.add_argument('--database', metavar='DB', required=True, 17 | help='path to colmap database file, to get the image ids right') 18 | parser.add_argument('--input_model', metavar='Path', type=Path) 19 | parser.add_argument('--system', default='epsg:2154') 20 | parser.add_argument('--centroid_path', 
default=None) 21 | parser.add_argument('--output_model', metavar='DIR', default=None, type=Path) 22 | 23 | 24 | def print_cams(cameras): 25 | print("id \t model \t \t width \t height \t params") 26 | for id, c in cameras.items(): 27 | param_string = " ".join(["{:.3f}".format(p) for p in c.params]) 28 | print("{} \t {} \t {} \t {} \t {}".format(id, c.model, c.width, c.height, param_string)) 29 | 30 | 31 | def print_imgs(images, max_img=2): 32 | max_img = min(max_img, len(images)) 33 | keys = sorted(images.keys())[:max_img] 34 | for k in keys: 35 | print(images[k]) 36 | 37 | 38 | def world_coord_from_frame(frame_qvec, frame_tvec): 39 | world2NED = np.float32([[0, 1, 0], 40 | [1, 0, 0], 41 | [0, 0, -1]]) 42 | NED2cam = np.float32([[0, 1, 0], 43 | [0, 0, 1], 44 | [1, 0, 0]]) 45 | frame_rot = NED2cam @ rm.qvec2rotmat(frame_qvec).T @ world2NED 46 | world_tvec = - frame_rot @ frame_tvec 47 | world_qvec = rm.rotmat2qvec(frame_rot) 48 | return world_qvec, world_tvec 49 | 50 | 51 | def get_id_from_db(db): 52 | rows = db.execute("SELECT * FROM images") 53 | id_name = {} 54 | for id, name, *_ in rows: 55 | id_name[name] = id 56 | return id_name 57 | 58 | 59 | def main(): 60 | args = parser.parse_args() 61 | proj = Proj(args.system) 62 | if args.centroid_path is not None: 63 | centroid = np.loadtxt(args.centroid_path) 64 | else: 65 | centroid = np.zeros(3) 66 | db = COLMAPDatabase.connect(args.database) 67 | with open(args.video_list, 'r') as f: 68 | image_list = f.read().splitlines() 69 | cameras = rm.read_cameras_binary(args.input_model / "cameras.bin") 70 | print("Available cameras :") 71 | print_cams(cameras) 72 | camera_id = int(input("which camera for the video ?\n")) 73 | images = rm.read_images_binary(args.input_model / "images.bin") 74 | # images = {} 75 | image_ids = get_id_from_db(db) 76 | for name in image_list: 77 | if name not in image_ids.keys(): 78 | raise Exception("Image {} not in database".format(name)) 79 | 80 | metadata = pd.read_csv(args.metadata, sep=" ") 81 | for (i, row), image_path in zip(metadata.iterrows(), image_list): 82 | image_id = image_ids[image_path] 83 | frame_qvec = np.array([row["frame_quat_w"], 84 | row["frame_quat_x"], 85 | row["frame_quat_y"], 86 | row["frame_quat_z"]]) 87 | lat, lon, alt = row["location_latitude"], row["location_longitude"], row["location_altitude"] 88 | x, y = proj(lon, lat) 89 | frame_tvec = np.array([x, y, alt]) - centroid 90 | world_qvec, world_tvec = world_coord_from_frame(frame_qvec, frame_tvec) 91 | images[image_id] = rm.Image( 92 | id=image_id, qvec=world_qvec, tvec=world_tvec, 93 | camera_id=camera_id, name=image_path, 94 | xys=[], point3D_ids=[]) 95 | 96 | rm.write_images_binary(images, args.output_model, "images.bin") 97 | rm.write_points3d_binary({}, args.output_model / "points3D.bin") 98 | return 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /anafi_metadata.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from scipy import integrate 4 | 5 | 6 | def extrapolate_position(speeds, timestamps, initial_position, final_position): 7 | trapz_x = integrate.cumtrapz(speeds[:, 0], timestamps, initial=0) 8 | trapz_y = integrate.cumtrapz(speeds[:, 1], timestamps, initial=0) 9 | trapz_z = integrate.cumtrapz(speeds[:, 2], timestamps, initial=0) 10 | trapz = np.stack([trapz_x, trapz_y, trapz_z], axis=-1) 11 | if initial_position is None and final_position is None: 12 
| return trapz 13 | elif initial_position is not None and final_position is None: 14 | return trapz + initial_position 15 | elif initial_position is None and final_position is not None: 16 | return trapz + final_position - trapz[-1] 17 | else: 18 | interp = np.linspace(0, final_position - trapz[-1], speeds.shape[0]) 19 | return trapz + interp 20 | 21 | 22 | def preprocess_metadata(metadata, proj): 23 | def lambda_fun(x): 24 | return pd.Series(proj(*x), index=["x", "y"]) 25 | position_xy = metadata[["location_longitude", "location_latitude"]].apply(lambda_fun, axis=1) 26 | metadata = metadata.join(position_xy) 27 | metadata["z"] = metadata["location_altitude"] 28 | # Extrapolate position from speed and previous frames 29 | 30 | speed = metadata[["speed_east", "speed_north", "speed_down"]].values * np.array([1, 1, -1]) 31 | timestamps = metadata["time"].values * 1e-6 32 | positions = metadata[["x", "y", "z"]].values 33 | if metadata["location_valid"].unique().tolist() == [False]: 34 | metadata["indoor"] = True 35 | positions = extrapolate_position(speed, timestamps, None, None) 36 | else: 37 | metadata["indoor"] = False 38 | if 0 in metadata["location_valid"].unique(): 39 | location_validity = metadata["location_valid"].diff() 40 | invalidity_start = location_validity.index[location_validity == -1].tolist() 41 | validity_start = location_validity.index[location_validity == 1].tolist() 42 | 43 | if not metadata["location_valid"].iloc[0]: 44 | end = validity_start.pop(0) 45 | positions[:end] = extrapolate_position(speed[:end], timestamps[:end], None, positions[end]) 46 | if not metadata["location_valid"].iloc[-1]: 47 | start = invalidity_start.pop(-1) - 1 48 | positions[start:] = extrapolate_position(speed[start:], timestamps[start:], positions[start], None) 49 | 50 | if(len(invalidity_start) != len(validity_start)): 51 | raise ValueError("error, invalidity_start ({}) and validity_start({})" 52 | " are not the same length ({} vs {})".format(invalidity_start, 53 | validity_start, 54 | len(invalidity_start), 55 | len(validity_start))) 56 | 57 | for start, end in zip(invalidity_start, validity_start): 58 | positions[start:end] = extrapolate_position(speed[start:end], timestamps[start:end], positions[start], positions[end]) 59 | 60 | metadata["x"], metadata["y"], metadata["z"] = positions.transpose() 61 | 62 | return metadata 63 | 64 | 65 | def extract_metadata(folder_path, file_path, native_wrapper, proj, w, h, f, save_path=None): 66 | metadata = native_wrapper.vmeta_extract(file_path) 67 | metadata = metadata.iloc[:-1] 68 | metadata = preprocess_metadata(metadata, proj) 69 | video_quality = h * w / f 70 | metadata["video_quality"] = video_quality 71 | metadata["height"] = h 72 | metadata["width"] = w 73 | metadata["framerate"] = f 74 | metadata["video"] = file_path 75 | metadata['frame'] = metadata.index + 1 76 | metadata["location_valid"] = metadata["location_valid"] == 1 77 | fx = metadata["width"] / (2 * np.tan(metadata["picture_hfov"] * np.pi/360)) 78 | fy = metadata["height"] / (2 * np.tan(metadata["picture_vfov"] * np.pi/360)) 79 | params = np.stack([fx.values, fy.values, metadata["width"]/2, metadata["height"]/2], axis=-1) 80 | metadata["camera_params"] = [tuple(p) for p in params] 81 | 82 | if save_path is not None: 83 | metadata.to_csv(save_path) 84 | return metadata 85 | -------------------------------------------------------------------------------- /build_pcl_util.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd pcl_util 
\ 4 | && rm -rf build \ 5 | && mkdir build \ 6 | && cd build \ 7 | && cmake .. \ 8 | && make -j8 \ 9 | && cd .. -------------------------------------------------------------------------------- /cli_utils.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 2 | from path import Path 3 | import numpy as np 4 | 5 | 6 | def add_main_options(parser): 7 | main_parser = parser.add_argument_group("Main options") 8 | main_parser.add_argument('--input_folder', metavar='PATH', default=Path("."), type=Path, 9 | help="Folder with LAS point cloud, videos, and images") 10 | main_parser.add_argument('--workspace', metavar='PATH', default=Path("."), 11 | help='path to workspace where COLMAP operations will be done', type=Path) 12 | main_parser.add_argument('--raw_output_folder', metavar='PATH', default=Path("."), 13 | help='path to output folder : must be big !', type=Path) 14 | main_parser.add_argument('--converted_output_folder', metavar='PATH', default=Path("."), 15 | help='path to output folder : must be big !', type=Path) 16 | 17 | main_parser.add_argument('--skip_step', metavar="N", nargs="*", default=[], type=int, 18 | help='Skip selected steps') 19 | main_parser.add_argument('--begin_step', metavar="N", type=int, default=None) 20 | main_parser.add_argument('--show_steps', action="store_true") 21 | main_parser.add_argument('--add_new_videos', action="store_true", 22 | help="If selected, will skip first 6 steps to directly register videos without mapping") 23 | main_parser.add_argument('--generate_groundtruth_for_individual_images', '--gt_images', action="store_true", 24 | help="If selected, will generate Ground truth for individual images as well as videos") 25 | main_parser.add_argument('--save_space', action="store_true") 26 | main_parser.add_argument('-v', '--verbose', action="count", default=0) 27 | main_parser.add_argument('--resume_work', action="store_true", 28 | help='If selected, will try to skip video aready localized, and ground truth already generated') 29 | main_parser.add_argument('--inspect_dataset', action="store_true", 30 | help='If selected, will open a window to inspect the dataset. ' 31 | 'See https://github.com/ETH3D/dataset-pipeline#dataset-inspection') 32 | main_parser.add_argument('--vid_ext', nargs='+', default=[".mp4", ".MP4"], 33 | help='Video extensions to scrape from input folder') 34 | main_parser.add_argument('--pic_ext', nargs='+', default=[".jpg", ".JPG", ".png", ".PNG"], 35 | help='Image extensions to scrape from input folder') 36 | main_parser.add_argument('--raw_ext', nargs='+', default=[".ARW", ".NEF", ".DNG"], 37 | help='Raw Image extensions to scrape from input folder') 38 | 39 | 40 | def add_pcp_options(parser): 41 | pcp_parser = parser.add_argument_group("PointCloud preparation") 42 | pcp_parser.add_argument("--pointcloud_resolution", default=None, type=float, 43 | help='If set, will subsample the Lidar point clouds at the chosen resolution') 44 | pcp_parser.add_argument("--SOR", default=[10, 6], nargs=2, type=float, 45 | help="Satistical Outlier Removal parameters : Number of nearest neighbours, max relative distance to standard deviation") 46 | pcp_parser.add_argument('--registration_method', choices=["simple", "eth3d", "interactive"], default="simple", 47 | help='Method used for point cloud registration. 
See README, Manual step by step : step 11') 48 | 49 | 50 | def add_ve_options(parser): 51 | ve_parser = parser.add_argument_group("Video extractor") 52 | ve_parser.add_argument('--total_frames', default=500, type=int) 53 | ve_parser.add_argument('--orientation_weight', default=1, type=float, 54 | help="Weight applied to orientation during optimal sample. " 55 | "Higher means two pictures with same location but different " 56 | "orientation will be considered farer apart") 57 | ve_parser.add_argument('--resolution_weight', default=1, type=float, 58 | help="same as orientation, but with image size") 59 | ve_parser.add_argument('--num_neighbours', default=10, type=int, 60 | help='Number of frame shared between subsequent chunks') 61 | ve_parser.add_argument('--system', default="epsg:2154", 62 | help='coordinates system used for GPS, should be the same as the LAS files used') 63 | ve_parser.add_argument('--lowfps', default=1, type=int, 64 | help="framerate at which videos will be scanned WITH reconstruction") 65 | ve_parser.add_argument('--max_sequence_length', default=4000, type=int, 66 | help='Number max of frames for a chunk. ' 67 | 'This is for RAM purpose, as loading feature matches of thousands of frames can take up GBs of RAM') 68 | ve_parser.add_argument('--include_lowfps_thorough', action='store_true', 69 | help="if selected, will include videos frames at lowfps for thorough scan (longer)") 70 | ve_parser.add_argument('--generic_model', default='OPENCV', 71 | help='COLMAP model for generic videos. Same zoom level assumed throughout the whole video. ' 72 | 'See https://colmap.github.io/cameras.html') 73 | ve_parser.add_argument('--full_metadata', default=None, 74 | help='where to save all concatenated metadata in a file that will be used to add new videos afterward. ' 75 | 'If not set, will save at the root of workspace') 76 | 77 | 78 | def add_exec_options(parser): 79 | exec_parser = parser.add_argument_group("Executable files") 80 | exec_parser.add_argument('--log', default=None, type=Path) 81 | exec_parser.add_argument('--nw', default="native-wrapper.sh", type=Path, 82 | help="native-wrapper.sh file location (see Anafi SDK documentation)") 83 | exec_parser.add_argument("--colmap", default="colmap", type=Path, 84 | help="colmap exec file location") 85 | exec_parser.add_argument("--eth3d", default="../dataset-pipeline/build", 86 | type=Path, help="ETH3D detaset pipeline exec files folder location") 87 | exec_parser.add_argument("--ffmpeg", default="ffmpeg", type=Path) 88 | exec_parser.add_argument("--pcl_util", default="pcl_util/build", type=Path) 89 | 90 | 91 | def add_pm_options(parser): 92 | pm_parser = parser.add_argument_group("Photogrammetry") 93 | pm_parser.add_argument('--max_num_matches', default=32768, type=int, help="max number of matches, lower it if you get GPU memory error") 94 | pm_parser.add_argument('--match_method', default='vocab_tree', choices=['vocab_tree', 'exhaustive'], 95 | help='Match method for first thorough photogrammetry, ' 96 | 'see https://colmap.github.io/tutorial.html#feature-matching-and-geometric-verification') 97 | pm_parser.add_argument('--vocab_tree', type=Path, default="vocab_tree_flickr100K_words256K.bin") 98 | pm_parser.add_argument('--triangulate', action="store_true") 99 | pm_parser.add_argument('--multiple_models', action='store_true', help='If selected, will let colmap mapper do multiple models.' 
100 | 'The biggest one will then be chosen') 101 | pm_parser.add_argument('--more_sift_features', action="store_true", 102 | help="If selected, will activate the COLMAP options ` SiftExtraction.domain_size_pooling` " 103 | " and `--SiftExtraction.estimate_affine_shape` during feature extraction. Be careful, " 104 | "this does not use GPU and is thus very slow. More info : " 105 | "https://colmap.github.io/faq.html#increase-number-of-matches-sparse-3d-points") 106 | pm_parser.add_argument('--filter_models', action="store_true", 107 | help="If selected, will filter video localization to smooth trajectory") 108 | pm_parser.add_argument('--stereo_min_depth', type=float, default=0.1, help="Min depth for PatchMatch Stereo") 109 | pm_parser.add_argument('--stereo_max_depth', type=float, default=100, help="Max depth for PatchMatch Stereo") 110 | 111 | 112 | def add_om_options(parser): 113 | om_parser = parser.add_argument_group("Occlusion Mesh") 114 | om_parser.add_argument('--normals_method', default="radius", choices=["radius", "neighbours"], 115 | help='Method used for normal computation between radius and nearest neighbours') 116 | om_parser.add_argument('--normals_radius', default=0.2, type=float, 117 | help='If radius method for normals, radius within which other points will be considered neighbours') 118 | om_parser.add_argument('--normals_neighbours', default=8, type=int, 119 | help='If nearest neighbours method chosen, number of neighbours to consider.' 120 | 'Could be very close or very far points, but has a constant complexity') 121 | om_parser.add_argument('--mesh_resolution', default=0.2, type=float, 122 | help='Mesh resolution for occlusion in meters. Higher means more coarse. (in meters)') 123 | om_parser.add_argument('--splats', action='store_true', 124 | help='If selected, will create splats for points in the cloud that are far from the occlusion mesh') 125 | om_parser.add_argument('--splat_threshold', default=0.1, type=float, 126 | help='Distance from occlusion mesh at which a splat will be created for a particular point (in meters)') 127 | om_parser.add_argument('--max_splat_size', default=None, type=float, 128 | help='Splat size is defined by mean istance from its neighbours. You can define a max splat size for ' 129 | 'isolated points which otherwise would make a very large useless splat. ' 130 | 'If not set, will be `2.5*splat_threshold`.') 131 | 132 | 133 | def add_gt_options(parser): 134 | gt_parser = parser.add_argument_group("Ground Truth Creator") 135 | gt_parser.add_argument('--max_occlusion_depth', default=250, type=float, 136 | help='max depth for occlusion. 
Everything further will not be considered at infinity') 137 | gt_parser.add_argument('--eth3d_splat_radius', default=0.01, type=float, 138 | help='see splat radius for ETH3D') 139 | im_size = gt_parser.add_mutually_exclusive_group() 140 | im_size.add_argument('--output_rescale', type=float, default=1, 141 | help='Rescale images for depth ground truth') 142 | im_size.add_argument('--output_width', type=int, default=None, 143 | help='width of output images and depth maps') 144 | 145 | 146 | def set_full_argparser(): 147 | parser = ArgumentParser(description='Main pipeline, from LIDAR pictures and videos to GT depth enabled videos', 148 | formatter_class=ArgumentDefaultsHelpFormatter) 149 | 150 | add_main_options(parser) 151 | add_pcp_options(parser) 152 | add_ve_options(parser) 153 | add_exec_options(parser) 154 | add_pm_options(parser) 155 | add_om_options(parser) 156 | add_gt_options(parser) 157 | return parser 158 | 159 | 160 | def set_full_argparser_no_lidar(): 161 | parser = ArgumentParser(description='Main pipeline, from pictures and videos to GT depth enabled videos, ' 162 | 'using only COLMAP (No lidar)', 163 | formatter_class=ArgumentDefaultsHelpFormatter) 164 | add_main_options(parser) 165 | add_ve_options(parser) 166 | add_exec_options(parser) 167 | add_pm_options(parser) 168 | add_om_options(parser) 169 | add_gt_options(parser) 170 | 171 | nl_parser = parser.add_argument_group("Main pipeline no Lidar specific options") 172 | nl_parser.add_argument("--SOR", default=[10, 6], nargs=2, type=float, 173 | help="Satistical Outlier Removal parameters : Number of nearest neighbours, " 174 | "max relative distance to standard deviation. " 175 | "This will be used for filtering dense reconstruction") 176 | return parser 177 | 178 | 179 | def set_new_images_arparser(): 180 | parser = ArgumentParser(description='Additional pipeline, to add new pictures to an already existing workspace, ' 181 | 'The main pipeline must be finished before launching this script, ' 182 | ' at least until reconstruction alignment', 183 | formatter_class=ArgumentDefaultsHelpFormatter) 184 | add_main_options(parser) 185 | add_exec_options(parser) 186 | add_pm_options(parser) 187 | add_om_options(parser) 188 | add_gt_options(parser) 189 | 190 | ni_parser = parser.add_argument_group("Add new pictures specific options") 191 | 192 | ni_parser.add_argument("--map_new_images", action="store_true", 193 | help="if selected, will replace the 'omage_registrator' step with a full mapping step") 194 | ni_parser.add_argument("--bundle_adjuster_steps", default=100, type=int, 195 | help="number of iteration for bundle adjustor after image registration") 196 | ni_parser.add_argument("--rebuild_occlusion_mesh", action="store_true", 197 | help="If selected, will rebuild a new dense point cloud and deauney mesh. " 198 | "Useful when new images see new parts of the model") 199 | ni_parser.add_argument('--generic_model', default='OPENCV', 200 | help='COLMAP model for added pictures. Same zoom level assumed throughout whole folders. 
' 201 | 'See https://colmap.github.io/cameras.html') 202 | return parser 203 | 204 | 205 | def print_step(step_number, step_name): 206 | print("\n\n=================") 207 | print("Step {}".format(step_number)) 208 | print(step_name) 209 | print("=================") 210 | 211 | 212 | global_steps = ["Point Cloud Preparation", 213 | "Pictures preparation", 214 | "Extracting Videos and selecting optimal frames for a thorough scan", 215 | "First thorough photogrammetry", 216 | "Alignment of photogrammetric reconstruction with GPS", 217 | "Video localization with respect to reconstruction", 218 | "Full reconstruction point cloud densificitation", 219 | "Alignment of photogrammetric reconstruction with Lidar point cloud", 220 | "Occlusion Mesh computing", 221 | "Ground Truth creation"] 222 | 223 | per_vid_steps_1 = ["Full video extraction", 224 | "Sky mask generation", 225 | "Complete photogrammetry with video at 1 fps", 226 | "Localizing remaining frames", 227 | "Re-Alignment of triangulated points with Lidar point cloud"] 228 | per_vid_steps_2 = ["Creating Ground truth data", 229 | "Create video with GT visualization and Convert to KITTI format"] 230 | 231 | 232 | def print_workflow(): 233 | print("Global steps :") 234 | for i, s in enumerate(global_steps): 235 | print("{}) {}".format(i+1, s)) 236 | if i == 4: 237 | print("\tPer video:") 238 | for i, s in enumerate(per_vid_steps_1): 239 | print("\t{}) {}".format(i+1, s)) 240 | 241 | print("\tPer video:") 242 | for i, s in enumerate(per_vid_steps_2): 243 | print("\t{}) {}".format(i+1, s)) 244 | 245 | 246 | def get_matrix(path): 247 | if path.isfile(): 248 | '''Note : We use the inverse matrix here, because in general, it's easier to register the reconstructed model into the lidar one, 249 | as the reconstructed will have less points, but in the end we need the matrix to apply to the lidar point to be aligned 250 | with the camera positions (ie the inverse)''' 251 | return np.linalg.inv(np.fromfile(path, sep=" ").reshape(4, 4)) 252 | else: 253 | print("Error, no registration matrix can be found") 254 | print("Ensure that your registration matrix was saved under the name {}".format(path)) 255 | decision = None 256 | while decision not in ["y", "n", ""]: 257 | decision = input("retry ? [Y/n]") 258 | if decision.lower() in ["y", ""]: 259 | return get_matrix(path) 260 | elif decision.lower() == "n": 261 | return np.eye(4) 262 | -------------------------------------------------------------------------------- /colmap_util/database.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 
17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de) 31 | 32 | # This script is based on an original implementation by True Price. 33 | 34 | import sys 35 | import sqlite3 36 | import numpy as np 37 | 38 | 39 | IS_PYTHON3 = sys.version_info[0] >= 3 40 | 41 | MAX_IMAGE_ID = 2**31 - 1 42 | 43 | CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras ( 44 | camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 45 | model INTEGER NOT NULL, 46 | width INTEGER NOT NULL, 47 | height INTEGER NOT NULL, 48 | params BLOB, 49 | prior_focal_length INTEGER NOT NULL)""" 50 | 51 | CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors ( 52 | image_id INTEGER PRIMARY KEY NOT NULL, 53 | rows INTEGER NOT NULL, 54 | cols INTEGER NOT NULL, 55 | data BLOB, 56 | FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)""" 57 | 58 | CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images ( 59 | image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 60 | name TEXT NOT NULL UNIQUE, 61 | camera_id INTEGER NOT NULL, 62 | prior_qw REAL, 63 | prior_qx REAL, 64 | prior_qy REAL, 65 | prior_qz REAL, 66 | prior_tx REAL, 67 | prior_ty REAL, 68 | prior_tz REAL, 69 | CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}), 70 | FOREIGN KEY(camera_id) REFERENCES cameras(camera_id)) 71 | """.format(MAX_IMAGE_ID) 72 | 73 | CREATE_TWO_VIEW_GEOMETRIES_TABLE = """ 74 | CREATE TABLE IF NOT EXISTS two_view_geometries ( 75 | pair_id INTEGER PRIMARY KEY NOT NULL, 76 | rows INTEGER NOT NULL, 77 | cols INTEGER NOT NULL, 78 | data BLOB, 79 | config INTEGER NOT NULL, 80 | F BLOB, 81 | E BLOB, 82 | H BLOB) 83 | """ 84 | 85 | CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints ( 86 | image_id INTEGER PRIMARY KEY NOT NULL, 87 | rows INTEGER NOT NULL, 88 | cols INTEGER NOT NULL, 89 | data BLOB, 90 | FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE) 91 | """ 92 | 93 | CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches ( 94 | pair_id INTEGER PRIMARY KEY NOT NULL, 95 | rows INTEGER NOT NULL, 96 | cols INTEGER NOT NULL, 97 | data BLOB)""" 98 | 99 | CREATE_NAME_INDEX = \ 100 | "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" 101 | 102 | CREATE_ALL = "; ".join([ 103 | CREATE_CAMERAS_TABLE, 104 | CREATE_IMAGES_TABLE, 105 | CREATE_KEYPOINTS_TABLE, 106 | CREATE_DESCRIPTORS_TABLE, 107 | CREATE_MATCHES_TABLE, 108 | CREATE_TWO_VIEW_GEOMETRIES_TABLE, 109 | CREATE_NAME_INDEX 110 | ]) 111 | 112 | 113 | def image_ids_to_pair_id(image_id1, image_id2): 114 | if image_id1 > image_id2: 115 | image_id1, image_id2 = image_id2, image_id1 116 | return image_id1 * MAX_IMAGE_ID + image_id2 117 | 118 | 119 | def 
pair_id_to_image_ids(pair_id): 120 | image_id2 = pair_id % MAX_IMAGE_ID 121 | image_id1 = (pair_id - image_id2) / MAX_IMAGE_ID 122 | return image_id1, image_id2 123 | 124 | 125 | def array_to_blob(array): 126 | if IS_PYTHON3: 127 | return array.tostring() 128 | else: 129 | return np.getbuffer(array) 130 | 131 | 132 | def blob_to_array(blob, dtype, shape=(-1,)): 133 | if IS_PYTHON3: 134 | return np.fromstring(blob, dtype=dtype).reshape(*shape) 135 | else: 136 | return np.frombuffer(blob, dtype=dtype).reshape(*shape) 137 | 138 | 139 | class COLMAPDatabase(sqlite3.Connection): 140 | 141 | @staticmethod 142 | def connect(database_path): 143 | return sqlite3.connect(database_path, factory=COLMAPDatabase) 144 | 145 | 146 | def __init__(self, *args, **kwargs): 147 | super(COLMAPDatabase, self).__init__(*args, **kwargs) 148 | 149 | self.create_tables = lambda: self.executescript(CREATE_ALL) 150 | self.create_cameras_table = \ 151 | lambda: self.executescript(CREATE_CAMERAS_TABLE) 152 | self.create_descriptors_table = \ 153 | lambda: self.executescript(CREATE_DESCRIPTORS_TABLE) 154 | self.create_images_table = \ 155 | lambda: self.executescript(CREATE_IMAGES_TABLE) 156 | self.create_two_view_geometries_table = \ 157 | lambda: self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE) 158 | self.create_keypoints_table = \ 159 | lambda: self.executescript(CREATE_KEYPOINTS_TABLE) 160 | self.create_matches_table = \ 161 | lambda: self.executescript(CREATE_MATCHES_TABLE) 162 | self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX) 163 | 164 | def add_camera(self, model, width, height, params, 165 | prior_focal_length=False, camera_id=None): 166 | params = np.asarray(params, np.float64) 167 | cursor = self.execute( 168 | "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)", 169 | (camera_id, model, width, height, array_to_blob(params), 170 | prior_focal_length)) 171 | return cursor.lastrowid 172 | 173 | def add_image(self, name, camera_id, 174 | prior_q=np.full(4, np.NaN), prior_t=np.full(3, np.NaN), image_id=None): 175 | cursor = self.execute( 176 | "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", 177 | (image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2], 178 | prior_q[3], prior_t[0], prior_t[1], prior_t[2])) 179 | return cursor.lastrowid 180 | 181 | def add_keypoints(self, image_id, keypoints): 182 | assert(len(keypoints.shape) == 2) 183 | assert(keypoints.shape[1] in [2, 4, 6]) 184 | 185 | keypoints = np.asarray(keypoints, np.float32) 186 | self.execute( 187 | "INSERT INTO keypoints VALUES (?, ?, ?, ?)", 188 | (image_id,) + keypoints.shape + (array_to_blob(keypoints),)) 189 | 190 | def add_descriptors(self, image_id, descriptors): 191 | descriptors = np.ascontiguousarray(descriptors, np.uint8) 192 | self.execute( 193 | "INSERT INTO descriptors VALUES (?, ?, ?, ?)", 194 | (image_id,) + descriptors.shape + (array_to_blob(descriptors),)) 195 | 196 | def add_matches(self, image_id1, image_id2, matches): 197 | assert(len(matches.shape) == 2) 198 | assert(matches.shape[1] == 2) 199 | 200 | if image_id1 > image_id2: 201 | matches = matches[:,::-1] 202 | 203 | pair_id = image_ids_to_pair_id(image_id1, image_id2) 204 | matches = np.asarray(matches, np.uint32) 205 | self.execute( 206 | "INSERT INTO matches VALUES (?, ?, ?, ?)", 207 | (pair_id,) + matches.shape + (array_to_blob(matches),)) 208 | 209 | def add_two_view_geometry(self, image_id1, image_id2, matches, 210 | F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2): 211 | assert(len(matches.shape) == 2) 212 | assert(matches.shape[1] 
== 2) 213 | 214 | if image_id1 > image_id2: 215 | matches = matches[:,::-1] 216 | 217 | pair_id = image_ids_to_pair_id(image_id1, image_id2) 218 | matches = np.asarray(matches, np.uint32) 219 | F = np.asarray(F, dtype=np.float64) 220 | E = np.asarray(E, dtype=np.float64) 221 | H = np.asarray(H, dtype=np.float64) 222 | self.execute( 223 | "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)", 224 | (pair_id,) + matches.shape + (array_to_blob(matches), config, 225 | array_to_blob(F), array_to_blob(E), array_to_blob(H))) 226 | 227 | 228 | def example_usage(): 229 | import os 230 | import argparse 231 | 232 | parser = argparse.ArgumentParser() 233 | parser.add_argument("--database_path", default="database.db") 234 | args = parser.parse_args() 235 | 236 | if os.path.exists(args.database_path): 237 | print("ERROR: database path already exists -- will not modify it.") 238 | return 239 | 240 | # Open the database. 241 | 242 | db = COLMAPDatabase.connect(args.database_path) 243 | 244 | # For convenience, try creating all the tables upfront. 245 | 246 | db.create_tables() 247 | 248 | # Create dummy cameras. 249 | 250 | model1, width1, height1, params1 = \ 251 | 0, 1024, 768, np.array((1024., 512., 384.)) 252 | model2, width2, height2, params2 = \ 253 | 2, 1024, 768, np.array((1024., 512., 384., 0.1)) 254 | 255 | camera_id1 = db.add_camera(model1, width1, height1, params1) 256 | camera_id2 = db.add_camera(model2, width2, height2, params2) 257 | 258 | # Create dummy images. 259 | 260 | image_id1 = db.add_image("image1.png", camera_id1) 261 | image_id2 = db.add_image("image2.png", camera_id1) 262 | image_id3 = db.add_image("image3.png", camera_id2) 263 | image_id4 = db.add_image("image4.png", camera_id2) 264 | 265 | # Create dummy keypoints. 266 | # 267 | # Note that COLMAP supports: 268 | # - 2D keypoints: (x, y) 269 | # - 4D keypoints: (x, y, theta, scale) 270 | # - 6D affine keypoints: (x, y, a_11, a_12, a_21, a_22) 271 | 272 | num_keypoints = 1000 273 | keypoints1 = np.random.rand(num_keypoints, 2) * (width1, height1) 274 | keypoints2 = np.random.rand(num_keypoints, 2) * (width1, height1) 275 | keypoints3 = np.random.rand(num_keypoints, 2) * (width2, height2) 276 | keypoints4 = np.random.rand(num_keypoints, 2) * (width2, height2) 277 | 278 | db.add_keypoints(image_id1, keypoints1) 279 | db.add_keypoints(image_id2, keypoints2) 280 | db.add_keypoints(image_id3, keypoints3) 281 | db.add_keypoints(image_id4, keypoints4) 282 | 283 | # Create dummy matches. 284 | 285 | M = 50 286 | matches12 = np.random.randint(num_keypoints, size=(M, 2)) 287 | matches23 = np.random.randint(num_keypoints, size=(M, 2)) 288 | matches34 = np.random.randint(num_keypoints, size=(M, 2)) 289 | 290 | db.add_matches(image_id1, image_id2, matches12) 291 | db.add_matches(image_id2, image_id3, matches23) 292 | db.add_matches(image_id3, image_id4, matches34) 293 | 294 | # Commit the data to the file. 295 | 296 | db.commit() 297 | 298 | # Read and check cameras. 
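# Camera params were stored as raw float64 blobs by add_camera(); blob_to_array()
# below restores them to numpy arrays so they can be compared with the originals.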
299 | 300 | rows = db.execute("SELECT * FROM cameras") 301 | 302 | camera_id, model, width, height, params, prior = next(rows) 303 | params = blob_to_array(params, np.float64) 304 | assert camera_id == camera_id1 305 | assert model == model1 and width == width1 and height == height1 306 | assert np.allclose(params, params1) 307 | 308 | camera_id, model, width, height, params, prior = next(rows) 309 | params = blob_to_array(params, np.float64) 310 | assert camera_id == camera_id2 311 | assert model == model2 and width == width2 and height == height2 312 | assert np.allclose(params, params2) 313 | 314 | # Read and check keypoints. 315 | 316 | keypoints = dict( 317 | (image_id, blob_to_array(data, np.float32, (-1, 2))) 318 | for image_id, data in db.execute( 319 | "SELECT image_id, data FROM keypoints")) 320 | 321 | assert np.allclose(keypoints[image_id1], keypoints1) 322 | assert np.allclose(keypoints[image_id2], keypoints2) 323 | assert np.allclose(keypoints[image_id3], keypoints3) 324 | assert np.allclose(keypoints[image_id4], keypoints4) 325 | 326 | # Read and check matches. 327 | 328 | pair_ids = [image_ids_to_pair_id(*pair) for pair in 329 | ((image_id1, image_id2), 330 | (image_id2, image_id3), 331 | (image_id3, image_id4))] 332 | 333 | matches = dict( 334 | (pair_id_to_image_ids(pair_id), 335 | blob_to_array(data, np.uint32, (-1, 2))) 336 | for pair_id, data in db.execute("SELECT pair_id, data FROM matches") 337 | ) 338 | 339 | assert np.all(matches[(image_id1, image_id2)] == matches12) 340 | assert np.all(matches[(image_id2, image_id3)] == matches23) 341 | assert np.all(matches[(image_id3, image_id4)] == matches34) 342 | 343 | # Clean up. 344 | 345 | db.close() 346 | 347 | if os.path.exists(args.database_path): 348 | os.remove(args.database_path) 349 | 350 | 351 | if __name__ == "__main__": 352 | example_usage() 353 | -------------------------------------------------------------------------------- /construct_evaluation_metadata.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 2 | from path import Path 3 | import random 4 | import pandas as pd 5 | from tqdm import tqdm 6 | import numpy as np 7 | 8 | 9 | parser = ArgumentParser(description='Create a file list for tests based on a set of movement constraints, create a FPV file', 10 | formatter_class=ArgumentDefaultsHelpFormatter) 11 | 12 | parser.add_argument('--dataset_dir', metavar='DIR', type=Path, required=True, 13 | help='folder containing the converted dataset') 14 | parser.add_argument('--split', type=float, default=0, 15 | help="proportion between train and test. 
By default, the whole dataset serves for evaluation") 16 | parser.add_argument('--seed', type=int, default=0, 17 | help='seed for random classification between train and val') 18 | parser.add_argument('--verbose', '-v', action='count', default=0) 19 | parser.add_argument('--max_num_samples', default=500, type=int) 20 | parser.add_argument('--min_shift', default=0, type=int, 21 | help='Minimum of former frames with valid odometry') 22 | parser.add_argument('--allow_interpolated_frames', action='store_true', 23 | help='If set, will consider frames with interpolated odometry to be valid') 24 | 25 | 26 | def flight_path_vector(sequence, max_shift): 27 | """Get the Flight Path Vector for each frame of the sequence 28 | 29 | Args: 30 | sequence (pd.DataFrame): table of a particular sequence, with pose and intrinsics info 31 | max_shift (int): max shift that will be used to deduce the mean speed vector of the camera 32 | 33 | Returns: 34 | [type]: [description] 35 | """ 36 | tvec = sequence[["pose03", "pose13", "pose23"]] 37 | displacement = np.zeros_like(tvec) 38 | for j in range(1, max_shift): 39 | displacement += tvec.diff(j) / j 40 | # TODO Note that this is only valid for pinhole cameras. 41 | fpv_x = sequence["fx"] * displacement["pose03"] / displacement["pose23"] + sequence["cx"] 42 | fpv_y = sequence["fy"] * displacement["pose13"] / displacement["pose23"] + sequence["cy"] 43 | return fpv_x, fpv_y 44 | 45 | 46 | def main(): 47 | args = parser.parse_args() 48 | random.seed(args.seed) 49 | folder_tree = args.dataset_dir.walkdirs() 50 | video_sequences = [] 51 | for f in folder_tree: 52 | if f.files('*.jpg'): 53 | video_sequences.append(f) 54 | 55 | # Select a subset of the videos for training. It won't be used for 56 | # constructing the validation set. 57 | random.shuffle(video_sequences) 58 | n = len(video_sequences) 59 | train_videos = video_sequences[:int(n*args.split)] 60 | test_videos = video_sequences[int(n*args.split):] 61 | 62 | total_valid_frames = [] 63 | for v in tqdm(test_videos): 64 | metadata = pd.read_csv(v / "metadata.csv") 65 | # Construct table of frames with valid odometry 66 | # If option selected, this includes the interpolated frames 67 | valid_odometry_frames = metadata["registered"] 68 | if not args.allow_interpolated_frames: 69 | valid_odometry_frames = valid_odometry_frames & ~metadata["interpolated"] 70 | valid_diff = valid_odometry_frames.astype(float).diff() 71 | # Get start and end of validity. 72 | # The way we can reconstruct sequences only containing valid frames 73 | invalidity_start = valid_diff.index[valid_diff == -1].tolist() 74 | validity_start = valid_diff.index[valid_diff == 1].tolist() 75 | if valid_odometry_frames.iloc[0]: 76 | validity_start = [0] + validity_start 77 | if valid_odometry_frames.iloc[-1]: 78 | invalidity_start.append(len(valid_odometry_frames)) 79 | # valid_sequences is a list of dataframes with only frames with valid odometry 80 | valid_sequences = [metadata.iloc[s:e].copy() for s, e in zip(validity_start, invalidity_start)] 81 | for s in valid_sequences: 82 | fpv_x, fpv_y = flight_path_vector(s, max_shift=3) 83 | s["fpv_x"] = fpv_x 84 | s["fpv_y"] = fpv_y 85 | 86 | # Get valid frames for depth : 87 | # - Has more than frames before it that have a valid odometry 88 | # This is useful for algorithms that require multiple frames with known odometry. 89 | # For that we just discard the first frames of the sequence. 
90 | # - Is not interpolated 91 | valid_frames = s.iloc[args.min_shift:] 92 | valid_frames = valid_frames[~valid_frames["interpolated"]] 93 | valid_frames["image_path"] = [args.dataset_dir.relpathto(v) / Path(f).basename() 94 | for f in valid_frames["image_path"].values] 95 | 96 | # Add the valid frames of this sequence to the list of all valid frames 97 | total_valid_frames.append(valid_frames) 98 | 99 | total_valid_frames_df = pd.concat(total_valid_frames) 100 | 101 | # Select a subset of this tables for evaluation. Each row represent a frame that we can use for depth 102 | # evaluation if we want. 103 | if len(total_valid_frames_df) <= args.max_num_samples: 104 | # We don't have enough valid frames, just take all the frames 105 | print("Warning : Dataset has not enough valid frames, " 106 | "constructing a test set with only {} frames".format(len(total_valid_frames_df))) 107 | final_frames = total_valid_frames_df 108 | else: 109 | final_frames = total_valid_frames_df.sample(args.max_num_samples, random_state=args.seed) 110 | 111 | # Construct final metdata : 112 | train_dirs_list_path = args.dataset_dir / "train_folders.txt" 113 | image_list_path = args.dataset_dir / "test_files.txt" 114 | fpv_list_path = args.dataset_dir / "fpv.txt" 115 | with open(image_list_path, 'w') as f: 116 | f.writelines(line + "\n" for line in final_frames["image_path"].values) 117 | np.savetxt(fpv_list_path, final_frames[["fpv_x", "fpv_y"]].values) 118 | if len(train_videos) > 0: 119 | with open(train_dirs_list_path, 'w') as f: 120 | f.writelines([folder + "\n" for folder in train_videos]) 121 | 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /convert_euroc.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from path import Path 4 | import yaml 5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 6 | from colmap_util.read_model import Image, Camera, Point3D, write_model, rotmat2qvec 7 | import meshlab_xml_writer as mxw 8 | from tqdm import tqdm 9 | from pyntcloud import PyntCloud 10 | from scipy.spatial.transform import Rotation, Slerp 11 | from scipy.interpolate import interp1d 12 | from wrappers import FFMpeg 13 | 14 | parser = ArgumentParser(description='Convert EuroC dataset to COLMAP', 15 | formatter_class=ArgumentDefaultsHelpFormatter) 16 | 17 | parser.add_argument('--root', metavar='DIR', type=Path, help='path to root folder eof EuRoC, where V[N]_[M]_[difficulty] folders should be') 18 | parser.add_argument('--output_dir', metavar='DIR', default=None, type=Path) 19 | parser.add_argument('--pointcloud_to_colmap', action='store_true') 20 | parser.add_argument('--colmap_format', choices=['.txt', '.bin'], default='.txt') 21 | parser.add_argument("--ffmpeg", default="ffmpeg", type=Path) 22 | parser.add_argument('--log', default=None, type=Path) 23 | parser.add_argument('-v', '--verbose', action="count", default=0) 24 | 25 | 26 | def get_cam(yaml_path, cam_id): 27 | with open(yaml_path) as f: 28 | cam_dict = yaml.load(f, Loader=yaml.SafeLoader) 29 | 30 | calib = cam_dict["T_BS"] 31 | calib_matrix = np.array(calib["data"]).reshape((calib["rows"], calib["cols"])) 32 | assert cam_dict["distortion_model"] == "radial-tangential" 33 | w, h = cam_dict["resolution"] 34 | cam = Camera(id=cam_id, 35 | model="OPENCV", 36 | width=w, 37 | height=h, 38 | params=np.array(cam_dict["intrinsics"] + cam_dict["distortion_coefficients"])) 
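# COLMAP's OPENCV model expects params ordered as (fx, fy, cx, cy, k1, k2, p1, p2),
# which matches EuRoC's "intrinsics" list followed by its radial-tangential
# "distortion_coefficients", so the two lists can simply be concatenated.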
39 | 40 | return cam, calib_matrix 41 | 42 | 43 | def get_vicon_calib(yaml_path): 44 | with open(yaml_path) as f: 45 | vicon_dict = yaml.load(f, Loader=yaml.SafeLoader) 46 | 47 | calib = vicon_dict["T_BS"] 48 | return np.array(calib["data"]).reshape((calib["rows"], calib["cols"])) 49 | 50 | 51 | def create_image(img_id, cam_id, file_path, drone_tvec, drone_matrix, image_calib, vicon_calib): 52 | drone_full_matrix = np.concatenate((np.hstack((drone_matrix, drone_tvec[:, None])), np.array([0, 0, 0, 1]).reshape(1, 4))) 53 | image_matrix = drone_full_matrix @ np.linalg.inv(vicon_calib) @ image_calib 54 | colmap_matrix = np.linalg.inv(image_matrix) 55 | colmap_qvec = rotmat2qvec(colmap_matrix[:3, :3]) 56 | colmap_tvec = colmap_matrix[:3, -1] 57 | 58 | return Image(id=img_id, qvec=colmap_qvec, tvec=colmap_tvec, 59 | camera_id=cam_id, name=file_path, 60 | xys=[], point3D_ids=[]), image_matrix[:3, -1] 61 | 62 | 63 | def convert_cloud(input_dir, output_dir): 64 | cloud_path = input_dir / "data.ply" 65 | if not cloud_path.isfile(): 66 | return None 67 | cloud = PyntCloud.from_file(cloud_path) 68 | cloud.points = cloud.points[['x', 'y', 'z', 'intensity']] 69 | yaml_path = input_dir / "sensor.yaml" 70 | with open(yaml_path) as f: 71 | cloud_dict = yaml.load(f, Loader=yaml.SafeLoader) 72 | calib = cloud_dict["T_WR"] 73 | transform = np.array(calib["data"]).reshape((calib["rows"], calib["cols"])) 74 | output_ply = output_dir / "data.ply" 75 | mxw.create_project(output_dir / 'data.mlp', [output_ply], labels=None, transforms=[transform]) 76 | cloud.to_file(output_ply) 77 | return cloud 78 | 79 | 80 | def main(): 81 | args = parser.parse_args() 82 | scenes = ["V1", "V2"] 83 | ffmpeg = FFMpeg(args.ffmpeg, verbose=args.verbose, logfile=args.log) 84 | for s in scenes: 85 | pointcloud = None 86 | lidar_output = args.output_dir / s / "Lidar" 87 | video_output = args.output_dir / s / "Videos" 88 | lidar_output.makedirs_p() 89 | video_output.makedirs_p() 90 | (args.output_dir / s / "Pictures").makedirs_p() 91 | 92 | colmap_model = {"cams": {}, 93 | "imgs": {}, 94 | "points": {}} 95 | video_sequences = sorted(args.root.dirs("{}*".format(s))) 96 | cam_id = 0 97 | for v in video_sequences: 98 | mav = v / "mav0" 99 | cam_dirs = [mav/"cam0", mav/"cam1"] 100 | vicon_dir = mav/"state_groundtruth_estimate0" 101 | if pointcloud is None: 102 | cloud = convert_cloud(mav/"pointcloud0", lidar_output) 103 | 104 | vicon_poses = pd.read_csv(vicon_dir/"data.csv") 105 | vicon_poses = vicon_poses.set_index("#timestamp") 106 | min_ts, max_ts = min(vicon_poses.index), max(vicon_poses.index) 107 | t_prefix = " p_RS_R_{} [m]" 108 | q_prefix = " q_RS_{} []" 109 | drone_tvec = vicon_poses[[t_prefix.format(dim) for dim in 'xyz']].values 110 | drone_qvec = Rotation.from_quat(vicon_poses[[q_prefix.format(dim) for dim in 'xyzw']].values) 111 | drone_qvec_slerp = Slerp(vicon_poses.index, drone_qvec) 112 | drone_tvec_interp = interp1d(vicon_poses.index, drone_tvec.T) 113 | vicon_calib = get_vicon_calib(vicon_dir/"sensor.yaml") 114 | for cam in cam_dirs: 115 | output_video_file = video_output/"{}_{}.mp4".format(v.stem, cam.stem) 116 | image_georef = [] 117 | image_rel_paths = [] 118 | image_ids = [] 119 | qvecs = [] 120 | print("Converting camera {} from video {}...".format(cam.relpath(v), v)) 121 | if len(colmap_model["imgs"].keys()) == 0: 122 | last_image_id = 0 123 | else: 124 | last_image_id = max(colmap_model["imgs"].keys()) + 1 125 | colmap_cam, cam_calib = get_cam(cam/"sensor.yaml", cam_id) 126 | colmap_model["cams"][cam_id] = colmap_cam 
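# Image timestamps in data.csv are in nanoseconds; the ground-truth body poses are
# interpolated at those timestamps below (Slerp for rotations, linear interpolation
# for translations) before being converted to COLMAP's world-to-camera convention.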
127 | metadata = pd.read_csv(cam/"data.csv").sort_values(by=['#timestamp [ns]']) 128 | metadata["camera_model"] = "OPENCV" 129 | metadata["width"] = colmap_cam.width 130 | metadata["height"] = colmap_cam.height 131 | metadata["camera_params"] = [tuple(colmap_cam.params)] * len(metadata) 132 | metadata["time"] = metadata['#timestamp [ns]'] 133 | metadata = metadata[(metadata['time'] > min_ts) & (metadata['time'] < max_ts)] 134 | tvec_interpolated = drone_tvec_interp(metadata['time']).T 135 | qvec_interpolated = drone_qvec_slerp(metadata['time']) 136 | # Convert time from nanoseconds to microseconds for compatibility 137 | metadata['time'] = metadata['time'] * 1e-3 138 | for img_id, (filename, current_tvec, current_qvec) in tqdm(enumerate(zip(metadata["filename"].values, 139 | tvec_interpolated, 140 | qvec_interpolated)), 141 | total=len(metadata)): 142 | final_path = args.root.relpathto(cam / "data") / filename 143 | image_rel_paths.append(final_path) 144 | colmap_model["imgs"][img_id + last_image_id], georef = create_image(img_id + last_image_id, cam_id, 145 | final_path, current_tvec, 146 | current_qvec.as_matrix(), 147 | cam_calib, vicon_calib) 148 | image_georef.append(georef) 149 | image_ids.append(img_id + last_image_id) 150 | qvecs.append(current_qvec.as_quat()) 151 | 152 | metadata['x'], metadata['y'], metadata['z'] = np.array(image_georef).transpose() 153 | qvecs_array = np.array(qvecs).transpose() 154 | for coord, title in zip(qvecs_array, 'xyzw'): 155 | metadata['frame_quat_{}'.format(title)] = coord 156 | metadata['image_path'] = image_rel_paths 157 | metadata['location_valid'] = True 158 | metadata['indoor'] = True 159 | metadata['video'] = cam 160 | framerate = len(metadata) / np.ptp(metadata['time'].values * 1e-6) 161 | metadata['framerate'] = framerate 162 | # Copy images for ffmpeg 163 | for i, f in enumerate(metadata["filename"]): 164 | (cam / "data" / f).copy(video_output / "tmp_{:05d}.png".format(i)) 165 | glob_pattern = str(video_output / "tmp_%05d.png") 166 | ffmpeg.create_video(output_video_file, glob_pattern, fps=framerate, glob=False) 167 | frames_to_delete = video_output.files("tmp*") 168 | for f in frames_to_delete: 169 | f.remove() 170 | # Save metadata in csv file 171 | metadata_file_path = output_video_file.parent / "{}_metadata.csv".format(output_video_file.stem) 172 | metadata.to_csv(metadata_file_path) 173 | cam_id += 1 174 | 175 | points = {} 176 | if args.pointcloud_to_colmap and cloud is not None: 177 | subsample = 1 178 | print("Converting ...") 179 | npy_points = cloud.points[['x', 'y', 'z', 'intensity']].values[::subsample] 180 | for id_point, row in tqdm(enumerate(npy_points), total=len(npy_points)): 181 | xyz = row[:3] 182 | gray_level = int(row[-1]*255) 183 | rgb = np.array([gray_level] * 3) 184 | points[id_point] = Point3D(id=id_point, xyz=xyz, rgb=rgb, 185 | error=0, image_ids=np.array([]), 186 | point2D_idxs=np.array([])) 187 | with open(args.output_dir/"images.txt", "w") as f1, open(args.root/"georef.txt", "w") as f2: 188 | for path, pos in zip(image_rel_paths, image_georef): 189 | f1.write(path + "\n") 190 | f2.write("{} {} {} {}\n".format(path, *pos)) 191 | colmap_output = args.output_dir / s / "colmap_from_GT" 192 | colmap_output.makedirs_p() 193 | write_model(colmap_model["cams"], 194 | colmap_model["imgs"], 195 | colmap_model["points"], 196 | colmap_output, 197 | args.colmap_format) 198 | 199 | 200 | if __name__ == '__main__': 201 | main() 202 | -------------------------------------------------------------------------------- /dxf_to_ply.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import ezdxf 3 | import meshio 4 | from path import Path 5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 6 | from tqdm import tqdm 7 | 8 | 9 | def dxf2numpy(dxf_file, centroid): 10 | print("Opening dxf file...") 11 | file = ezdxf.readfile(dxf_file) 12 | msp = file.modelspace() 13 | print("extracting edges from dxf file...") 14 | num_edges = sum(1 for _ in msp.query('POLYLINE')) 15 | edges = np.zeros((num_edges, 2, 3)) 16 | 17 | for i, pl in tqdm(enumerate(msp.query('POLYLINE')), total=num_edges): 18 | start, end = pl.points() 19 | edges[i, 0] = start 20 | edges[i, 1] = end 21 | edges -= centroid 22 | return edges.astype(np.float32) 23 | 24 | 25 | def edges2triangles(edges): 26 | vertices, edge_indices = np.unique(edges.reshape(-1, 3), axis=0, return_inverse=True) 27 | edge_indices = edge_indices.reshape(-1, 2) 28 | 29 | vertex_tree = {} 30 | 31 | def add_entry(tree, loc, target): 32 | if loc not in tree: 33 | tree[loc] = set() 34 | tree[loc].add(target) 35 | 36 | print("Constructing vertex tree...") 37 | for seg in tqdm(edge_indices): 38 | i_start, i_end = seg 39 | add_entry(vertex_tree, i_start, i_end) 40 | add_entry(vertex_tree, i_end, i_start) 41 | 42 | faces_set = set() 43 | 44 | print("Detecting triangles...") 45 | 46 | def sub_dict(tree, indices): 47 | return {k: tree[k] for k in indices if k in tree} 48 | 49 | for v1, leaf1 in tqdm(vertex_tree.items()): 50 | for v2, leaf2 in sub_dict(vertex_tree, leaf1).items(): 51 | for v3, leaf3 in sub_dict(vertex_tree, leaf2).items(): 52 | if v1 in leaf3: 53 | faces_set.add(frozenset([v1, v2, v3])) 54 | 55 | faces = np.zeros((len(faces_set), 3), dtype=np.int32) 56 | for i, f in enumerate(faces_set): 57 | faces[i] = list(f) 58 | 59 | return vertices, faces 60 | 61 | 62 | parser = ArgumentParser(description='Convert a dxf file with only edges to a faced mesh, only counting triangles', 63 | formatter_class=ArgumentDefaultsHelpFormatter) 64 | 65 | parser.add_argument('--dxf', default="manoir.dxf", 66 | help='dxf file, must contain the wireframe') 67 | parser.add_argument('--centroid_path', default="centroid.txt", 68 | help='txt containing the centroid computed with las2ply.py') 69 | parser.add_argument('--output', default=None, 70 | help="output file name. 
By default, will be dxf path with \".dxf\" replaced with \"ply\"") 71 | 72 | 73 | def main(): 74 | args = parser.parse_args() 75 | if args.centroid_path is not None: 76 | centroid = np.loadtxt(args.centroid_path) 77 | else: 78 | centroid = np.zeros(3) 79 | 80 | if args.output is None: 81 | output_name = Path(args.dxf).stripext() 82 | output_path = str(output_name + ".ply") 83 | else: 84 | output_path = args.output 85 | 86 | edges = dxf2numpy(args.dxf, centroid) 87 | vertices, faces = edges2triangles(edges) 88 | 89 | cells = { 90 | "triangle": faces 91 | } 92 | meshio.write_points_cells( 93 | output_path, 94 | vertices, 95 | cells, 96 | file_format='ply-ascii' 97 | ) 98 | 99 | 100 | if __name__ == '__main__': 101 | main() 102 | -------------------------------------------------------------------------------- /edit_exif.py: -------------------------------------------------------------------------------- 1 | import piexif 2 | from fractions import Fraction 3 | 4 | 5 | def to_deg(value, loc): 6 | """convert decimal coordinates into degrees, munutes and seconds tuple 7 | 8 | Keyword arguments: value is float gps-value, loc is direction list ["S", "N"] or ["W", "E"] 9 | return: tuple like (25, 13, 48.343 ,'N') 10 | """ 11 | if value < 0: 12 | loc_value = loc[0] 13 | elif value > 0: 14 | loc_value = loc[1] 15 | else: 16 | loc_value = "" 17 | abs_value = abs(value) 18 | deg = int(abs_value) 19 | t1 = (abs_value-deg)*60 20 | min = int(t1) 21 | sec = round((t1 - min) * 60, 5) 22 | return [deg, min, sec], loc_value 23 | 24 | 25 | def to_dec(deg, min, sec, sign): 26 | result = deg[0] / deg[1] 27 | result += min[0] / (60 * min[1]) 28 | result += sec[0] / (60 * 60 * sec[1]) 29 | 30 | return sign * result 31 | 32 | 33 | def change_to_rational(number): 34 | """convert a number to rantional 35 | 36 | Keyword arguments: number 37 | return: tuple like (1, 2), (numerator, denominator) 38 | """ 39 | f = Fraction(str(number)) 40 | return (f.numerator, f.denominator) 41 | 42 | 43 | def set_gps_location(file_name, lat, lng, altitude): 44 | """Adds GPS position as EXIF metadata 45 | 46 | Keyword arguments: 47 | file_name -- image file 48 | lat -- latitude (as float) 49 | lng -- longitude (as float) 50 | altitude -- altitude (as float) 51 | 52 | """ 53 | lat_deg, ref_lat = to_deg(lat, ["S", "N"]) 54 | lng_deg, ref_lng = to_deg(lng, ["W", "E"]) 55 | 56 | exif_lat = list(map(change_to_rational, lat_deg)) 57 | exif_lng = list(map(change_to_rational, lng_deg)) 58 | 59 | ref = 1 if altitude < 0 else 0 60 | 61 | gps_ifd = { 62 | piexif.GPSIFD.GPSVersionID: (2, 0, 0, 0), 63 | piexif.GPSIFD.GPSAltitudeRef: ref, 64 | piexif.GPSIFD.GPSAltitude: change_to_rational(abs(altitude)), 65 | piexif.GPSIFD.GPSLatitudeRef: ref_lat, 66 | piexif.GPSIFD.GPSLatitude: exif_lat, 67 | piexif.GPSIFD.GPSLongitudeRef: ref_lng, 68 | piexif.GPSIFD.GPSLongitude: exif_lng, 69 | } 70 | 71 | exif_dict = {"GPS": gps_ifd} 72 | exif_bytes = piexif.dump(exif_dict) 73 | piexif.insert(exif_bytes, file_name) 74 | 75 | 76 | def get_gps_location(file_name): 77 | exif_dict = piexif.load(file_name) 78 | gps = exif_dict['GPS'] 79 | if len(gps) == 0: 80 | return 81 | 82 | ref_lat = gps[piexif.GPSIFD.GPSLatitudeRef] 83 | ref_lng = gps[piexif.GPSIFD.GPSLongitudeRef] 84 | ref_alt = gps[piexif.GPSIFD.GPSAltitudeRef] 85 | 86 | exif_lat = gps[piexif.GPSIFD.GPSLatitude] 87 | exif_lng = gps[piexif.GPSIFD.GPSLongitude] 88 | exif_alt = gps[piexif.GPSIFD.GPSAltitude] 89 | 90 | lat = to_dec(*exif_lat, (1 if ref_lat == b'N' else -1)) 91 | lng = to_dec(*exif_lng, (1 if ref_lng 
== b'E' else -1)) 92 | 93 | alt = exif_alt[0] / exif_alt[1] 94 | if ref_alt == 1: 95 | alt *= -1 96 | 97 | return lat, lng, alt 98 | -------------------------------------------------------------------------------- /evaluation_toolkit/README.md: -------------------------------------------------------------------------------- 1 | # Evaluation Toolkit 2 | 3 | Set of tools to run a particular algorithm on a dataset constructed with the validation set constructor and evaluate it, along with advanced statistics on depth values and pixel position in the image with respect to the flight path vector. 4 | 5 | ## Inference Example 6 | 7 | Get the last frame and a previous frame such that the displacement magnitude is as close to 30 cm as possible, under the constraint that the rotation is less than 1 radian. Each frame is preprocessed so that it has shape `[C, H, W]` and values in the range `[0, 1]` instead of `[0, 255]`. 8 | 9 | ```python 10 | from evaluation_toolkit import inferenceFramework 11 | 12 | engine = inferenceFramework(dataset_root, evaluation_list, lambda x: x.transpose(2, 0, 1).astype(np.float32)[None]/255) 13 | 14 | for sample in tqdm(engine): 15 | latest_frame, latest_intrinsics, _ = sample.get_frame() 16 | previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3) 17 | estimated_depth_map = my_model(latest_frame, previous_frame, previous_pose) 18 | engine.finish_frame(estimated_depth_map) 19 | mean_inference_time, output_depth_maps = engine.finalize(output_path='output.npz') 20 | ``` 21 | 22 | You can find an example usage of this Inference Framework for SfmLearner [here](https://github.com/ClementPinard/SfmLearner-Pytorch/tree/validation_set_constructor) 23 | 24 | ## Evaluation 25 | 26 | The evaluation step is a simple script that takes as input the computed depth maps (here, the file `output.npz`). You can combine multiple sets of computed depth maps to compare algorithms. 27 | 28 | ``` 29 | depth_evaluation --dataset_root /path/to/dataset/root --est_depth output1.npz output2.npz --algorithm_names name1 name2 --evaluation_list_path /path/to/evaluation_list.txt --flight_path_vector_list /path/to/flight_path_vector_list.txt <--scale_invariant> <--mask_path /path/to/mask.npy> --output_figures /path/to/figures/folder 30 | ``` 31 | 32 | It will output typical metrics and plot advanced statistics regarding the dataset and the depth estimations. 33 | Note that if you want to save the figures, you will need `xelatex` installed on your system.
Otherwise, don't specify a parameter to `--output_figures` and it will use `plt.show` -------------------------------------------------------------------------------- /evaluation_toolkit/evaluation_toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/evaluation_toolkit/evaluation_toolkit/__init__.py -------------------------------------------------------------------------------- /evaluation_toolkit/evaluation_toolkit/inference_toolkit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from path import Path 3 | from imageio import imread 4 | import time 5 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 6 | from scipy.spatial.transform import Rotation 7 | from tqdm import tqdm 8 | 9 | 10 | class Timer: 11 | """ 12 | Timer class is used to measure elapsed time, while being able to 13 | pause it when needed. This is useful to measure algorithm inference 14 | time without measuring time spent retrieving wanted images 15 | """ 16 | def __init__(self): 17 | self._start_time = None 18 | self._elapsed_time = 0 19 | 20 | def running(self): 21 | return self._start_time is not None 22 | 23 | def start(self): 24 | """Start a new timer""" 25 | if self._start_time is not None: 26 | return 27 | 28 | self._start_time = time.perf_counter() 29 | 30 | def stop(self): 31 | """Stop the timer, and report the elapsed time""" 32 | if self._start_time is None: 33 | return 34 | 35 | self._elapsed_time += time.perf_counter() - self._start_time 36 | self._start_time = None 37 | 38 | def get_elapsed(self): 39 | return self._elapsed_time 40 | 41 | def reset(self): 42 | self.__init__() 43 | 44 | 45 | class inferenceFramework(object): 46 | """Inference Framework used for simulating navigation conditions 47 | for depth algorithms on a dataset created by RDC. It also comes with a way to measure your inference time 48 | and to record your estimated depths. 49 | The framework is iterable, and each iteration gives an Inference Sample Object from which you can get images 50 | to compute depth on. 51 | 52 | Attributes: 53 | root (Path): Root directory where the Final output of RDC is stored. 54 | 55 | max_shift (float): Max number of frames the algorithm is allowed to search in the past. If the algorithm 56 | eg. wants to get a frame that was at a particular distance from the last frame, with a barely moving camera, 57 | the frame can only be as anterior as {max_shift} frames before, even if it means the movement won't be enough. 58 | 59 | estimated_depth_maps (dict): Dictionnary for estimated depth maps, as numpy arrays. Key is image path 60 | of image on which we estimated depth. 61 | 62 | inference_time (List): List of time spent by your algorithm for inference. 63 | Will be used at the end of the evaluation to compute the mean inference time 64 | 65 | frame_transform (function): function which will be used to transform images 66 | before returning them to the algorithm. The function takes a numpy array as 67 | an argument and can return anything your algorithm want, eg. a pytorch tensor. 
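        Example (a minimal sketch; the dataset root, test file list, ``my_transform`` and
        ``my_depth_model`` below are hypothetical placeholders)::

            engine = inferenceFramework("/path/to/dataset", test_files, frame_transform=my_transform)
            for sample in engine:
                frame, intrinsics, pose = sample.get_frame()
                engine.finish_frame(my_depth_model(frame))
            mean_time, depth_maps = engine.finalize("estimations.npz")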
68 | """ 69 | def __init__(self, root, test_files, max_shift=50, frame_transform=None): 70 | self.root = Path(root) 71 | self.test_files = test_files 72 | self.max_shift = max_shift 73 | self.frame_transform = frame_transform 74 | self.inference_time = [] 75 | self.estimated_depth_maps = {} 76 | 77 | def __getitem__(self, i): 78 | """Get item routine. Before returning the sample, the timer is triggered to measure inference time. 79 | 80 | Args: 81 | i (int): Position of the sample in the test_files list, which has been created with RDC 82 | 83 | Returns: 84 | InferenceSample: Object to compute depth 85 | """ 86 | timer = Timer() 87 | self.i = i 88 | self.current_sample = inferenceSample(self.root, 89 | self.test_files[i], 90 | self.max_shift, 91 | timer, self.frame_transform) 92 | self.current_sample.timer.start() 93 | return self.current_sample 94 | 95 | def finish_frame(self, estimated_depth): 96 | """Finish Frame routine: This method needs to be called each time your algorithm has 97 | finished the depth inference. It also stops the timer and stores the time elapsed for this 98 | sample to compute a mean inference time at the end of the evaluation. 99 | 100 | Args: 101 | estimated_depth (np.array): The output of your depth algorithm. It will then be stored in 102 | a dict, and then saved after when it will be completely populated. 103 | 104 | Returns: 105 | float: time elapsed for inference for this sample 106 | """ 107 | self.current_sample.timer.stop() 108 | elapsed = self.current_sample.timer.get_elapsed() 109 | self.inference_time.append(elapsed) 110 | self.estimated_depth_maps[self.current_sample.file] = estimated_depth 111 | return elapsed 112 | 113 | def finalize(self, output_path=None): 114 | """Finalize: this methods needs to be called at the end of the whole evaluation, 115 | when there is no sample left to estimate depth on. 116 | 117 | Args: 118 | output_path (Path, optional): Where to save all the estimated depth. It will 119 | be saved in a compressed numpy file. 120 | 121 | Returns: 122 | (float, dict): Return the mean inference time and the compute depth maps in a dictionnary 123 | """ 124 | if output_path is not None: 125 | np.savez(output_path, **self.estimated_depth_maps) 126 | mean_inference_time = np.mean(self.inference_time) 127 | return mean_inference_time, self.estimated_depth_maps 128 | 129 | def __len__(self): 130 | return len(self.test_files) 131 | 132 | 133 | class inferenceSample(object): 134 | """Inferance Sample class. Is used to get a particular frame with displacement constraints 135 | For example, you can take the last frame (of which you need to compute the depth map), 136 | and then want the frame that was 0.3 meters from the last one to ensure a sufficient parallax 137 | 138 | Attributes: 139 | root (Path): Same as inferenceFramework. Root directory where the Final output of RDC is stored. 140 | 141 | file (Path): image path of image of which we want to estimate depth. 142 | 143 | frame_transform (function) : Same as InferenceFramework. function used to transform loaded image 144 | into the data format of your choice. 145 | 146 | timer (Timer): timer used to measure time spent computing depth. All the frame gathering and transformation 147 | are not taken into account in order to only measure inference time. 148 | 149 | valid_frames (List of Path): Ordered list of frame paths representing the frame sequence that is going 150 | to be used to get the optimal frame pair/set for the algotihm you want to evaluate. 
151 | The order is descending: last frame is first and oldest frames are last. 152 | 153 | poses (np.array): Array of all the poses of the valid_frames list in the R,T format (3x4 matrix). 154 | They are computed relative to the last frame, and as such, first pose is identity 155 | 156 | rotation_angles (1D np.array): computed from poses, the angle magnitude between last frame and any given frame. 157 | This is useful when you don't want rotation to be too large. 158 | 159 | displacement (1D np.array): compute from poses, displacement magnitude between last frame and any given frame. 160 | Useful when you don't want frames to be too close to each other. 161 | 162 | intrinsics (np.array): Intrinsics for each frame, stored in a 3x3 matrix. 163 | """ 164 | def __init__(self, root, file, max_shift, timer, frame_transform=None): 165 | self.root = root 166 | self.file = file 167 | self.frame_transform = frame_transform 168 | self.timer = timer 169 | full_filepath = self.root / file 170 | scene = full_filepath.parent 171 | # Get all frames in the scene folder. Normally, there should be more than "max_shift" frames. 172 | scene_files = sorted(scene.files("*.jpg")) 173 | poses = np.genfromtxt(scene / "poses.txt").reshape((-1, 3, 4)) 174 | sample_id = scene_files.index(full_filepath) 175 | assert(sample_id >= max_shift) 176 | start_id = sample_id - max_shift 177 | # Get all frames between start_id (oldest frame) and sample_id. 178 | # Revert the list so that oldest frames are in the end, like in a buffer 179 | self.valid_frames = scene_files[start_id:sample_id + 1][::-1] 180 | # Flip_ud is equivalent to reverting the row and thus the same as [::-1] 181 | valid_poses = np.flipud(poses[start_id:sample_id + 1]) 182 | # All poses in the sequence should be valid 183 | assert not np.isnan(valid_poses.sum()) 184 | # Change the pose array so that instead of 3x4 matrices, we have 4x4 matrices, which we can invert 185 | last_line = np.broadcast_to(np.array([0, 0, 0, 1]), (valid_poses.shape[0], 1, 4)) 186 | valid_poses_full = np.concatenate([valid_poses, last_line], axis=1) 187 | self.poses = (np.linalg.inv(valid_poses_full[0]) @ valid_poses_full)[:, :3] 188 | R = self.poses[:, :3, :3] 189 | self.rotation_angles = Rotation.from_matrix(R).magnitude() 190 | self.displacements = np.linalg.norm(self.poses[:, :, -1], axis=-1) 191 | 192 | # Case 1 for intrinsics : Zoom level never changed and thus there's only one intrinsics 193 | # matrix for the whole video, stored in intrinsics.txt This is the most usual case 194 | # Case 2 : Each frame has its own intrinsics file _intrinsics.txt 195 | # Case is only here for later compatibility, but it has not been tested thoroughly 196 | if (scene / "intrinsics.txt").isfile(): 197 | self.intrinsics = np.stack([np.genfromtxt(scene / "intrinsics.txt")] * max_shift) 198 | else: 199 | intrinsics_files = [f.stripext() + "_intrinsics.txt" for f in self.valid_frames] 200 | self.intrinsics = np.stack([np.genfromtxt(i) for i in intrinsics_files]) 201 | 202 | def timer_decorator(func, *args, **kwargs): 203 | """ 204 | Decorator used to pause the timer and only restart it when returning the result. 205 | This is used to not penalize the inference algorithm when frame retrieving is slow, 206 | because in real conditions, it's possible you get the wanted frames immediately instead 207 | of searching for them in the memory. 
208 | """ 209 | def wrapper(self, *args, **kwargs): 210 | if self.timer.running(): 211 | self.timer.stop() 212 | res = func(self, *args, **kwargs) 213 | self.timer.start() 214 | else: 215 | res = func(self, *args, **kwargs) 216 | return res 217 | return wrapper 218 | 219 | @timer_decorator 220 | def get_frame(self, shift=0): 221 | """Basic function to get frame within a fixed shift. When used without parameters, it returns 222 | the sample frame. 223 | 224 | Args: 225 | shift (int, optional): Position relative to sample frame of the frame we want to get. 226 | Defaults to 0. 227 | 228 | Returns a tuple of 3: 229 | [Unknown type]: Output of the frame_transform function, used on the desired frame, loaded in a np array 230 | np.array: 3x3 intrinsics matrix of returned frame 231 | np.array: 3x4 pose matrix of returned frame 232 | """ 233 | file = self.valid_frames[shift] 234 | img = imread(file) 235 | if self.frame_transform is not None: 236 | img = self.frame_transform(img) 237 | return img, self.intrinsics[shift], self.poses[shift] 238 | 239 | @timer_decorator 240 | def get_previous_frame(self, shift=1, displacement=None, max_rot=1): 241 | """More advanced function, to get a frame within shift, displacement and rotation constraints. Timer is paused when this 242 | function is running. 243 | 244 | Args: 245 | shift (int, optional): As above. Position relative to sample frame of the frame we want to get. 246 | Defaults to 1. 247 | 248 | displacement (Float, optional): Desired displacement (in meters) between sample frame and 249 | the frame we want to get. This parameter overwrite the shift parameter. Defaults to None. 250 | 251 | max_rot (int, optional): Maximum Rotation, in radians. The function cannot return a frame 252 | with a higher rotation than max_rot. It assumes rotation is growing with time 253 | (only true for the first frames). The maximum shift of the returned frame corresponds to 254 | the first frame with a rotation above this threshold. Defaults to 1. 255 | 256 | Returns a tuple of 3: 257 | [Unknow type]: Output of the frame_transform function, 258 | used on the frame that best represent the different constrains. 259 | np.array: 3x3 intrinsics matrix of returned frame 260 | np.array: 3x4 pose matrix of returned frame 261 | """ 262 | if displacement is not None: 263 | shift = max(1, np.abs(self.displacements - displacement).argmin()) 264 | rot_valid = self.rotation_angles < max_rot 265 | assert sum(rot_valid[1: shift + 1] > 0), "Rotation is always higher than {}".format(max_rot) 266 | # Highest shift that has rotation below max_rot thresold 267 | final_shift = np.where(rot_valid[-1 - shift:])[0][-1] 268 | return self.get_frame(final_shift) 269 | 270 | @timer_decorator 271 | def get_previous_frames(self, shifts=[1], displacements=None, max_rot=1): 272 | """Helper function to get multiple frames at the same time. with the previous function. 
273 | 274 | Args: 275 | shifts (List): list of wanted shifts 276 | displacements (List): List of wanted displacements, overwrite shifts 277 | max_rot (int, optional): Maximum Rotation, see previous function 278 | 279 | Returns a tuple of 3: 280 | List: Outputs of the frame_transform function for each desired frame 281 | List: 3x3 intrinsics matrices of returned frames 282 | List: 3x4 pose matrices of returned frames 283 | """ 284 | if displacements is not None: 285 | frames = zip(*[self.get_previous_frame(displacement=d, max_rot=max_rot) for d in displacements]) 286 | else: 287 | frames = zip(*[self.get_previous_frame(shift=s, max_rot=max_rot) for s in shifts]) 288 | return frames 289 | 290 | 291 | def inference_toolkit_example(): 292 | parser = ArgumentParser(description='Example usage of Inference toolkit', 293 | formatter_class=ArgumentDefaultsHelpFormatter) 294 | 295 | parser.add_argument('--dataset_root', metavar='DIR', type=Path) 296 | parser.add_argument('--depth_output', metavar='FILE', type=Path, 297 | help='where to store the estimated depth maps, must be a npy file') 298 | parser.add_argument('--evaluation_list_path', metavar='PATH', type=Path, 299 | help='File with list of images to test for depth evaluation') 300 | parser.add_argument('--scale-invariant', action='store_true', 301 | help='If selected, will rescale depth map with ratio of medians') 302 | args = parser.parse_args() 303 | 304 | with open(args.evaluation_list_path) as f: 305 | evaluation_list = [line[:-1] for line in f.readlines()] 306 | 307 | def my_model(frame, previous, pose): 308 | # Mock up function that uses two frames and translation magnitude 309 | # Replace it with your algorithm, eg. DepthNet model 310 | return np.linalg.norm(pose[:, -1]) * np.linalg.norm(frame - previous, axis=-1) 311 | 312 | # This is our transform function. 
It converts the uint8 array into a float array, 313 | # divides it by 255 to have values in [0,1] and adds the batch dimensions 314 | def my_transform(img): 315 | return img.transpose(2, 0, 1).astype(np.float32)[None] / 255 316 | 317 | engine = inferenceFramework(args.dataset_root, evaluation_list, my_transform) 318 | for sample in tqdm(engine): 319 | latest_frame, latest_intrinsics, _ = sample.get_frame() 320 | previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3) 321 | engine.finish_frame(my_model(latest_frame, previous_frame, previous_pose)) 322 | 323 | mean_time, _ = engine.finalize(args.depth_output) 324 | print("Mean time per sample : {:.2f}us".format(1e6 * mean_time)) 325 | 326 | 327 | if __name__ == '__main__': 328 | inference_toolkit_example() 329 | -------------------------------------------------------------------------------- /evaluation_toolkit/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup(name='inference toolkit', 7 | license='MIT', 8 | author='Clément Pinard', 9 | author_email='clempinard@gmail.com', 10 | description='Inference and evaluation routines to test on a dataset constructed with validation set constructor', 11 | long_description=long_description, 12 | long_description_content_type="text/markdown", 13 | packages=["evaluation_toolkit"], 14 | entry_points={ 15 | 'console_scripts': [ 16 | 'depth_evaluation = evaluation_toolkit.depth_evaluation:main' 17 | ] 18 | }, 19 | install_requires=[ 20 | 'numpy', 21 | 'pandas', 22 | 'path', 23 | 'imageio', 24 | 'scikit-image', 25 | 'scipy', 26 | 'tqdm' 27 | ], 28 | classifiers=[ 29 | "Programming Language :: Python :: 3", 30 | "License :: OSI Approved :: MIT License", 31 | "Intended Audience :: Science/Research" 32 | ] 33 | ) 34 | -------------------------------------------------------------------------------- /extract_pictures_from_model.py: -------------------------------------------------------------------------------- 1 | from colmap_util import read_model as rm 2 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 3 | from path import Path 4 | import pandas as pd 5 | 6 | parser = ArgumentParser(description='create a new colmap model with only the frames of selected video', 7 | formatter_class=ArgumentDefaultsHelpFormatter) 8 | 9 | parser.add_argument('--input_model', metavar='DIR', type=Path, required=True, 10 | help='folder where the cameras.bin and images.bin are located') 11 | parser.add_argument('--output_model', metavar='DIR', type=Path, required=True, 12 | help='Output folder where the modified COLMAP model will be saved') 13 | parser.add_argument('--output_format', choices=['.txt', '.bin'], default='.txt') 14 | group = parser.add_mutually_exclusive_group() 15 | group.add_argument('--metadata_path', metavar="CSV", type=Path, default=None, 16 | help='Path to metadata CSV file of the desired video. ' 17 | 'Usually in /pictures/Videos///metadata.csv') 18 | group.add_argument('--picture_list_path', type=Path, default=None, 19 | help='Path to list of picture to extract from model. 
' 20 | 'Picture paths must be relatvie to colmap root') 21 | 22 | 23 | def extract_pictures(input, output, picture_list, output_format='.bin'): 24 | cameras = rm.read_cameras_binary(input / "cameras.bin") 25 | images = rm.read_images_binary(input / "images.bin") 26 | images_per_name = {} 27 | camera_ids = [] 28 | 29 | def add_image(image): 30 | images_per_name[image.name] = image._replace(xys=[], point3D_ids=[]) 31 | cam_id = image.camera_id 32 | if cam_id not in camera_ids: 33 | camera_ids.append(cam_id) 34 | 35 | for id, image in images.items(): 36 | if image.name in picture_list: 37 | add_image(image) 38 | 39 | if len(images_per_name) == 1: 40 | # Add also first picture so that we have multiple pictures. 41 | # Otherwise, GourndTruth Creator will error 42 | for id, image in images.items(): 43 | if image.name not in picture_list: 44 | add_image(image) 45 | break 46 | 47 | output_cameras = {cid: cameras[cid] for cid in camera_ids if cid in cameras.keys()} 48 | 49 | rm.write_model(output_cameras, images_per_name, {}, output, output_format) 50 | 51 | return len(images_per_name) > 1 52 | 53 | 54 | def main(): 55 | args = parser.parse_args() 56 | if args.metadata_path is not None: 57 | picture_list = pd.read_csv(args.metadata_path)["image_path"].values 58 | elif args.picture_list_path is not None: 59 | with open(args.picture_list_path, 'r') as f: 60 | picture_list = [line[:-1] for line in f.readlines()] 61 | extract_pictures(args.input_model, args.output_model, picture_list, args.output_format) 62 | return 63 | 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /extract_video_with_gps.py: -------------------------------------------------------------------------------- 1 | import edit_exif 2 | from subprocess import Popen, PIPE 3 | from path import Path 4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import ffmpeg 8 | import anafi_metadata as am 9 | 10 | 11 | def extract_images(folder_path, file_path, fps): 12 | 13 | print("exporting to images with ffmpeg ...") 14 | if fps is not None: 15 | fps_arg = ["-vf", "fps={}".format(fps)] 16 | else: 17 | fps_arg = [] 18 | 19 | ffmpeg = Popen(["ffmpeg", "-y", "-i", str(file_path), "-qscale:v", "2"] + fps_arg + [str(folder_path/"%05d.jpg")], 20 | stdout=PIPE, stderr=PIPE) 21 | ffmpeg.wait() 22 | return sorted(folder_path.files("*.jpg")) 23 | 24 | 25 | def extract_metadata(folder_path, file_path, native_wrapper): 26 | output_file = folder_path/"metadata.csv" 27 | print("extracting metadata with vmeta_extract...") 28 | vmeta_extract = Popen([native_wrapper, "vmeta-extract", str(file_path), "--csv", str(output_file)], 29 | stdout=PIPE, stderr=PIPE) 30 | vmeta_extract.wait() 31 | return output_file 32 | 33 | 34 | def add_gps_to_exif(metadata, image_paths, fps): 35 | metadata = metadata.set_index("time") 36 | metadata.index = pd.to_datetime(metadata.index, unit="us") 37 | 38 | if fps is not None: 39 | metadata = metadata.resample("{:.3f}S".format(1/fps)).first() 40 | metadata.to_csv(image_paths[0].parent / "metadata_{}fps.csv".format(fps), sep=" ") 41 | 42 | print("Modifying gps EXIF for colmap...") 43 | for img_path, row in tqdm(zip(image_paths, metadata.iterrows()), total=len(image_paths)): 44 | if row[1]["location_valid"] == 1: 45 | edit_exif.set_gps_location(img_path, 46 | row[1]["location_latitude"], 47 | row[1]["location_longitude"], 48 | row[1]["location_altitude"]) 49 | 50 | 51 | def 
save_images_path_list(output_folder, origin, images_path_list): 52 | relative_path_lists = [origin.relpathto(img) + '\n' for img in images_path_list] 53 | with open(output_folder/'images.txt', 'w') as f: 54 | f.writelines(relative_path_lists) 55 | 56 | 57 | def workflow(root, output_folder, video_path, args): 58 | print("Generating images with gps for video {}".format(str(video_path))) 59 | output_folder /= video_path.stem 60 | if args.fps is not None: 61 | output_folder += "_{}fps".format(args.fps) 62 | output_folder.mkdir_p() 63 | images_path_list = ffmpeg.extract_images(output_folder, video_path, args.fps) 64 | metadata = am.extract_metadata(output_folder, video_path, args.nw) 65 | add_gps_to_exif(metadata, images_path_list, args.fps) 66 | save_images_path_list(output_folder, args.origin or root, images_path_list) 67 | 68 | 69 | parser = ArgumentParser(description='image extractor from parrot video', 70 | formatter_class=ArgumentDefaultsHelpFormatter) 71 | 72 | parser.add_argument('--input', metavar='PATH', default="~/Images/scan manoir/anafi/video", 73 | help='path to video folder or video file', type=Path) 74 | parser.add_argument('--fps', metavar='F', default=None, type=int, 75 | help='fps') 76 | parser.add_argument('--output_folder', metavar='DIR', default=None, type=Path) 77 | parser.add_argument('--origin', metavar='DIR', default=None, type=Path, 78 | help='folder relative to which the images path list will be generated') 79 | parser.add_argument('--nw', default='', 80 | help="native-wrapper.sh file location") 81 | 82 | if __name__ == '__main__': 83 | file_exts = ['.mp4', '.MP4'] 84 | args = parser.parse_args() 85 | if args.input.isfile() and args.input.ext in file_exts: 86 | root = args.input.parent 87 | videos = [args.input] 88 | elif args.input.isdir(): 89 | root = args.input 90 | videos = sum([args.input.walkfiles('*{}'.format(ext)) for ext in file_exts], []) 91 | print("Found {} videos".format(len(videos))) 92 | 93 | if args.output_folder is None: 94 | args.output_folder = root 95 | for video_path in videos: 96 | workflow(root, args.output_folder/(video_path.parent.relpath(root)), video_path, args) 97 | -------------------------------------------------------------------------------- /filter_colmap_model.py: -------------------------------------------------------------------------------- 1 | from colmap_util import read_model as rm 2 | import pandas as pd 3 | import numpy as np 4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 5 | from path import Path 6 | import matplotlib.pyplot as plt 7 | from scipy.signal import savgol_filter 8 | from scipy.spatial.transform import Rotation, Slerp 9 | 10 | parser = ArgumentParser(description='Filter COLMAP model of a single video by discards frames with impossible acceleration. 
' 11 | 'The script then interplate dismissed frames and smooth out the trajectory with a SavGol filter.', 12 | formatter_class=ArgumentDefaultsHelpFormatter) 13 | 14 | parser.add_argument('--input_images_colmap', metavar='FILE', type=Path, required=True, 15 | help='Input COLMAP images.bin or images.txt file to filter.') 16 | parser.add_argument('--metadata_path', metavar='FILE', type=Path, required=True, 17 | help='Metadata CSV file of filtered video') 18 | parser.add_argument('--output_images_colmap', metavar='FILE', type=Path, required=True, 19 | help='Output images.bin or images.txt file with filtered frame localizations') 20 | parser.add_argument('--interpolated_frames_list', type=Path, required=True, 21 | help='Outpt list containing interpolated frames in order to discard them from ground-truth validation') 22 | parser.add_argument('--filter_degree', default=3, type=int, 23 | help='Degree of SavGol filter, higher means less filtering and more neighbouring frames') 24 | parser.add_argument('--filter_time', default=0.1, type=float, 25 | help='Time windows used by filter. Must be enough frames for filter degree') 26 | parser.add_argument('--visualize', action="store_true") 27 | parser.add_argument('--threshold_t', default=0.01, type=float, 28 | help='Authorized deviation from SavGol filter output for position. Above, frame will be discarded') 29 | parser.add_argument('--threshold_q', default=5e-3, type=float, 30 | help='Same as threshold_t but for orientation with quaternions') 31 | 32 | 33 | ''' 34 | def colmap_to_world(tvec, qvec): 35 | if any(np.isnan(qvec)): 36 | return qvec, tvec 37 | cam2world = rm.qvec2rotmat(qvec).T 38 | world_tvec = - cam2world @ tvec 39 | wolrd_qvec = np.array((qvec[0], *qvec[1:])) 40 | return wolrd_qvec, world_tvec 41 | ''' 42 | 43 | 44 | def colmap_to_world(tvec, qvec): 45 | quats = Rotation.from_quat(qvec) 46 | return -quats.apply(tvec, inverse=True) 47 | 48 | 49 | def world_to_colmap(tvec, qvec): 50 | quats = Rotation.from_quat(qvec) 51 | return -quats.apply(tvec, inverse=False) 52 | 53 | 54 | def NEDtoworld(qvec): 55 | world2NED = np.float32([[0, 1, 0], 56 | [1, 0, 0], 57 | [0, 0, -1]]) 58 | NED2world = world2NED 59 | world_qvec = rm.rotmat2qvec(NED2world @ rm.qvec2rotmat(qvec).T @ world2NED) 60 | return world_qvec 61 | 62 | 63 | def quaternion_distances(prefix="", suffix=""): 64 | def fun(row): 65 | """ Create two Quaternions objects and calculate 3 distances between them """ 66 | q1 = np.array((row[['{}q{}{}'.format(prefix, col, suffix) for col in list("wxyz")]])) 67 | q2 = np.array((row[['{}q{}2{}'.format(prefix, col, suffix) for col in list("wxyz")]])) 68 | row['{}qdist{}'.format(prefix, suffix)] = np.arccos(2 * (q1.dot(q2)) ** 2 - 1) 69 | 70 | return row 71 | return fun 72 | 73 | 74 | def get_outliers(series, threshold): 75 | return ((series.diff(-1) > 0) & (series.diff(1) > 0) & (series > threshold)) | series.isnull() 76 | 77 | 78 | def slerp_quats(quat_df, prefix=""): 79 | valid_df = quat_df[~quat_df["outlier"]] 80 | valid_index = valid_df.index 81 | total_index = quat_df.index 82 | 83 | # Note that scipy uses a different order convention than colmap : XYZW instead of WXYZ 84 | quats = Rotation.from_quat(valid_df[["{}q{}".format(prefix, col) for col in list("xyzw")]].values) 85 | slerp = Slerp(valid_index, quats) 86 | quats = slerp(total_index).as_quat() 87 | quats[quats[:, -1] < 0] *= -1 88 | return pd.DataFrame(slerp(total_index).as_quat(), index=quat_df.index) 89 | 90 | 91 | def filter_colmap_model(input_images_colmap, output_images_colmap, 
metadata, 92 | filter_degree=3, filter_time=0.1, 93 | threshold_t=0.01, threshold_q=5e-3, 94 | visualize=False, **env): 95 | if input_images_colmap.ext == ".txt": 96 | images_dict = rm.read_images_text(input_images_colmap) 97 | elif input_images_colmap.ext == ".bin": 98 | images_dict = rm.read_images_binary(input_images_colmap) 99 | else: 100 | print(input_images_colmap.ext) 101 | framerate = metadata["framerate"].iloc[0] 102 | filter_length = 2*int(filter_time * framerate) + 1 103 | 104 | image_df = pd.DataFrame.from_dict(images_dict, orient="index").set_index("id") 105 | image_df = image_df.reindex(metadata.index) 106 | metadata["outlier"] = image_df.isna().any(axis="columns") 107 | colmap_outliers = sum(metadata["outlier"]) 108 | total_frames = len(metadata) 109 | image_df = image_df.dropna() 110 | tvec = np.stack(image_df["tvec"].values) 111 | qvec = np.stack(image_df["qvec"].values) 112 | 113 | # Check if quats are flipped by computing shifted dot product. 114 | # If no flip (and continuous orientation), the dot product is around 1. Otherwise, it's around -1 115 | # A quaternion is flipped if the dot product is negative 116 | 117 | flips = list((np.sum(qvec[1:] * qvec[:-1], axis=1) < 0).nonzero()[0] + 1) 118 | flips.append(qvec.shape[0]) 119 | for j, k in zip(flips[::2], flips[1::2]): 120 | qvec[j:k] *= -1 121 | 122 | tvec_columns = ["colmap_tx", "colmap_ty", "colmap_tz"] 123 | quat_columns = ["colmap_qw", "colmap_qx", "colmap_qy", "colmap_qz"] 124 | metadata[tvec_columns] = pd.DataFrame(tvec, index=image_df.index) 125 | metadata[quat_columns] = pd.DataFrame(qvec, index=image_df.index) 126 | metadata["time (s)"] = metadata["time"] / 1e6 127 | metadata = metadata.set_index("time (s)") 128 | 129 | # Interpolate missing values for tvec and quat 130 | # In order to avoid extrapolation, we get rid of outlier at the beginning and the end of the sequence 131 | 132 | first_valid = metadata["outlier"].idxmin() 133 | last_valid = metadata["outlier"][::-1].idxmin() 134 | metadata = metadata.loc[first_valid:last_valid] 135 | 136 | metadata[tvec_columns] = metadata[tvec_columns].interpolate() 137 | metadata[["colmap_qx", "colmap_qy", "colmap_qz", "colmap_qw"]] = slerp_quats(metadata, prefix="colmap_") 138 | 139 | if visualize: 140 | metadata[["colmap_qw", "colmap_qx", "colmap_qy", "colmap_qz"]].plot() 141 | plt.gcf().canvas.set_window_title('Colmap quaternion (continuous)') 142 | 143 | qvec = metadata[["colmap_qx", "colmap_qy", "colmap_qz", "colmap_qw"]].values 144 | tvec = metadata[["colmap_tx", "colmap_ty", "colmap_tz"]].values 145 | 146 | world_tvec = colmap_to_world(tvec, qvec) 147 | 148 | world_tvec_filtered = savgol_filter(world_tvec, filter_length, filter_degree, axis=0) 149 | 150 | # TODO : this is linear filtering with renormalization, 151 | # mostly good enough but ideally should be something with slerp for quaternions 152 | qvec_filtered = savgol_filter(qvec, filter_length, filter_degree, axis=0) 153 | qvec_filtered = qvec_filtered / (np.linalg.norm(qvec_filtered, axis=-1, keepdims=True) + 1e-10) 154 | 155 | # Distances from raw and filtered values, we will dismiss those that are too far 156 | metadata["outlier_rot"] = np.arccos(2 * (np.sum(qvec * qvec_filtered, axis=1)) ** 2 - 1) 157 | 158 | metadata["outliers_pos"] = np.linalg.norm(world_tvec - world_tvec_filtered, axis=1) 159 | if visualize: 160 | metadata[["outliers_pos"]].plot() 161 | plt.gcf().canvas.set_window_title('difference between speed from colmap and from filtered') 162 | metadata[["outlier_rot"]].plot() 163 | 
plt.gcf().canvas.set_window_title('difference between rot speed from colmap and from filtered') 164 | 165 | metadata["tx"] = world_tvec[:, 0] 166 | metadata["ty"] = world_tvec[:, 1] 167 | metadata["tz"] = world_tvec[:, 2] 168 | metadata["qx"] = qvec[:, 0] 169 | metadata["qy"] = qvec[:, 1] 170 | metadata["qz"] = qvec[:, 2] 171 | metadata["qw"] = qvec[:, 3] 172 | 173 | if visualize: 174 | frame_q = metadata[["frame_quat_w", "frame_quat_x", "frame_quat_y", "frame_quat_z"]].values 175 | qref_list = [] 176 | for q in frame_q: 177 | qref_list.append(NEDtoworld(q)) 178 | qref = np.stack(qref_list) 179 | metadata["ref_qw"] = qref[:, 0] 180 | metadata["ref_qx"] = qref[:, 1] 181 | metadata["ref_qy"] = qref[:, 2] 182 | metadata["ref_qz"] = qref[:, 3] 183 | 184 | metadata["qw_filtered"] = qvec_filtered[:, 0] 185 | metadata["qx_filtered"] = qvec_filtered[:, 1] 186 | metadata["qy_filtered"] = qvec_filtered[:, 2] 187 | metadata["qz_filtered"] = qvec_filtered[:, 3] 188 | 189 | metadata["speed_up"] = -metadata["speed_down"] 190 | metadata[["speed_east", "speed_north", "speed_up"]].plot() 191 | plt.gcf().canvas.set_window_title('speed from sensors') 192 | colmap_speeds = framerate * metadata[["tx", "ty", "tz"]].diff() 193 | colmap_speeds.plot() 194 | plt.gcf().canvas.set_window_title('speeds from colmap (noisy)') 195 | metadata[["x", "y", "z", "tx", "ty", "tz"]].plot() 196 | plt.gcf().canvas.set_window_title('GPS(xyz) vs colmap position (tx,ty,tz)') 197 | metadata[["ref_qw", "ref_qx", "ref_qy", "ref_qz"]].plot() 198 | plt.gcf().canvas.set_window_title('quaternions from sensor') 199 | ax_q = metadata[["qw", "qx", "qy", "qz"]].plot() 200 | plt.gcf().canvas.set_window_title('quaternions from colmap vs from smoothed') 201 | 202 | metadata[["colmap_q{}2".format(col) for col in list('wxyz')]] = metadata[['colmap_qw', 203 | 'colmap_qx', 204 | 'colmap_qy', 205 | 'colmap_qz']].shift() 206 | metadata[["q{}2_filtered".format(col) for col in list('wxyz')]] = metadata[['qw_filtered', 207 | 'qx_filtered', 208 | 'qy_filtered', 209 | 'qz_filtered']].shift() 210 | metadata = metadata.apply(quaternion_distances(prefix="colmap_"), axis='columns') 211 | metadata = metadata.apply(quaternion_distances(suffix="_filtered"), axis='columns') 212 | ax_qdist = metadata[["colmap_qdist", "qdist_filtered"]].plot() 213 | plt.gcf().canvas.set_window_title('quaternions variations colmap vs filtered vs smoothed') 214 | 215 | metadata["outlier"] = metadata["outlier"] | \ 216 | get_outliers(metadata["outliers_pos"], threshold_t) | \ 217 | get_outliers(metadata["outlier_rot"], threshold_q) 218 | 219 | first_valid = metadata["outlier"].idxmin() 220 | last_valid = metadata["outlier"][::-1].idxmin() 221 | metadata = metadata.loc[first_valid:last_valid] 222 | 223 | metadata.loc[metadata["outlier"], ["tx", "ty", "tz", "qw", "qx", "qy", "qz"]] = np.NaN 224 | world_tvec_interp = metadata[["tx", "ty", "tz"]].interpolate(method="polynomial", order=3).values 225 | world_qvec_interp = slerp_quats(metadata).values 226 | 227 | world_tvec_smoothed = savgol_filter(world_tvec_interp, filter_length, filter_degree, axis=0) 228 | qvec_smoothed = savgol_filter(world_qvec_interp, filter_length, filter_degree, axis=0) 229 | qvec_smoothed /= np.linalg.norm(qvec_smoothed, axis=1, keepdims=True) 230 | 231 | colmap_tvec_smoothed = world_to_colmap(world_tvec_smoothed, qvec_smoothed) 232 | 233 | metadata["tx_smoothed"] = colmap_tvec_smoothed[:, 0] 234 | metadata["ty_smoothed"] = colmap_tvec_smoothed[:, 1] 235 | metadata["tz_smoothed"] = colmap_tvec_smoothed[:, 2] 236 | 
237 | metadata["qx_smoothed"] = qvec_smoothed[:, 0] 238 | metadata["qy_smoothed"] = qvec_smoothed[:, 1] 239 | metadata["qz_smoothed"] = qvec_smoothed[:, 2] 240 | metadata["qw_smoothed"] = qvec_smoothed[:, 3] 241 | 242 | if visualize: 243 | metadata["world_tx_smoothed"] = world_tvec_smoothed[:, 0] 244 | metadata["world_ty_smoothed"] = world_tvec_smoothed[:, 1] 245 | metadata["world_tz_smoothed"] = world_tvec_smoothed[:, 2] 246 | metadata[["world_tx_smoothed", "world_ty_smoothed", "world_tz_smoothed"]].diff().plot() 247 | plt.gcf().canvas.set_window_title('speed from filtered and smoothed') 248 | metadata[["qw_smoothed", "qx_smoothed", "qy_smoothed", "qz_smoothed"]].plot(ax=ax_q) 249 | metadata[["q{}2_smoothed".format(col) for col in list('wxyz')]] = metadata[['qw_smoothed', 250 | 'qx_smoothed', 251 | 'qy_smoothed', 252 | 'qz_smoothed']].shift() 253 | metadata = metadata.apply(quaternion_distances(suffix="_smoothed"), axis='columns') 254 | metadata[["qdist_smoothed"]].plot(ax=ax_qdist) 255 | metadata[["outlier"]].astype(float).plot() 256 | plt.gcf().canvas.set_window_title('outliers indices') 257 | 258 | print("number of not localized by colmap : {}/{} ({:.2f}%)".format(colmap_outliers, 259 | total_frames, 260 | 100 * colmap_outliers/total_frames)) 261 | print("Total number of outliers for valid sequence : {} / {} ({:.2f}%)".format(sum(metadata["outlier"]), 262 | len(metadata), 263 | 100 * sum(metadata["outlier"])/len(metadata))) 264 | 265 | if visualize: 266 | plt.show() 267 | 268 | if output_images_colmap is not None: 269 | smoothed_images_dict = {} 270 | interpolated_frames = [] 271 | for _, row in metadata.iterrows(): 272 | db_id = row["db_id"] 273 | if row["outlier"]: 274 | interpolated_frames.append(row["image_path"]) 275 | smoothed_images_dict[db_id] = rm.Image(id=db_id, 276 | qvec=row[["qw_smoothed", "qx_smoothed", "qy_smoothed", "qz_smoothed"]].values, 277 | tvec=row[["tx_smoothed", "ty_smoothed", "tz_smoothed"]].values, 278 | camera_id=row["camera_id"], 279 | name=row["image_path"], 280 | xys=[], point3D_ids=[]) 281 | if output_images_colmap.ext == ".txt": 282 | rm.write_images_text(smoothed_images_dict, output_images_colmap) 283 | elif output_images_colmap.ext == ".bin": 284 | rm.write_images_bin(smoothed_images_dict, output_images_colmap) 285 | else: 286 | print(output_images_colmap.ext) 287 | 288 | return interpolated_frames 289 | 290 | 291 | if __name__ == '__main__': 292 | args = parser.parse_args() 293 | env = vars(args) 294 | metadata = pd.read_csv(args.metadata_path).set_index("db_id", drop=False).sort_values("time") 295 | interpolated_frames = filter_colmap_model(metadata=metadata, **env) 296 | with open(args.interpolated_frames_list, "w") as f: 297 | f.write("\n".join(interpolated_frames) + "\n") 298 | -------------------------------------------------------------------------------- /generate_sky_masks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import imageio 4 | from model.enet import ENet 5 | from path import Path 6 | from tqdm import tqdm 7 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 8 | import numpy as np 9 | 10 | cityscapes_labels = ['unlabeled', 'road', 'sidewalk', 11 | 'building', 'wall', 'fence', 'pole', 12 | 'traffic_light', 'traffic_sign', 'vegetation', 13 | 'terrain', 'sky', 'person', 'rider', 'car', 14 | 'truck', 'bus', 'train', 'motorcycle', 'bicycle'] 15 | 16 | sky_index = cityscapes_labels.index('sky') 17 | 18 | 19 | def 
prepare_network(): 20 | ENet_model = ENet(len(cityscapes_labels)) 21 | checkpoint = torch.load('model/ENet') 22 | ENet_model.load_state_dict(checkpoint['state_dict']) 23 | return ENet_model.eval().cuda() 24 | 25 | 26 | def erosion(width, mask): 27 | kernel = torch.ones(1, 1, 2 * width + 1, 2 * width + 1).to(mask) / (2 * width + 1)**2 28 | padded = torch.nn.functional.pad(mask.unsqueeze(1), [width]*4, value=1) 29 | filtered = torch.nn.functional.conv2d(padded, kernel) 30 | mask = (filtered == 1).float() 31 | 32 | return mask 33 | 34 | 35 | @torch.no_grad() 36 | def extract_sky_mask(network, image_paths, mask_folder): 37 | images = np.stack([imageio.imread(i) for i in image_paths]) 38 | if len(images.shape) == 3: 39 | images = np.stack(3 * [images], axis=-1) 40 | b, h, w, _ = images.shape 41 | image_tensor = torch.from_numpy(images).float()/255 42 | image_tensor = image_tensor.permute(0, 3, 1, 2) # shape [B, C, H, W] 43 | 44 | w_r = 512 45 | h_r = int(512 * h / w) 46 | reduced = F.interpolate(image_tensor, size=(h_r, w_r), mode='area') 47 | 48 | result = network(reduced.cuda()) 49 | classes = torch.max(result, 1)[1] 50 | mask = (classes == sky_index).float() 51 | 52 | filtered_mask = erosion(1, mask) 53 | upsampled = F.interpolate(filtered_mask, size=(h, w), mode='nearest') 54 | 55 | final_masks = 1 - upsampled.permute(0, 2, 3, 1).cpu().numpy() 56 | 57 | for f, path in zip(final_masks, image_paths): 58 | imageio.imwrite(mask_folder/(path.basename() + '.png'), (f*255).astype(np.uint8)) 59 | 60 | 61 | def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbose=False, batchsize=8, **env): 62 | network = prepare_network() 63 | folders = [folder_to_process] + list(folder_to_process.walkdirs()) 64 | 65 | for folder in folders: 66 | 67 | mask_folder = mask_path/colmap_img_root.relpathto(folder) 68 | mask_folder.makedirs_p() 69 | images = sum((folder.files('*{}'.format(ext)) for ext in pic_ext), []) 70 | if images: 71 | if verbose: 72 | print("Generating masks for images in {}".format(str(folder))) 73 | images = tqdm(images) 74 | to_process = [] 75 | for image_file in images: 76 | if (mask_folder / (image_file.basename() + '.png')).isfile(): 77 | continue 78 | to_process.append(image_file) 79 | if len(to_process) == batchsize: 80 | extract_sky_mask(network, to_process, mask_folder) 81 | to_process = [] 82 | if to_process: 83 | extract_sky_mask(network, to_process, mask_folder) 84 | del network 85 | torch.cuda.empty_cache() 86 | 87 | 88 | parser = ArgumentParser(description='sky mask generator using ENet trained on cityscapes', 89 | formatter_class=ArgumentDefaultsHelpFormatter) 90 | 91 | parser.add_argument('--img_dir', metavar='DIR', default=Path("workspace/Pictures"), 92 | help='path to image folder root', type=Path) 93 | parser.add_argument('--colmap_img_root', metavar='DIR', default=Path("workspace/Pictures"), type=Path, 94 | help='image_path you will give to colmap when extracting feature') 95 | parser.add_argument('--mask_root', metavar='DIR', default=Path("workspace/Masks"), 96 | help='where to store the generated_masks', type=Path) 97 | parser.add_argument("--batch_size", "-b", type=int, default=8) 98 | 99 | if __name__ == '__main__': 100 | args = parser.parse_args() 101 | network = prepare_network() 102 | if args.img_dir[-1] == "/": 103 | args.img_dir = args.img_dir[:-1] 104 | args.mask_root.makedirs_p() 105 | file_exts = ['jpg', 'JPG'] 106 | 107 | process_folder(args.img_dir, args.colmap_img_root, args.mask_root, file_exts, True, args.batchsize) 108 | 
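For reference, a minimal sketch of driving the sky mask generation from another Python script instead of the CLI entry point above. The workspace layout below is hypothetical, and it assumes the script is run from the repository root (so that `generate_sky_masks` and the pretrained `model/ENet` checkpoint can be found) on a CUDA-capable machine:

```python
# Hypothetical usage sketch: generate sky masks for every image of a workspace.
# All paths are placeholders; adapt them to your own layout.
from path import Path
from generate_sky_masks import process_folder

workspace = Path("workspace")          # hypothetical workspace root
image_root = workspace / "Pictures"    # the folder later given to COLMAP
mask_root = workspace / "Masks"
mask_root.makedirs_p()

process_folder(folder_to_process=image_root,
               colmap_img_root=image_root,
               mask_path=mask_root,
               pic_ext=['jpg', 'JPG'],
               verbose=True,
               batchsize=4)
```

Masks are written under `mask_root` with the same relative layout as the input images and named `<image_name>.png`, which is the layout the COLMAP feature extractor expects when a mask path is supplied.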
-------------------------------------------------------------------------------- /images/drone1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/drone1.jpg -------------------------------------------------------------------------------- /images/drone2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/drone2.jpg -------------------------------------------------------------------------------- /images/example_viz.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/example_viz.jpg -------------------------------------------------------------------------------- /images/optimal_sample1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/optimal_sample1.jpg -------------------------------------------------------------------------------- /images/optimal_sample2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/optimal_sample2.jpg -------------------------------------------------------------------------------- /images/photog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/photog1.jpg -------------------------------------------------------------------------------- /images/photog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/photog2.jpg -------------------------------------------------------------------------------- /images/piloting1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/piloting1.jpg -------------------------------------------------------------------------------- /images/piloting2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/piloting2.jpg -------------------------------------------------------------------------------- /images/plan1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plan1.jpg -------------------------------------------------------------------------------- /images/plan2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plan2.jpg 
-------------------------------------------------------------------------------- /images/plan3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plan3.jpg -------------------------------------------------------------------------------- /images/plan4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plan4.jpg -------------------------------------------------------------------------------- /images/plot/GTwise_depth_diff_DepthNet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/GTwise_depth_diff_DepthNet.jpg -------------------------------------------------------------------------------- /images/plot/GTwise_depth_diff_SfmLearner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/GTwise_depth_diff_SfmLearner.jpg -------------------------------------------------------------------------------- /images/plot/depth_distrib.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/depth_distrib.jpg -------------------------------------------------------------------------------- /images/plot/est_error_quantiles.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/est_error_quantiles.jpg -------------------------------------------------------------------------------- /images/plot/fpv_error_quantiles.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/fpv_error_quantiles.jpg -------------------------------------------------------------------------------- /images/plot/global_depth_diff.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/global_depth_diff.jpg -------------------------------------------------------------------------------- /images/plot/gt_error_quantiles.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/plot/gt_error_quantiles.jpg -------------------------------------------------------------------------------- /images/pointcloud1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/pointcloud1.jpg -------------------------------------------------------------------------------- /images/pointcloud2.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/pointcloud2.jpg -------------------------------------------------------------------------------- /images/result1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/result1.jpg -------------------------------------------------------------------------------- /images/result2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/result2.jpg -------------------------------------------------------------------------------- /images/result3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/result3.jpg -------------------------------------------------------------------------------- /images/splats.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/images/splats.jpg -------------------------------------------------------------------------------- /install_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script helps you install the necessary tools to construct a depth enabled dataset with Anafi videos 4 | # Note that CUDA and Anaconda need to be already installed. 5 | # For CUDA, try to install the last package : https://developer.nvidia.com/cuda-downloads instead of the one installed by APT 6 | # Also note that for repo to work, git needs to be parametrized with email and name. 7 | # It has been tested with Ubuntu 18.04 and Ubunut 20.04 8 | 9 | 10 | # This command makes sure that the .so files pointed by the cmake commands are the right ones 11 | # Alternatively, you can add conda to the cmake folders with the following command : 12 | # CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} 13 | # Note that the alternative will trigger erros if you move or delete the anaconda folder. 14 | 15 | eval "$(conda shell.bash hook)" 16 | conda deactivate 17 | 18 | sudo apt update 19 | sudo apt install -y git \ 20 | curl \ 21 | cmake \ 22 | ffmpeg \ 23 | build-essential \ 24 | pkg-config \ 25 | libboost-all-dev \ 26 | libeigen3-dev \ 27 | libsuitesparse-dev \ 28 | libfreeimage-dev \ 29 | libgoogle-glog-dev \ 30 | libgflags-dev \ 31 | libglew-dev \ 32 | qtbase5-dev \ 33 | libqt5opengl5-dev \ 34 | libcgal-dev \ 35 | libflann-dev \ 36 | cmake libgmp-dev \ 37 | libgoogle-glog-dev \ 38 | libqwt-qt5-dev \ 39 | libpcl-dev \ 40 | libproj-dev \ 41 | libcgal-qt5-dev \ 42 | libatlas-base-dev \ 43 | libsuitesparse-dev \ 44 | zlib1g-dev \ 45 | libglfw3-dev \ 46 | libsdl2-dev rsync 47 | 48 | git clone https://github.com/laurentkneip/opengv.git 49 | cd opengv \ 50 | && mkdir -p build \ 51 | && cd build \ 52 | && cmake .. 
\ 53 | && make -j8 54 | sudo make install 55 | cd ../../ 56 | 57 | git clone https://github.com/opencv/opencv.git 58 | cd opencv \ 59 | && git checkout 4.1.2 \ 60 | && mkdir -p build \ 61 | && cd build \ 62 | && cmake -D WITH_CUDA=OFF .. \ 63 | && make -j8 64 | sudo make install 65 | cd ../../ 66 | 67 | git clone https://github.com/ETH3D/dataset-pipeline 68 | cd dataset-pipeline \ 69 | && mkdir -p build \ 70 | && cd build \ 71 | && cmake .. \ 72 | && make -j8 \ 73 | && cd ../../ 74 | 75 | git clone https://ceres-solver.googlesource.com/ceres-solver 76 | cd ceres-solver \ 77 | && git checkout $(git describe --tags) \ 78 | && mkdir -p build \ 79 | && cd build \ 80 | && cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF \ 81 | && make -j8 82 | sudo make install 83 | cd ../../ 84 | 85 | git clone https://github.com/colmap/colmap.git 86 | cd colmap \ 87 | && git checkout dev \ 88 | && mkdir -p build \ 89 | && cd build \ 90 | && cmake .. \ 91 | && make -j8 92 | sudo make install 93 | cd ../../ 94 | 95 | mkdir -p ~/.bin 96 | PATH="${HOME}/.bin:${PATH}" 97 | curl https://storage.googleapis.com/git-repo-downloads/repo > ~/.bin/repo 98 | chmod a+rx ~/.bin/repo 99 | 100 | conda activate 101 | mkdir -p groundsdk \ 102 | && cd groundsdk \ 103 | && repo init -u https://github.com/Parrot-Developers/groundsdk-manifest -m release.xml \ 104 | && repo sync \ 105 | && ./build.sh -p pdraw-linux -t build -j8 106 | cd ../ 107 | 108 | pip install -r requirements.txt 109 | 110 | ./build_pcl_util.sh 111 | 112 | # Note that other tree sizes are available, see https://demuc.de/colmap/#download 113 | wget https://demuc.de/colmap/vocab_tree_flickr100K_words256K.bin -------------------------------------------------------------------------------- /las2ply.py: -------------------------------------------------------------------------------- 1 | import laspy 2 | from pyntcloud import PyntCloud 3 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 4 | from path import Path 5 | import numpy as np 6 | 7 | 8 | parser = ArgumentParser(description='Convert las cloud to ply along with centroid', 9 | formatter_class=ArgumentDefaultsHelpFormatter) 10 | 11 | parser.add_argument('las', type=Path, 12 | help='Path to las file. 
Note that this script is compatible with PLY') 13 | parser.add_argument('--output_folder', metavar='PATH', 14 | default=None, type=Path, 15 | help="where to save ply file and txt centroid") 16 | parser.add_argument('--verbose', '-v', action='store_true') 17 | 18 | 19 | def load_and_convert(input_file, output_folder, verbose=False): 20 | output_folder.makedirs_p() 21 | ply_path = output_folder / input_file.stem + '.ply' 22 | txt_path = output_folder / input_file.stem + '_centroid.txt' 23 | file_type = input_file.ext[1:].upper() 24 | if file_type == "LAS": 25 | offset = np.array(laspy.file.File(input_file, mode="r").header.offset) 26 | else: 27 | offset = np.zeros(3) 28 | cloud = PyntCloud.from_file(input_file, xyz_dtype="float64") 29 | if verbose: 30 | print(cloud.points) 31 | 32 | points = cloud.points 33 | xyz = points[['x', 'y', 'z']] 34 | xyz += offset 35 | points[['x', 'y', 'z']] = xyz 36 | cloud.points = points 37 | 38 | if verbose: 39 | print("{} file with {:,} points " 40 | "(centroid : [{:.2f}, {:.2f}, {:.2f}] in km) " 41 | "successfully loaded".format(file_type, 42 | len((cloud.points)), 43 | *(cloud.centroid/1000))) 44 | 45 | output_centroid = cloud.centroid 46 | np.savetxt(txt_path, output_centroid) 47 | 48 | points = cloud.points 49 | xyz = points[['x', 'y', 'z']] 50 | xyz -= cloud.centroid 51 | points[['x', 'y', 'z']] = xyz 52 | if (all([c in points.keys() for c in ["red", "green", "blue"]])): 53 | points[['red', 'green', 'blue']] = (points[['red', 'green', 'blue']] / 255).astype(np.uint8) 54 | invalid_color = (points["red"] > 250) & (points["green"] > 250) & (points["blue"] > 250) 55 | cloud.points = points[["x", "y", "z", "red", "green", "blue"]][~invalid_color] 56 | 57 | cloud.to_file(ply_path) 58 | if verbose: 59 | print("saved shifted cloud to {}, centroid to {}".format(ply_path, txt_path)) 60 | return ply_path, output_centroid 61 | 62 | 63 | if __name__ == '__main__': 64 | args = parser.parse_args() 65 | if args.output_folder is None: 66 | args.output_folder = args.las_path.parent 67 | load_and_convert(args.las, args.output_folder, args.verbose) 68 | -------------------------------------------------------------------------------- /main_pipeline_no_lidar.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from wrappers import Colmap, FFMpeg, PDraw, ETH3D, PCLUtil 3 | from cli_utils import set_full_argparser, print_step, print_workflow 4 | from video_localization import localize_video, generate_GT, generate_GT_individual_pictures 5 | import meshlab_xml_writer as mxw 6 | import prepare_images as pi 7 | import prepare_workspace as pw 8 | 9 | 10 | def main(): 11 | args = set_full_argparser().parse_args() 12 | env = vars(args) 13 | if args.show_steps: 14 | print_workflow() 15 | return 16 | if args.add_new_videos: 17 | env["resume_work"] = True 18 | args.skip_step += [1, 3, 4] 19 | if args.begin_step is not None: 20 | args.skip_step += list(range(args.begin_step)) 21 | pw.check_input_folder(args.input_folder, with_lidar=False) 22 | args.workspace = args.workspace.abspath() 23 | pw.prepare_workspace(args.workspace, env, with_lidar=False) 24 | colmap = Colmap(db=env["thorough_db"], 25 | image_path=env["colmap_img_root"], 26 | mask_path=env["mask_path"], 27 | dense_workspace=env["dense_workspace"], 28 | binary=args.colmap, 29 | verbose=args.verbose, 30 | logfile=args.log) 31 | env["colmap"] = colmap 32 | ffmpeg = FFMpeg(args.ffmpeg, verbose=args.verbose, logfile=args.log) 33 | env["ffmpeg"] = ffmpeg 34 | pdraw = 
PDraw(args.nw, verbose=args.verbose, logfile=args.log) 35 | env["pdraw"] = pdraw 36 | eth3d = ETH3D(args.eth3d, args.raw_output_folder / "Images", args.max_occlusion_depth, 37 | verbose=args.verbose, logfile=args.log, splat_radius=args.eth3d_splat_radius) 38 | env["eth3d"] = eth3d 39 | pcl_util = PCLUtil(args.pcl_util, verbose=args.verbose, logfile=args.log) 40 | env["pcl_util"] = pcl_util 41 | env["videos_list"] = sum((list((args.input_folder/"Videos").walkfiles('*{}'.format(ext))) for ext in args.vid_ext), []) 42 | no_gt_folder = args.input_folder/"Videos"/"no_groundtruth" 43 | if no_gt_folder.isdir(): 44 | env["videos_to_localize"] = [v for v in env["videos_list"] if not str(v).startswith(no_gt_folder)] 45 | else: 46 | env["videos_to_localize"] = env["videos_list"] 47 | 48 | i = 1 49 | if i not in args.skip_step: 50 | print_step(i, "Pictures preparation") 51 | env["individual_pictures"] = pi.extract_pictures_to_workspace(**env) 52 | else: 53 | full_paths = sum((list(env["individual_pictures_path"].walkfiles('*{}'.format(ext))) for ext in env["pic_ext"]), []) 54 | env["individual_pictures"] = [path.relpath(env["colmap_img_root"]) for path in full_paths] 55 | 56 | i += 1 57 | # Get already existing_videos 58 | env["videos_frames_folders"] = {} 59 | by_name = {v.stem: v for v in env["videos_list"]} 60 | for folder in env["video_path"].walkdirs(): 61 | video_name = folder.basename() 62 | if video_name in by_name.keys(): 63 | env["videos_frames_folders"][by_name[video_name]] = folder 64 | if i not in args.skip_step: 65 | print_step(i, "Extracting Videos and selecting optimal frames for a thorough scan") 66 | new_video_frame_folders = pi.extract_videos_to_workspace(fps=args.lowfps, **env) 67 | # Concatenate both already treated videos and newly detected videos 68 | env["videos_frames_folders"] = {**env["videos_frames_folders"], **new_video_frame_folders} 69 | env["videos_workspaces"] = {} 70 | for v, frames_folder in env["videos_frames_folders"].items(): 71 | env["videos_workspaces"][v] = pw.prepare_video_workspace(v, frames_folder, **env) 72 | 73 | i += 1 74 | if i not in args.skip_step: 75 | print_step(i, "First thorough photogrammetry") 76 | env["thorough_recon"].makedirs_p() 77 | colmap.extract_features(image_list=env["video_frame_list_thorough"], more=args.more_sift_features) 78 | colmap.index_images(vocab_tree_output=env["indexed_vocab_tree"], vocab_tree_input=args.vocab_tree) 79 | if env["match_method"] == "vocab_tree": 80 | colmap.match(method="vocab_tree", vocab_tree=env["indexed_vocab_tree"], max_num_matches=env["max_num_matches"]) 81 | else: 82 | colmap.match(method="exhaustive", max_num_matches=env["max_num_matches"]) 83 | colmap.map(output=env["thorough_recon"], multiple_models=env["multiple_models"]) 84 | thorough_model = pi.choose_biggest_model(env["thorough_recon"]) 85 | colmap.adjust_bundle(thorough_model, thorough_model, 86 | num_iter=100, refine_extra_params=True) 87 | else: 88 | thorough_model = pi.choose_biggest_model(env["thorough_recon"]) 89 | 90 | i += 1 91 | if i not in args.skip_step: 92 | print_step(i, "Alignment of photogrammetric reconstruction with GPS") 93 | env["georef_recon"].makedirs_p() 94 | env["georef_full_recon"].makedirs_p() 95 | colmap.align_model(output=env["georef_recon"], 96 | input=thorough_model, 97 | ref_images=env["georef_frames_list"]) 98 | if not (env["georef_recon"]/"images.bin").isfile(): 99 | # GPS alignment failed, possibly because not enough GPS referenced images 100 | # Copy the original model without alignment 101 | print("Warning, model 
alignment failed, the model will be normalized, and thus the depth maps too") 102 | thorough_model.merge_tree(env["georef_recon"]) 103 | env["georef_recon"].merge_tree(env["georef_full_recon"]) 104 | if args.inspect_dataset: 105 | print("FIRST DATASET INSPECTION") 106 | print("Inspection of localisalization of frames used in thorough mapping " 107 | "w.r.t Sparse reconstruction") 108 | colmap.export_model(output=env["georef_recon"] / "georef_sparse.ply", 109 | input=env["georef_recon"]) 110 | georef_mlp = env["georef_recon"]/"georef_recon.mlp" 111 | mxw.create_project(georef_mlp, [env["georef_recon"] / "georef_sparse.ply"]) 112 | colmap.export_model(output=env["georef_recon"], 113 | input=env["georef_recon"], 114 | output_type="TXT") 115 | eth3d.inspect_dataset(scan_meshlab=georef_mlp, 116 | colmap_model=env["georef_recon"], 117 | image_path=env["colmap_img_root"]) 118 | 119 | i += 1 120 | if i not in args.skip_step: 121 | print_step(i, "Video localization with respect to reconstruction") 122 | for j, v in enumerate(env["videos_to_localize"]): 123 | print("\n\nNow working on video {} [{}/{}]".format(v, j + 1, len(env["videos_to_localize"]))) 124 | video_env = env["videos_workspaces"][v] 125 | localize_video(video_name=v, 126 | video_frames_folder=env["videos_frames_folders"][v], 127 | step_index=i, video_index=j+1, 128 | num_videos=len(env["videos_to_localize"]), 129 | **video_env, **env) 130 | 131 | i += 1 132 | if i not in args.skip_step: 133 | print_step(i, "Full reconstruction point cloud densificitation") 134 | env["georef_full_recon"].makedirs_p() 135 | colmap.undistort(input=env["georef_full_recon"]) 136 | colmap.dense_stereo(min_depth=env["stereo_min_depth"], max_depth=env["stereo_max_depth"]) 137 | colmap.stereo_fusion(output=env["georefrecon_ply"]) 138 | 139 | i += 1 140 | if i not in args.skip_step: 141 | print_step(i, "Reconstruction cleaning") 142 | filtered = env["georefrecon_ply"].stripext() + "_filtered.ply" 143 | pcl_util.filter_cloud(input_file=env["georefrecon_ply"], 144 | output_file=filtered, 145 | knn=args.SOR[0], std=args.SOR[1]) 146 | mxw.create_project(env["aligned_mlp"], [filtered]) 147 | 148 | i += 1 149 | if i not in args.skip_step: 150 | print_step(i, "Occlusion Mesh computing") 151 | colmap.delaunay_mesh(env["occlusion_ply"], input_ply=env["georefrecon_ply"]) 152 | if args.splats: 153 | eth3d.create_splats(env["splats_ply"], env["georefrecon_ply"].stripext() + "_filtered.ply", 154 | env["occlusion_ply"], env["splat_threshold"], 155 | env["max_splat_size"]) 156 | 157 | if args.inspect_dataset: 158 | print("SECOND DATASET INSPECTION") 159 | print("Inspection of localisalization of frames used in thorough mapping " 160 | "w.r.t Dense reconstruction") 161 | eth3d.inspect_dataset(scan_meshlab=env["aligned_mlp"], 162 | colmap_model=env["georef_recon"], 163 | image_path=env["colmap_img_root"]) 164 | print("SECOND DATASET INSPECTION") 165 | print("Inspection of localisalization of frames used in thorough mapping " 166 | "w.r.t Dense reconstruction and Occlusion Meshes") 167 | eth3d.inspect_dataset(scan_meshlab=env["aligned_mlp"], 168 | colmap_model=env["georef_recon"], 169 | image_path=env["colmap_img_root"], 170 | occlusions=env["occlusion_ply"], 171 | splats=env["splats_ply"]) 172 | 173 | i += 1 174 | if i not in args.skip_step: 175 | print_step(i, "Groud Truth generation") 176 | for j, v in enumerate(env["videos_to_localize"]): 177 | video_env = env["videos_workspaces"][v] 178 | 179 | generate_GT(video_name=v, GT_already_done=video_env["GT_already_done"], 180 | 
video_index=j+1, 181 | step_index=i, 182 | num_videos=len(env["videos_to_localize"]), 183 | metadata_path=video_env["metadata_path"], 184 | global_registration_matrix=np.eye(4), 185 | **video_env["output_env"], **env) 186 | if env["generate_groundtruth_for_individual_images"]: 187 | by_folder = pi.group_pics_by_folder(env["individual_pictures"]) 188 | for folder, pic_list in by_folder.items(): 189 | generate_GT_individual_pictures(input_colmap_model=env["georef_full_recon"], 190 | individual_pictures_list=pic_list, 191 | relpath=folder, 192 | step_index=i, **env) 193 | 194 | 195 | if __name__ == '__main__': 196 | main() 197 | -------------------------------------------------------------------------------- /meshlab_xml_writer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lxml import etree 3 | from path import Path 4 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 5 | 6 | 7 | def create_project(mlp_path, model_paths, labels=None, transforms=None): 8 | if labels is not None: 9 | assert(len(model_paths) == len(labels)) 10 | else: 11 | labels = [m.basename() for m in model_paths] 12 | 13 | if transforms is not None: 14 | assert(len(model_paths) == len(transforms)) 15 | else: 16 | transforms = [np.eye(4) for _ in model_paths] 17 | base = etree.Element("MeshLabProject") 18 | group = etree.SubElement(base, "MeshGroup") 19 | for m, l, t in zip(model_paths, labels, transforms): 20 | mesh = etree.SubElement(group, "MLMesh") 21 | mesh.set("label", l) 22 | relative_path = m.relpath(mlp_path.parent) 23 | mesh.set("filename", relative_path) 24 | matrix = etree.SubElement(mesh, "MLMatrix44") 25 | matrix.text = "\n" + "\n".join(" ".join(str(element) for element in row) + " " for row in t) + "\n" 26 | tree = etree.ElementTree(base) 27 | tree.write(mlp_path, pretty_print=True) 28 | 29 | 30 | def remove_mesh_from_project(input_mlp, output_mlp, index): 31 | with open(input_mlp, "r") as f: 32 | to_modify = etree.parse(f) 33 | meshgroup = to_modify.getroot()[0] 34 | if index < len(meshgroup): 35 | removed = meshgroup[index] 36 | meshgroup.remove(removed) 37 | to_modify.write(output_mlp, pretty_print=True) 38 | transform = np.fromstring(removed[0].text, sep=" ").reshape(4, 4) 39 | filepath = removed.get("label") 40 | return transform, filepath 41 | 42 | 43 | def get_mesh(input_mlp, index): 44 | with open(input_mlp, "r") as f: 45 | to_modify = etree.parse(f) 46 | meshgroup = to_modify.getroot()[0] 47 | if index < len(meshgroup): 48 | removed = meshgroup[index] 49 | transform = np.fromstring(removed[0].text, sep=" ").reshape(4, 4) 50 | filepath = removed.get("label") 51 | return transform, filepath 52 | 53 | 54 | def get_meshes(input_mlp): 55 | with open(input_mlp, "r") as f: 56 | to_read = etree.parse(f) 57 | meshgroup = to_read.getroot()[0] 58 | meshes = [] 59 | for mesh in meshgroup: 60 | transform = np.fromstring(mesh[0].text, sep=" ").reshape(4, 4) 61 | filepath = mesh.get("label") 62 | meshes.append(transform, filepath) 63 | return meshes 64 | 65 | 66 | def add_meshes_to_project(input_mlp, output_mlp, model_paths, labels=None, transforms=None, start_index=-1): 67 | if labels is not None: 68 | assert(len(model_paths) == len(labels)) 69 | else: 70 | labels = [m.basename() for m in model_paths] 71 | 72 | if transforms is not None: 73 | assert(len(model_paths) == len(transforms)) 74 | else: 75 | transforms = [np.eye(4) for _ in model_paths] 76 | with open(input_mlp, "r") as f: 77 | to_modify = etree.parse(f) 78 | root = 
to_modify.getroot() 79 | group = root[0] 80 | if start_index < 0: 81 | start_index = len(group) 82 | for i, (m, l, t) in enumerate(zip(model_paths, labels, transforms)): 83 | mesh = etree.Element("MLMesh") 84 | mesh.set("label", l) 85 | relative_path = m.relpath(output_mlp.parent) 86 | mesh.set("filename", relative_path) 87 | matrix = etree.SubElement(mesh, "MLMatrix44") 88 | matrix.text = "\n" + "\n".join(" ".join(str(element) for element in row) + " " for row in t) + "\n" 89 | group.insert(start_index, mesh) 90 | to_modify.write(output_mlp, pretty_print=True) 91 | 92 | 93 | def apply_transform_to_project(input_mlp, output_mlp, transform): 94 | with open(input_mlp, "r") as f: 95 | to_modify = etree.parse(f) 96 | meshgroup = to_modify.getroot()[0] 97 | for mesh in meshgroup: 98 | former_transform = np.fromstring(mesh[0].text, sep=" ").reshape(4, 4) 99 | new_transform = transform @ former_transform 100 | mesh[0].text = "\n" + "\n".join(" ".join(str(element) for element in row) + " " for row in new_transform) + "\n" 101 | to_modify.write(output_mlp, pretty_print=True) 102 | 103 | 104 | parser = ArgumentParser(description='Create a meshlab project with ply files and transformations', 105 | formatter_class=ArgumentDefaultsHelpFormatter) 106 | 107 | subparsers = parser.add_subparsers(dest="operation") 108 | create_parser = subparsers.add_parser('create') 109 | create_parser.add_argument('--input_models', metavar='PLY', type=Path, nargs="+") 110 | create_parser.add_argument('--output_meshlab', metavar='MLP', type=Path, required=True) 111 | create_parser.add_argument('--transforms', metavar='TXT', type=Path, nargs="+") 112 | create_parser.add_argument('--labels', metavar='LABEL', nargs="*") 113 | 114 | remove_parser = subparsers.add_parser('remove') 115 | remove_parser.add_argument('--input_meshlab', metavar='MLP', type=Path, required=True) 116 | remove_parser.add_argument('--output_meshlab', metavar='MLP', type=Path, required=True) 117 | remove_parser.add_argument('--index', metavar="N", type=int, default=-1) 118 | 119 | add_parser = subparsers.add_parser('add') 120 | add_parser.add_argument('--input_models', metavar='PLY', type=Path, nargs="+") 121 | add_parser.add_argument('--input_meshlab', metavar='MLP', type=Path, required=True) 122 | add_parser.add_argument('--output_meshlab', metavar='MLP', type=Path, required=True) 123 | add_parser.add_argument('--transforms', metavar='TXT', type=Path, nargs="+") 124 | add_parser.add_argument('--labels', metavar='LABEL', nargs="*") 125 | add_parser.add_argument('--start_index', metavar='N', default=-1, type=int) 126 | 127 | transform_parser = subparsers.add_parser('transform') 128 | transform_parser.add_argument('--input_meshlab', metavar='MLP', type=Path, required=True) 129 | transform_parser.add_argument('--output_meshlab', metavar='MLP', type=Path, required=True) 130 | transform_parser.add_argument('--transform', metavar='TXT', type=Path, required=True) 131 | transform_parser.add_argument('--inverse', action='store_true') 132 | 133 | 134 | if __name__ == '__main__': 135 | args = parser.parse_args() 136 | if args.operation in ["add", "create"]: 137 | n_models = len(args.input_models) 138 | if args.labels is not None: 139 | assert n_models == len(args.labels) 140 | 141 | if args.transforms is None: 142 | transforms = [np.eye(4, 4)] * n_models 143 | elif len(args.transforms) == 1: 144 | transform = np.fromfile(args.transforms[0], sep=" ").reshape(4, 4) 145 | transforms = [transform] * n_models 146 | else: 147 | assert n_models == len(transforms) 148 | 
transforms = [np.fromfile(t, sep=" ").reshape(4, 4) for t in args.transforms] 149 | if args.operation == "create": 150 | create_project(args.output_meshlab, args.input_models, args.labels, transforms) 151 | if args.operation == "add": 152 | add_meshes_to_project(args.input_meshlab, 153 | args.output_meshlab, 154 | args.input_models, 155 | args.labels, 156 | transforms, 157 | args.start_index) 158 | if args.operation == "remove": 159 | matrix, filename = remove_mesh_from_project(args.input_meshlab, args.output_meshlab, args.index) 160 | print("Removed model {} with transform\n {} \nfrom meshlab".format(filename, matrix)) 161 | if args.operation == "transform": 162 | transform = np.fromfile(args.transform, sep=" ").reshape(4, 4) 163 | if args.inverse: 164 | transform = np.linalg.inverse(transform) 165 | apply_transform_to_project(args.input_meshlab, args.output_meshlab, transform) 166 | -------------------------------------------------------------------------------- /model/ENet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ClementPinard/depth-dataset-builder/faf9dc71f733dc561c0116960d4153677bfa7d4c/model/ENet -------------------------------------------------------------------------------- /model/ENet_summary.txt: -------------------------------------------------------------------------------- 1 | ARGUMENTS 2 | batch_size: 4 3 | dataset: cityscapes 4 | dataset_dir: ../../Datasets/Cityscapes/1024x512/ 5 | device: cuda 6 | epochs: 300 7 | height: 512 8 | ignore_unlabeled: True 9 | imshow_batch: False 10 | learning_rate: 0.0005 11 | lr_decay: 0.1 12 | lr_decay_epochs: 100 13 | mode: train 14 | name: ENet 15 | print_step: False 16 | resume: True 17 | save_dir: save 18 | weighing: ENet 19 | weight_decay: 0.0002 20 | width: 1024 21 | workers: 4 22 | 23 | BEST VALIDATION 24 | Epoch: 270 25 | Mean IoU: 0.6007430952025391 26 | -------------------------------------------------------------------------------- /pcl_util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 2 | set(CMAKE_CXX_STANDARD 14) 3 | 4 | project(Parrot_photogrammetry_PCLutils) 5 | 6 | find_package(PCL REQUIRED) 7 | find_package(COLMAP REQUIRED) 8 | 9 | include_directories(${PCL_INCLUDE_DIRS}) 10 | link_directories(${PCL_LIBRARY_DIRS}) 11 | add_definitions(${PCL_DEFINITIONS}) 12 | 13 | include_directories(${COLMAP_INCLUDE_DIRS}) 14 | link_directories(${COLMAP_LINK_DIRS}) 15 | 16 | add_executable (PointCloudSubsampler pointcloud_subsampler.cpp) 17 | target_link_libraries (PointCloudSubsampler ${PCL_LIBRARIES} glog) 18 | 19 | add_executable (MeshTriangulator mesh_triangulator.cpp) 20 | target_link_libraries (MeshTriangulator ${PCL_LIBRARIES} glog) 21 | 22 | add_executable (CloudRegistrator cloud_registrator.cpp) 23 | target_link_libraries(CloudRegistrator ${PCL_LIBRARIES} glog) 24 | 25 | add_executable (CloudSOR pointcloud_sor.cpp) 26 | target_link_libraries(CloudSOR ${PCL_LIBRARIES} glog) 27 | 28 | add_executable (NormalsTransfer normals_transfer.cpp) 29 | target_link_libraries(NormalsTransfer ${PCL_LIBRARIES} glog) 30 | 31 | add_executable (CreateVisFile create_vis_file.cpp) 32 | target_link_libraries(CreateVisFile ${PCL_LIBRARIES} glog ${COLMAP_LIBRARIES}) 33 | -------------------------------------------------------------------------------- /pcl_util/cloud_registrator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | int main (int argc, char** argv) 14 | { 15 | FLAGS_logtostderr = 1; 16 | google::InitGoogleLogging(argv[0]); 17 | pcl::console::setVerbosityLevel(pcl::console::L_DEBUG); 18 | 19 | // Parse arguments. 20 | int dummy; 21 | if (argc <= 1 || 22 | pcl::console::parse_argument(argc, argv, "-h", dummy) >= 0 || 23 | pcl::console::parse_argument(argc, argv, "--help", dummy) >= 0) { 24 | LOG(INFO) << "Usage: " << argv[0] << " --georef --lidar " 25 | << "--max_distance --output_matrix (--output_cloud )"; 26 | return EXIT_FAILURE; 27 | } 28 | 29 | std::string georef_path; 30 | pcl::console::parse_argument(argc, argv, "--georef", georef_path); 31 | std::string lidar_path; 32 | pcl::console::parse_argument(argc, argv, "--lidar", lidar_path); 33 | float max_distance = 1; //1m max distance 34 | pcl::console::parse_argument(argc, argv, "--max_distance", max_distance); 35 | std::string output_matrix_path; 36 | pcl::console::parse_argument(argc, argv, "--output_matrix", output_matrix_path); 37 | std::string output_cloud_path; 38 | pcl::console::parse_argument(argc, argv, "--output_cloud", output_cloud_path); 39 | 40 | if (output_matrix_path.empty() && output_cloud_path.empty()){ 41 | LOG(ERROR) << "No output path was given"; 42 | LOG(INFO) << "Usage: " << argv[0] << " --georef --lidar " 43 | << "--max_distance --output_matrix (--output_cloud )"; 44 | return EXIT_FAILURE; 45 | } 46 | 47 | // Load point cloud with normals. 48 | LOG(INFO) << "Loading point clouds ..."; 49 | pcl::PointCloud::Ptr georef( 50 | new pcl::PointCloud()); 51 | if (pcl::io::loadPLYFile(georef_path, *georef) < 0) { 52 | return EXIT_FAILURE; 53 | } 54 | 55 | pcl::PointCloud::Ptr lidar( 56 | new pcl::PointCloud()); 57 | if (pcl::io::loadPLYFile(lidar_path, *lidar) < 0) { 58 | return EXIT_FAILURE; 59 | } 60 | 61 | LOG(INFO) << "point clouds loaded..."; 62 | 63 | // Filter to get inlier cloud, store in filtered_cloud. 
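// (The statistical outlier removal kept below is commented out; cloud cleaning is handled
// by the separate CloudSOR tool when needed.) The steps that follow estimate normals on the
// photogrammetric cloud, then run ICP with a scale-aware SVD transformation estimation
// against the lidar scan, so the result written to --output_matrix is a 4x4 similarity
// transform (rotation, translation and scale). The Python side of the pipeline can recover
// the scale factor from such a matrix as the spectral norm of its upper-left 3x3 block
// (see the np.linalg.norm(..., ord=2) computation in picture_localization.py).
// Illustrative invocation (file names are placeholders):
//   ./CloudRegistrator --georef georef_dense.ply --lidar lidar.ply \
//       --max_distance 1 --output_matrix registration_matrix.txt --output_cloud lidar_aligned.ply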
64 | //pcl::PointCloud::Ptr geroef_filtered (new pcl::PointCloud); 65 | //pcl::StatisticalOutlierRemoval sor; 66 | //sor.setInputCloud(geroef); 67 | //sor.setMeanK(6); 68 | //sor.setStddevMulThresh(0.1); 69 | //sor.filter(*geroef_filtered); 70 | 71 | 72 | // Normal estimation* 73 | pcl::NormalEstimationOMP n; 74 | pcl::PointCloud::Ptr normals (new pcl::PointCloud); 75 | pcl::search::KdTree::Ptr tree (new pcl::search::KdTree); 76 | tree->setInputCloud (georef); 77 | n.setInputCloud (georef); 78 | n.setSearchMethod (tree); 79 | n.setKSearch (6); 80 | n.compute (*normals); 81 | pcl::PointCloud::Ptr geroef_normals (new pcl::PointCloud); 82 | pcl::concatenateFields (*georef, *normals, *geroef_normals); 83 | 84 | // pcl::io::savePLYFile("test_normals.ply", *geroef_normals); 85 | 86 | pcl::IterativeClosestPointWithNormals icp; 87 | pcl::registration::TransformationEstimationSVDScale::Ptr est; 88 | est.reset(new pcl::registration::TransformationEstimationSVDScale); 89 | icp.setTransformationEstimation(est); 90 | 91 | icp.setMaxCorrespondenceDistance (max_distance); 92 | icp.setTransformationEpsilon(0.0001); 93 | icp.setMaximumIterations(500); 94 | icp.setEuclideanFitnessEpsilon(0.0001); 95 | icp.setInputSource(geroef_normals); 96 | icp.setInputTarget(lidar); 97 | 98 | pcl::PointCloud Final; 99 | icp.align(Final); 100 | Eigen::Matrix4f transform = icp.getFinalTransformation(); 101 | pcl::PointCloud::Ptr lidar_aligned( 102 | new pcl::PointCloud()); 103 | pcl::transformPointCloud (*lidar, *lidar_aligned, transform); 104 | 105 | std::ofstream output_file; 106 | output_file.open(output_matrix_path); 107 | output_file << transform << std::endl; 108 | output_file.close(); 109 | 110 | if (!output_cloud_path.empty()) 111 | pcl::io::savePLYFileBinary(output_cloud_path, *lidar_aligned); 112 | 113 | return (0); 114 | } -------------------------------------------------------------------------------- /pcl_util/create_vis_file.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "pointcloud_subsampler.h" 15 | 16 | 17 | int main (int argc, char** argv) 18 | { 19 | FLAGS_logtostderr = 1; 20 | google::InitGoogleLogging(argv[0]); 21 | pcl::console::setVerbosityLevel(pcl::console::L_DEBUG); 22 | 23 | // Parse arguments. 
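// Expected flags (defaults appear in the parsing code below): --georef_dense points to the
// COLMAP fused cloud whose companion .vis file lists, for every point, the images that see
// it; --lidar is the cloud that visibility is transferred to; --resolution controls the
// lidar subsampling and --max_distance limits the nearest-neighbour match. Each subsampled
// lidar point receives the visibility list of its closest dense-cloud point (or an empty
// list when the match is too far), and the result is written as a PLY plus a matching .vis
// file so COLMAP's Delaunay meshing accepts the lidar scan as if it were its own fusion output.
// Illustrative invocation (file names are placeholders):
//   ./CreateVisFile --georef_dense georef_dense.ply --lidar lidar_with_normals.ply \
//       --resolution 0.2 --max_distance 10 --output_cloud lidar_with_vis.ply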
24 | int dummy; 25 | if (argc <= 1 || 26 | pcl::console::parse_argument(argc, argv, "-h", dummy) >= 0 || 27 | pcl::console::parse_argument(argc, argv, "--help", dummy) >= 0) { 28 | LOG(INFO) << "Usage: " << argv[0] << " --georef_dense --lidar " 29 | << "--georef_matrix --resolution (--output_cloud )"; 30 | return EXIT_FAILURE; 31 | } 32 | 33 | std::string georef_dense_path; 34 | pcl::console::parse_argument(argc, argv, "--georef_dense", georef_dense_path); 35 | std::string lidar_path; 36 | pcl::console::parse_argument(argc, argv, "--lidar", lidar_path); 37 | std::string output_cloud_path; 38 | pcl::console::parse_argument(argc, argv, "--output_cloud", output_cloud_path); 39 | float resolution = 0.2; //20cm resolution 40 | pcl::console::parse_argument(argc, argv, "--resolution", resolution); 41 | float max_distance = 10; 42 | pcl::console::parse_argument(argc, argv, "--max_distance", max_distance); 43 | 44 | if (output_cloud_path.empty()){ 45 | LOG(ERROR) << "No output path was given"; 46 | LOG(INFO) << "Usage: " << argv[0] << " --georef_dense --lidar " 47 | << "--output_cloud "; 48 | return EXIT_FAILURE; 49 | } 50 | 51 | // Load point cloud with normals. 52 | LOG(INFO) << "Loading point clouds ..."; 53 | pcl::PointCloud::Ptr georef_dense( 54 | new pcl::PointCloud()); 55 | if (pcl::io::loadPLYFile(georef_dense_path, *georef_dense) < 0) { 56 | return EXIT_FAILURE; 57 | } 58 | 59 | pcl::PointCloud::Ptr lidar( 60 | new pcl::PointCloud()); 61 | if (pcl::io::loadPLYFile(lidar_path, *lidar) < 0) { 62 | return EXIT_FAILURE; 63 | } 64 | LOG(INFO) << "point clouds loaded"; 65 | 66 | LOG(INFO) << "Subsampling Lidar point cloud to have a mean distance between points of " << resolution << " m"; 67 | lidar = filter(lidar, resolution); 68 | 69 | LOG(INFO) << "Loading georef_dense vis file..."; 70 | const std::string input_vis_path = georef_dense_path + ".vis"; 71 | std::fstream input_vis_file(input_vis_path, std::ios::in | std::ios::binary); 72 | CHECK(input_vis_file.is_open()) << input_vis_path; 73 | 74 | const size_t vis_num_points = colmap::ReadBinaryLittleEndian(&input_vis_file); 75 | CHECK_EQ(vis_num_points, georef_dense->size()); 76 | 77 | std::vector> input_vis_points; 78 | input_vis_points.reserve(georef_dense->size()); 79 | for (auto it=georef_dense->begin(); it!=georef_dense->end(); it++) { 80 | std::vector image_idx; 81 | int num_visible_images = 82 | colmap::ReadBinaryLittleEndian(&input_vis_file); 83 | image_idx.reserve(num_visible_images); 84 | for (uint32_t i = 0; i < num_visible_images; ++i) { 85 | image_idx.push_back(colmap::ReadBinaryLittleEndian(&input_vis_file)); 86 | } 87 | input_vis_points.push_back(image_idx); 88 | } 89 | 90 | LOG(INFO) << "visible images ids ready to be transferred"; 91 | 92 | pcl::KdTree::Ptr tree (new pcl::KdTreeFLANN); 93 | tree->setInputCloud(georef_dense); 94 | std::vector nn_indices (1); 95 | std::vector nn_dists (1); 96 | 97 | std::vector> output_vis_points; 98 | output_vis_points.reserve(lidar->size()); 99 | 100 | for(auto it = lidar->begin(); it != lidar->end(); it++){ 101 | tree->nearestKSearch(*it, 1, nn_indices, nn_dists); 102 | if(nn_dists[0] <= max_distance){ 103 | std::vector image_idx = input_vis_points.at(nn_indices[0]); 104 | output_vis_points.push_back(image_idx); 105 | }else{ 106 | output_vis_points.push_back(std::vector()); 107 | } 108 | } 109 | 110 | 111 | if (!output_cloud_path.empty()) { 112 | // Note : Instead of using pcl::savePLYFileBinary function, 113 | // we use a PLYwriter so that can set the camera parameter to false, 114 | // to 
not add its element in the header, because COLMAP doesn't like 115 | // PLY files with unknown headers. 116 | pcl::PLYWriter writer; 117 | const bool binary=true, use_camera=false; 118 | writer.write(output_cloud_path, *lidar, binary, use_camera); 119 | const std::string output_vis_path = output_cloud_path + ".vis"; 120 | colmap::mvs::WritePointsVisibility(output_vis_path, output_vis_points); 121 | } 122 | 123 | return (0); 124 | } -------------------------------------------------------------------------------- /pcl_util/mesh_triangulator.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2020 ENSTA Paris, Clément Pinard 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are met: 5 | // 6 | // 1. Redistributions of source code must retain the above copyright notice, 7 | // this list of conditions and the following disclaimer. 8 | // 9 | // 2. Redistributions in binary form must reproduce the above copyright notice, 10 | // this list of conditions and the following disclaimer in the documentation 11 | // and/or other materials provided with the distribution. 12 | // 13 | // 3. Neither the name of the copyright holder nor the names of its contributors 14 | // may be used to endorse or promote products derived from this software 15 | // without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | // POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "pointcloud_subsampler.h" 41 | 42 | int main(int argc, char** argv) { 43 | FLAGS_logtostderr = 1; 44 | google::InitGoogleLogging(argv[0]); 45 | pcl::console::setVerbosityLevel(pcl::console::L_DEBUG); 46 | 47 | // Parse arguments. 
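// This tool triangulates a point cloud that already carries normals: the cloud given by
// --point_normal_cloud_path is first subsampled to --resolution (the same octree/voxel
// filter used by PointCloudSubsampler), then meshed with greedy projection triangulation
// using a search radius of twice the resolution, and the mesh is saved to --out_mesh.
// Illustrative invocation (file names are placeholders):
//   ./MeshTriangulator --point_normal_cloud_path lidar_with_normals.ply \
//       --resolution 0.2 --out_mesh lidar_mesh.ply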
48 | int dummy; 49 | if (argc <= 1 || 50 | pcl::console::parse_argument(argc, argv, "-h", dummy) >= 0 || 51 | pcl::console::parse_argument(argc, argv, "--help", dummy) >= 0) { 52 | LOG(INFO) << "Usage: " << argv[0] << " --point_normal_cloud_path --resolution --out_mesh "; 53 | return EXIT_FAILURE; 54 | } 55 | 56 | std::string point_normal_cloud_path; 57 | pcl::console::parse_argument(argc, argv, "--point_normal_cloud_path", point_normal_cloud_path); 58 | float resolution = 0.2; //20cm resolution 59 | pcl::console::parse_argument(argc, argv, "--resolution", resolution); 60 | std::string mesh_output; 61 | pcl::console::parse_argument(argc, argv, "--out_mesh", mesh_output); 62 | 63 | if (mesh_output.empty()){ 64 | LOG(ERROR) << "No output path was given"; 65 | LOG(INFO) << "Usage: " << argv[0] << " --point_normal_cloud_path --resolution --out_mesh "; 66 | return EXIT_FAILURE; 67 | } 68 | 69 | // Load point cloud with normals. 70 | LOG(INFO) << "Loading point cloud ..."; 71 | pcl::PointCloud::Ptr point_normal_cloud( 72 | new pcl::PointCloud()); 73 | if (pcl::io::loadPLYFile(point_normal_cloud_path, *point_normal_cloud) < 0) { 74 | return EXIT_FAILURE; 75 | } 76 | 77 | LOG(INFO) << "Subsampling to have a mean distance between points of " << resolution << " m"; 78 | point_normal_cloud = filter(point_normal_cloud, resolution); 79 | 80 | LOG(INFO) << "Beginning triangulation"; 81 | // Create search tree* 82 | pcl::search::KdTree::Ptr tree (new pcl::search::KdTree); 83 | tree->setInputCloud (point_normal_cloud); 84 | 85 | pcl::GreedyProjectionTriangulation gp3; 86 | pcl::PolygonMesh triangles; 87 | 88 | // Set the maximum distance between connected points (maximum edge length) 89 | gp3.setSearchRadius (resolution * 2); 90 | 91 | // Set typical values for the parameters 92 | gp3.setMu (2.5); 93 | gp3.setMaximumNearestNeighbors (100); 94 | gp3.setMaximumSurfaceAngle(M_PI/2); 95 | gp3.setMinimumAngle(0); 96 | gp3.setMaximumAngle(2 * M_PI); 97 | gp3.setNormalConsistency(true); 98 | 99 | // Get result 100 | gp3.setInputCloud (point_normal_cloud); 101 | gp3.setSearchMethod (tree); 102 | gp3.reconstruct (triangles); 103 | 104 | LOG(INFO) << "Done."; 105 | 106 | // Additional vertex information 107 | if (!mesh_output.empty()) 108 | pcl::io::savePLYFileBinary (mesh_output, triangles); 109 | 110 | return EXIT_SUCCESS; 111 | } 112 | -------------------------------------------------------------------------------- /pcl_util/normals_transfer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | Eigen::Matrix4f readMatrix(std::string filename) 13 | { 14 | int cols = 4, rows = 4; 15 | Eigen::Matrix4f M; 16 | 17 | std::ifstream infile; 18 | infile.open(filename); 19 | int row = 0; 20 | while (! infile.eof() && row < 4) 21 | { 22 | std::string line; 23 | std::getline(infile, line); 24 | 25 | std::stringstream stream(line); 26 | stream >> M(row, 0) >> M(row, 1) >> M(row, 2) >> M(row, 3); 27 | row ++; 28 | } 29 | infile.close(); 30 | return M; 31 | }; 32 | 33 | 34 | int main (int argc, char** argv) 35 | { 36 | FLAGS_logtostderr = 1; 37 | google::InitGoogleLogging(argv[0]); 38 | pcl::console::setVerbosityLevel(pcl::console::L_DEBUG); 39 | 40 | // Parse arguments. 
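// Note: the usage string above still lists the registration tool's flags; the options
// actually parsed below are --georef_dense, --lidar, --georef_matrix and --output_cloud.
// The tool outlier-filters the photogrammetric cloud, moves it into the lidar frame with
// the 4x4 matrix read from --georef_matrix, and flips every lidar normal that points away
// from the normal of its nearest photogrammetric neighbour, so the lidar normals end up
// oriented consistently with the photogrammetric cloud.
// Illustrative invocation (file names are placeholders):
//   ./NormalsTransfer --georef_dense georef_dense.ply --lidar lidar_with_normals.ply \
//       --georef_matrix registration_matrix.txt --output_cloud lidar_oriented_normals.ply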
41 | int dummy; 42 | if (argc <= 1 || 43 | pcl::console::parse_argument(argc, argv, "-h", dummy) >= 0 || 44 | pcl::console::parse_argument(argc, argv, "--help", dummy) >= 0) { 45 | LOG(INFO) << "Usage: " << argv[0] << " --georef --lidar " 46 | << "--max_distance --output_matrix (--output_cloud )"; 47 | return EXIT_FAILURE; 48 | } 49 | 50 | std::string georef_dense_path; 51 | pcl::console::parse_argument(argc, argv, "--georef_dense", georef_dense_path); 52 | std::string lidar_path; 53 | pcl::console::parse_argument(argc, argv, "--lidar", lidar_path); 54 | std::string georef_matrix_path; 55 | pcl::console::parse_argument(argc, argv, "--georef_matrix", georef_matrix_path); 56 | std::string output_cloud_path; 57 | pcl::console::parse_argument(argc, argv, "--output_cloud", output_cloud_path); 58 | 59 | if (georef_matrix_path.empty() && output_cloud_path.empty()){ 60 | LOG(ERROR) << "No output path was given"; 61 | LOG(INFO) << "Usage: " << argv[0] << " --georef_dense --lidar " 62 | << "--georef_matrix --output_cloud "; 63 | return EXIT_FAILURE; 64 | } 65 | 66 | // Load point cloud with normals. 67 | LOG(INFO) << "Loading point clouds ..."; 68 | pcl::PointCloud::Ptr georef_dense( 69 | new pcl::PointCloud()); 70 | if (pcl::io::loadPLYFile(georef_dense_path, *georef_dense) < 0) { 71 | return EXIT_FAILURE; 72 | } 73 | 74 | pcl::PointCloud::Ptr lidar( 75 | new pcl::PointCloud()); 76 | if (pcl::io::loadPLYFile(lidar_path, *lidar) < 0) { 77 | return EXIT_FAILURE; 78 | } 79 | LOG(INFO) << "point clouds loaded"; 80 | 81 | LOG(INFO) << "Loading transformation matrix ..."; 82 | Eigen::Matrix4f M = readMatrix(georef_matrix_path); 83 | LOG(INFO) << "Matrix loaded"; 84 | 85 | // Filter to get inlier cloud, store in filtered_cloud. 86 | LOG(INFO) << "filtering georef cloud"; 87 | pcl::PointCloud::Ptr geroef_filtered (new pcl::PointCloud); 88 | pcl::StatisticalOutlierRemoval sor; 89 | sor.setInputCloud(georef_dense); 90 | sor.setMeanK(6); 91 | sor.setStddevMulThresh(0.1); 92 | sor.filter(*geroef_filtered); 93 | 94 | pcl::PointCloud::Ptr georef_aligned( 95 | new pcl::PointCloud()); 96 | pcl::transformPointCloudWithNormals (*geroef_filtered, *georef_aligned, M); 97 | 98 | LOG(INFO) << "Normals ready to be transferred"; 99 | 100 | pcl::KdTree::Ptr tree (new pcl::KdTreeFLANN); 101 | tree->setInputCloud(georef_aligned); 102 | std::vector nn_indices (1); 103 | std::vector nn_dists (1); 104 | 105 | for(auto it = lidar->begin(); it != lidar->end(); it++){ 106 | tree->nearestKSearch(*it, 1, nn_indices, nn_dists); 107 | auto n1 = georef_aligned->points[nn_indices[0]].getNormalVector3fMap(); 108 | auto n2 = it->getNormalVector3fMap(); 109 | if (n1.dot(n2) < 0){ 110 | n2 *= -1; 111 | } 112 | } 113 | 114 | 115 | if (!output_cloud_path.empty()) 116 | pcl::io::savePLYFileBinary(output_cloud_path, *lidar); 117 | 118 | return (0); 119 | } -------------------------------------------------------------------------------- /pcl_util/pointcloud_sor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | int sor(std::string input_path, std::string output_path, int knn, int std) 10 | { 11 | // Load point cloud with normals. 
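// (The point type T may or may not carry normals and colours; main() selects it through the
// -n and -c switches so those fields survive the filtering.) The filter itself is PCL's
// statistical outlier removal: for each point the mean distance to its knn nearest
// neighbours is computed, and points whose mean distance exceeds the global mean by more
// than std standard deviations are dropped. In the pipeline, filter_cloud passes
// args.SOR[0] and args.SOR[1] as knn and std (see main_pipeline_no_lidar.py).
// Illustrative invocation with placeholder file names and values:
//   ./CloudSOR --input input_cloud.ply --output filtered_cloud.ply --knn 6 --std 5 -n -c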
12 | LOG(INFO) << "Loading point cloud ..."; 13 | typename pcl::PointCloud::Ptr input( 14 | new pcl::PointCloud()); 15 | if (pcl::io::loadPLYFile(input_path, *input) < 0) { 16 | return EXIT_FAILURE; 17 | } 18 | LOG(INFO) << "point cloud loaded..."; 19 | 20 | // Filter to get inlier cloud, store in filtered_cloud. 21 | typename pcl::PointCloud::Ptr ouptut (new pcl::PointCloud); 22 | typename pcl::StatisticalOutlierRemoval sor; 23 | sor.setInputCloud(input); 24 | sor.setMeanK(knn); 25 | sor.setStddevMulThresh(std); 26 | sor.filter(*ouptut); 27 | 28 | if (!output_path.empty()) 29 | pcl::io::savePLYFileBinary(output_path, *ouptut); 30 | 31 | return (0); 32 | } 33 | 34 | int main (int argc, char** argv) 35 | { 36 | FLAGS_logtostderr = 1; 37 | google::InitGoogleLogging(argv[0]); 38 | pcl::console::setVerbosityLevel(pcl::console::L_DEBUG); 39 | 40 | // Parse arguments. 41 | int dummy; 42 | if (argc <= 1 || 43 | pcl::console::parse_argument(argc, argv, "-h", dummy) >= 0 || 44 | pcl::console::parse_argument(argc, argv, "--help", dummy) >= 0) { 45 | LOG(INFO) << "Usage: " << argv[0] << " --input " 46 | << "--knn --std --output )"; 47 | return EXIT_FAILURE; 48 | } 49 | 50 | std::string input_path; 51 | pcl::console::parse_argument(argc, argv, "--input", input_path); 52 | std::string output_path; 53 | pcl::console::parse_argument(argc, argv, "--output", output_path); 54 | float knn = 1; 55 | pcl::console::parse_argument(argc, argv, "--knn", knn); 56 | float std = 1; 57 | pcl::console::parse_argument(argc, argv, "--std", std); 58 | bool with_normals = pcl::console::find_switch(argc, argv, "-n"); 59 | bool with_colors = pcl::console::find_switch(argc, argv, "-c"); 60 | 61 | if (output_path.empty()){ 62 | LOG(ERROR) << "No output path was given"; 63 | LOG(INFO) << "Usage: " << argv[0] << " --georef --lidar " 64 | << "--max_distance --output_matrix (--output_cloud )"; 65 | return EXIT_FAILURE; 66 | } 67 | 68 | if(with_normals){ 69 | if(with_colors){ 70 | return sor(input_path, output_path, knn, std); 71 | }else{ 72 | return sor(input_path, output_path, knn, std); 73 | } 74 | }else{ 75 | if (with_colors){ 76 | return sor(input_path, output_path, knn, std); 77 | }else{ 78 | return sor(input_path, output_path, knn, std); 79 | } 80 | } 81 | 82 | } -------------------------------------------------------------------------------- /pcl_util/pointcloud_subsampler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "pointcloud_subsampler.h" 7 | 8 | int 9 | main (int argc, char** argv) 10 | { 11 | FLAGS_logtostderr = 1; 12 | google::InitGoogleLogging(argv[0]); 13 | pcl::console::setVerbosityLevel(pcl::console::L_DEBUG); 14 | 15 | // Parse arguments. 
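// Flags: --point_cloud_path for the input PLY, --resolution for the target mean distance
// between points (0.2 m by default), --output for the subsampled PLY. The actual filtering
// lives in pointcloud_subsampler.h: the cloud is split into coarse octree leaves and each
// leaf is voxel-grid filtered with a leaf size equal to the resolution.
// Illustrative invocation (file names are placeholders):
//   ./PointCloudSubsampler --point_cloud_path lidar.ply --resolution 0.2 --output lidar_subsampled.ply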
16 | int dummy; 17 | if (argc <= 1 || 18 | pcl::console::parse_argument(argc, argv, "-h", dummy) >= 0 || 19 | pcl::console::parse_argument(argc, argv, "--help", dummy) >= 0) { 20 | LOG(INFO) << "Usage: " << argv[0] << " --point_cloud_path --resolution --output "; 21 | return EXIT_FAILURE; 22 | } 23 | 24 | std::string point_cloud_path; 25 | pcl::console::parse_argument(argc, argv, "--point_cloud_path", point_cloud_path); 26 | float resolution = 0.2; //20cm resolution 27 | pcl::console::parse_argument(argc, argv, "--resolution", resolution); 28 | std::string output_path; 29 | pcl::console::parse_argument(argc, argv, "--output", output_path); 30 | 31 | if (output_path.empty()){ 32 | LOG(ERROR) << "No output path was given"; 33 | LOG(INFO) << "Usage: " << argv[0] << " --point_cloud_path --resolution --output "; 34 | return EXIT_FAILURE; 35 | } 36 | 37 | // Load point cloud with normals. 38 | LOG(INFO) << "Loading point cloud ..."; 39 | pcl::PointCloud::Ptr point_cloud( 40 | new pcl::PointCloud()); 41 | if (pcl::io::loadPLYFile(point_cloud_path, *point_cloud) < 0) { 42 | return EXIT_FAILURE; 43 | } 44 | 45 | LOG(INFO) << "Subsampling to have a mean distance between points of " << resolution << " m"; 46 | point_cloud = filter(point_cloud, resolution); 47 | 48 | if (!output_path.empty()) 49 | pcl::io::savePLYFileBinary (output_path, *point_cloud); 50 | 51 | return (0); 52 | } -------------------------------------------------------------------------------- /pcl_util/pointcloud_subsampler.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | template 7 | typename pcl::PointCloud::Ptr filter(typename pcl::PointCloud::Ptr input, float resolution) { 8 | 9 | typename pcl::octree::OctreePointCloudPointVector octree (100*resolution); 10 | octree.setInputCloud(input); 11 | octree.addPointsFromInputCloud(); 12 | 13 | typename pcl::ExtractIndices extract; 14 | 15 | pcl::PointIndices::Ptr to_process (new pcl::PointIndices ()); 16 | typename pcl::PointCloud::Ptr input_subcloud (new pcl::PointCloud); 17 | typename pcl::PointCloud::Ptr final_cloud (new pcl::PointCloud); 18 | typename pcl::PointCloud::Ptr cloud_filtered (new pcl::PointCloud); 19 | pcl::VoxelGrid vox; 20 | vox.setLeafSize (resolution, resolution, resolution); 21 | 22 | for (auto it = octree.leaf_begin(); it != octree.leaf_end(); ++it) { 23 | pcl::octree::OctreeContainerPointIndices& container = it.getLeafContainer(); 24 | 25 | pcl::IndicesPtr indexVector(new std::vector); 26 | container.getPointIndices(*indexVector); 27 | extract.setInputCloud (input); 28 | extract.setIndices (indexVector); 29 | extract.filter (*input_subcloud); 30 | 31 | vox.setInputCloud (input_subcloud); 32 | vox.filter (*cloud_filtered); 33 | *final_cloud += *cloud_filtered; 34 | } 35 | return final_cloud; 36 | } -------------------------------------------------------------------------------- /picture_localization.py: -------------------------------------------------------------------------------- 1 | from wrappers import Colmap, ETH3D 2 | from cli_utils import set_new_images_arparser, print_step, get_matrix 3 | from video_localization import generate_GT_individual_pictures 4 | import meshlab_xml_writer as mxw 5 | import prepare_images as pi 6 | import prepare_workspace as pw 7 | import pcl_util 8 | import numpy as np 9 | 10 | 11 | def main(): 12 | args = set_new_images_arparser().parse_args() 13 | env = vars(args) 14 | args.workspace = args.workspace.abspath() 15 | 
pw.prepare_workspace(args.workspace, env) 16 | colmap = Colmap(db=env["thorough_db"], 17 | image_path=env["colmap_img_root"], 18 | mask_path=env["mask_path"], 19 | dense_workspace=env["dense_workspace"], 20 | binary=args.colmap, 21 | verbose=args.verbose, 22 | logfile=args.log) 23 | env["colmap"] = colmap 24 | eth3d = ETH3D(args.eth3d, args.raw_output_folder / "Images", args.max_occlusion_depth, 25 | verbose=args.verbose, logfile=args.log, splat_radius=args.eth3d_splat_radius) 26 | env["eth3d"] = eth3d 27 | env["videos_list"] = sum((list((args.input_folder/"Videos").walkfiles('*{}'.format(ext))) for ext in args.vid_ext), []) 28 | no_gt_folder = args.input_folder/"Videos"/"no_groundtruth" 29 | if no_gt_folder.isdir(): 30 | env["videos_to_localize"] = [v for v in env["videos_list"] if not str(v).startswith(no_gt_folder)] 31 | 32 | i = 1 33 | print_step(i, "Pictures preparation") 34 | env["individual_pictures"] = pi.extract_pictures_to_workspace(**env) 35 | 36 | i += 1 37 | print_step(i, "Add new pictures to COLMAP thorough model") 38 | colmap.db = env["thorough_db"] 39 | colmap.match(method="vocab_tree", vocab_tree=env["indexed_vocab_tree"], max_num_matches=env["max_num_matches"]) 40 | extended_georef = env["georef_recon"] + "_extended" 41 | extended_georef.makedirs_p() 42 | if args.map_new_images: 43 | colmap.map(input=env["georef_recon"], output=extended_georef) 44 | else: 45 | colmap.register_images(input=env["georef_recon"], output=extended_georef) 46 | colmap.adjust_bundle(extended_georef, extended_georef, 47 | num_iter=args.bundle_adjuster_steps, refine_extra_params=True) 48 | colmap.merge_models(output=env["georef_full_recon"], input1=env["georef_full_recon"], input2=extended_georef) 49 | 50 | if env["rebuild_occlusion_mesh"]: 51 | i += 1 52 | print_step(i, "Full reconstruction point cloud densificitation with new images") 53 | colmap.undistort(input=env["georef_full_recon"]) 54 | # This step should be fast since everything else than new images is already computed 55 | colmap.dense_stereo(min_depth=env["stereo_min_depth"], max_depth=env["stereo_max_depth"]) 56 | colmap.stereo_fusion(output=env["georefrecon_ply"]) 57 | if args.inspect_dataset: 58 | georef_mlp = env["georef_full_recon"]/"georef_recon.mlp" 59 | mxw.create_project(georef_mlp, [env["georefrecon_ply"]]) 60 | colmap.export_model(output=env["georef_full_recon"], 61 | input=env["georef_full_recon"], 62 | output_type="TXT") 63 | eth3d.inspect_dataset(scan_meshlab=georef_mlp, 64 | colmap_model=env["georef_full_recon"], 65 | image_path=env["colmap_img_root"]) 66 | eth3d.inspect_dataset(scan_meshlab=env["aligned_mlp"], 67 | colmap_model=env["georef_full_recon"], 68 | image_path=env["colmap_img_root"]) 69 | 70 | i += 1 71 | print_step(i, "Occlusion Mesh re-computing") 72 | '''combine the MLP file into a single ply file. We need the normals for the splats''' 73 | if args.normals_method == "radius": 74 | eth3d.compute_normals(env["with_normals_path"], env["aligned_mlp"], neighbor_radius=args.normals_radius) 75 | else: 76 | eth3d.compute_normals(env["with_normals_path"], env["aligned_mlp"], neighbor_count=args.normals_neighbours) 77 | '''Create vis file that will tell by what images each point can be seen. 
We transfer this knowledge from georefrecon 78 | to the Lidar model''' 79 | env["global_registration_matrix"] = get_matrix(env["matrix_path"]) 80 | scale = np.linalg.norm(env["global_registration_matrix"][:3, :3], ord=2) 81 | with_normals_subsampled = env["with_normals_path"].stripext() + "_subsampled.ply" 82 | pcl_util.create_vis_file(env["georefrecon_ply"], env["with_normals_path"], 83 | resolution=args.mesh_resolution / scale, 84 | output=with_normals_subsampled) 85 | '''Compute the occlusion mesh by fooling COLMAP into thinking the lidar point cloud was made with colmap''' 86 | colmap.delaunay_mesh(env["occlusion_ply"], input_ply=with_normals_subsampled) 87 | if args.splats: 88 | eth3d.create_splats(env["splats_ply"], with_normals_subsampled, 89 | env["occlusion_ply"], env["splat_threshold"] / scale, 90 | env["max_splat_size"]) 91 | 92 | i += 1 93 | if i not in args.skip_step: 94 | print_step(i, "Groud Truth generation") 95 | by_folder = pi.group_pics_by_folder(env["individual_pictures"]) 96 | for folder, pic_list in by_folder.items(): 97 | generate_GT_individual_pictures(input_colmap_model=env["georef_full_recon"], 98 | individual_pictures_list=pic_list, 99 | relpath=folder, 100 | step_index=i, **env) 101 | 102 | 103 | if __name__ == '__main__': 104 | main() 105 | -------------------------------------------------------------------------------- /prepare_images.py: -------------------------------------------------------------------------------- 1 | from pyproj import Proj 2 | from edit_exif import get_gps_location 3 | import numpy as np 4 | import pandas as pd 5 | import rawpy 6 | import imageio 7 | import generate_sky_masks as gsm 8 | import videos_to_colmap as v2c 9 | import colmap_util as ci 10 | 11 | 12 | def extract_gps_and_path(individual_pictures, colmap_img_root, system, centroid=None, **env): 13 | proj = Proj(system) 14 | georef_list = [] 15 | for img in individual_pictures: 16 | gps = get_gps_location(colmap_img_root / img) 17 | if gps is not None: 18 | lat, lng, alt = gps 19 | x, y = proj(lng, lat) 20 | if centroid is None: 21 | centroid = np.array([x, y, alt]) 22 | x -= centroid[0] 23 | y -= centroid[1] 24 | alt -= centroid[2] 25 | georef_list.append("{} {} {} {}\n".format(img, x, y, alt)) 26 | return georef_list, centroid 27 | 28 | 29 | def extract_pictures_to_workspace(input_folder, colmap_img_root, individual_pictures_path, workspace, colmap, 30 | raw_ext, pic_ext, more_sift_features, generic_model, **env): 31 | picture_folder = input_folder / "Pictures" 32 | pictures = [] 33 | raw_files = sum((list(picture_folder.walkfiles('*{}'.format(ext))) for ext in raw_ext), []) 34 | for raw in raw_files: 35 | if not any((raw.stripext() + ext).isfile() for ext in pic_ext): 36 | converted_pic_path = raw.relpath(picture_folder).stripext() + '.jpg' 37 | if not converted_pic_path.isfile(): 38 | raw_array = rawpy.imread(raw) 39 | rgb = raw_array.postprocess() 40 | dst = individual_pictures_path / converted_pic_path 41 | dst.parent.makedirs_p() 42 | imageio.imsave(dst, rgb) 43 | pictures.append(converted_pic_path) 44 | normal_files = sum((list(picture_folder.walkfiles('*{}'.format(ext))) for ext in pic_ext), []) 45 | for file in normal_files: 46 | pic_path = file.relpath(picture_folder) 47 | if not pic_path.isfile(): 48 | dst = individual_pictures_path / pic_path 49 | dst.parent.makedirs_p() 50 | file.copy(dst) 51 | pictures.append(colmap_img_root.relpathto(individual_pictures_path) / pic_path) 52 | gsm.process_folder(folder_to_process=individual_pictures_path, 
colmap_img_root=colmap_img_root, pic_ext=pic_ext, **env) 53 | with open(picture_folder / "individual_pictures.txt", 'w') as f: 54 | f.write("\n".join(pictures) + "\n") 55 | colmap.extract_features(per_sub_folder=True, model=generic_model, 56 | image_list=picture_folder/"individual_pictures.txt", more=more_sift_features) 57 | return pictures 58 | 59 | 60 | def extract_videos_to_workspace(video_path, video_frame_list_thorough, georef_frames_list, **env): 61 | existing_georef, env["centroid"] = extract_gps_and_path(**env) 62 | if env["full_metadata"] is None: 63 | env["full_metadata"] = env["workspace"] / "full_metadata.csv" 64 | if env["full_metadata"].isfile(): 65 | existing_metadata = pd.read_csv(env["full_metadata"]) 66 | else: 67 | existing_metadata = None 68 | path_lists, extracted_video_folders, full_metadata = v2c.process_video_folder(output_video_folder=video_path, 69 | existing_georef=existing_georef, 70 | existing_metadata=existing_metadata, 71 | **env) 72 | if path_lists is not None: 73 | full_metadata.to_csv(env["full_metadata"]) 74 | with open(video_frame_list_thorough, "w") as f: 75 | f.write("\n".join(path_lists["thorough"]["frames"])) 76 | with open(georef_frames_list, "w") as f: 77 | f.write("\n".join(existing_georef) + "\n") 78 | f.write("\n".join(path_lists["thorough"]["georef"]) + "\n") 79 | for v, video_folder in extracted_video_folders.items(): 80 | with open(video_folder / "lowfps.txt", "w") as f: 81 | f.write("\n".join(path_lists[v]["frames_lowfps"]) + "\n") 82 | with open(video_folder / "georef.txt", "w") as f: 83 | f.write("\n".join(existing_georef) + "\n") 84 | f.write("\n".join(path_lists["thorough"]["georef"]) + "\n") 85 | f.write("\n".join(path_lists[v]["georef_lowfps"]) + "\n") 86 | for j, l in enumerate(path_lists[v]["frames_full"]): 87 | with open(video_folder / "full_chunk_{}.txt".format(j), "w") as f: 88 | f.write("\n".join(l) + "\n") 89 | gsm.process_folder(folder_to_process=video_path, **env) 90 | return extracted_video_folders 91 | 92 | 93 | def choose_biggest_model(dir): 94 | colmap_model_dirs = dir.dirs("[0-9]*") 95 | model_sizes = [len(ci.read_model.read_images_binary(d/"images.bin")) for d in colmap_model_dirs] 96 | return colmap_model_dirs[model_sizes.index(max((model_sizes)))] 97 | 98 | 99 | def group_pics_by_folder(pictures): 100 | result = {} 101 | for p in pictures: 102 | key = p.parent 103 | if p.parent not in result.keys(): 104 | result[key] = [p] 105 | else: 106 | result[key].append(p) 107 | return result 108 | -------------------------------------------------------------------------------- /prepare_workspace.py: -------------------------------------------------------------------------------- 1 | def check_input_folder(path, with_lidar=True): 2 | def print_error_string(): 3 | print("Error, bad input folder structure") 4 | print("Expected :") 5 | if with_lidar: 6 | print(str(path/"Lidar")) 7 | print(str(path/"Pictures")) 8 | print(str(path/"Videos")) 9 | print() 10 | print("but got :") 11 | print("\n".join(str(d) for d in path.dirs())) 12 | 13 | expected_folders = ["Pictures", "Videos"] 14 | if with_lidar: 15 | expected_folders.append("Lidar") 16 | if all((path/d).isdir() for d in expected_folders): 17 | return 18 | else: 19 | print_error_string() 20 | 21 | 22 | def prepare_workspace(path, env, with_lidar=True): 23 | if with_lidar: 24 | env["lidar_path"] = path / "Lidar" 25 | env["lidar_path"].makedirs_p() 26 | env["lidar_mlp"] = env["workspace"] / "lidar.mlp" 27 | env["with_normals_path"] = env["lidar_path"] / "with_normals.ply" 28 | 
env["occlusion_ply"] = env["lidar_path"] / "occlusion_model.ply" 29 | env["splats_ply"] = env["lidar_path"] / "splats_model.ply" if env["splats"] else None 30 | env["occlusion_mlp"] = env["lidar_path"] / "occlusions.mlp" 31 | env["splats_mlp"] = env["lidar_path"] / "splats.mlp" 32 | env["matrix_path"] = env["workspace"] / "matrix_thorough.txt" 33 | else: 34 | env["occlusion_ply"] = path / "occlusion_model.ply" 35 | env["splats_ply"] = path / "splats_model.ply" 36 | 37 | env["colmap_img_root"] = path / "Pictures" 38 | env["mask_path"] = path / "Masks" 39 | env["individual_pictures_path"] = path / "Pictures" / "individual_pictures" 40 | env["individual_pictures_path"].makedirs_p() 41 | env["video_path"] = path / "Pictures" / "Videos" 42 | env["video_path"].makedirs_p() 43 | env["thorough_recon"] = path / "Thorough" 44 | env["thorough_recon"].makedirs_p() 45 | env["georef_recon"] = env["thorough_recon"] / "georef" 46 | env["georef_full_recon"] = env["thorough_recon"] / "georef_full" 47 | env["dense_workspace"] = env["thorough_recon"]/"dense" 48 | env["video_recon"] = path / "Videos_reconstructions" 49 | env["aligned_mlp"] = env["workspace"] / "aligned_model.mlp" 50 | 51 | env["centroid_path"] = path / "centroid.txt" 52 | env["thorough_db"] = path / "scan_thorough.db" 53 | env["video_frame_list_thorough"] = env["colmap_img_root"] / "video_frames_for_thorough_scan.txt" 54 | env["georef_frames_list"] = env["colmap_img_root"] / "georef.txt" 55 | 56 | env["georefrecon_ply"] = env["thorough_recon"] / "georef_reconstruction.ply" 57 | env["indexed_vocab_tree"] = env["workspace"] / "vocab_tree_thorough.bin" 58 | 59 | 60 | def prepare_video_workspace(video_name, video_frames_folder, 61 | raw_output_folder, converted_output_folder, 62 | video_recon, video_path, **env): 63 | video_env = {video_name: video_name, 64 | video_frames_folder: video_frames_folder} 65 | relative_path_folder = video_frames_folder.relpath(video_path) 66 | video_env["lowfps_db"] = video_frames_folder / "video_low_fps.db" 67 | video_env["metadata_path"] = video_frames_folder / "metadata.csv" 68 | video_env["lowfps_image_list_path"] = video_frames_folder / "lowfps.txt" 69 | video_env["chunk_image_list_paths"] = sorted(video_frames_folder.files("full_chunk_*.txt")) 70 | video_env["chunk_dbs"] = [video_frames_folder / fp.stem + ".db" for fp in video_env["chunk_image_list_paths"]] 71 | colmap_root = video_recon / relative_path_folder 72 | video_env["colmap_models_root"] = colmap_root 73 | video_env["full_model"] = colmap_root 74 | video_env["lowfps_model"] = colmap_root / "lowfps" 75 | num_chunks = len(video_env["chunk_image_list_paths"]) 76 | video_env["chunk_models"] = [colmap_root / "chunk_{}".format(index) for index in range(num_chunks)] 77 | video_env["final_model"] = colmap_root / "final" 78 | output = {} 79 | output["images_root_folder"] = raw_output_folder / "images" 80 | output["video_frames_folder"] = output["images_root_folder"] / "Videos" / relative_path_folder 81 | output["model_folder"] = raw_output_folder / "models" / relative_path_folder 82 | output["interpolated_frames_list"] = output["model_folder"] / "interpolated_frames.txt" 83 | output["final_model"] = output["model_folder"] / "final" 84 | output["kitti_format_folder"] = converted_output_folder / "KITTI" / relative_path_folder 85 | output["viz_folder"] = converted_output_folder / "visualization" / relative_path_folder 86 | video_env["output_env"] = output 87 | video_env["already_localized"] = env["resume_work"] and output["model_folder"].isdir() 88 | 
video_env["GT_already_done"] = env["resume_work"] and (raw_output_folder / "ground_truth_depth" / video_name.stem).isdir() 89 | return video_env 90 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | laspy 3 | pylas 4 | pyproj 5 | path 6 | tqdm 7 | numpy 8 | piexif 9 | pandas 10 | imageio 11 | pyntcloud 12 | ezdxf 13 | meshio 14 | scikit-learn 15 | pebble 16 | dicttoxml 17 | lxml 18 | scikit-image 19 | rawpy 20 | -------------------------------------------------------------------------------- /resize_colmap_cameras.py: -------------------------------------------------------------------------------- 1 | from colmap_util import read_model as rm 2 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 3 | from path import Path 4 | 5 | parser = ArgumentParser(description='Resize cameras in a colmap model', 6 | formatter_class=ArgumentDefaultsHelpFormatter) 7 | 8 | parser.add_argument('-i', '--input_cameras_colmap', metavar='FILE', type=Path, required=True, 9 | help='Input COLMAP cameras.bin or caameras.txt file to rescale or resize.') 10 | parser.add_argument('-o', '--output_cameras_colmap', metavar='FILE', type=Path, required=True, 11 | help='Output images.bin or images.txt file with filtered frame localizations') 12 | im_resize = parser.add_mutually_exclusive_group() 13 | im_resize.add_argument('-w', '--width', type=float, default=None, 14 | help='Output width of cameras every camera will have, ' 15 | 'even though they are initially of different size. ' 16 | 'Height reflect initial ratio') 17 | im_resize.add_argument('-r', '--rescale', type=float, default=1, 18 | help='float to which each camera dimension will be multiplied. 
' 19 | 'As such, cameras may have different widths') 20 | 21 | 22 | def resize_cameras(input_cameras, output_cameras, output_width=None, output_rescale=1): 23 | if input_cameras.ext == ".txt": 24 | cameras = rm.read_cameras_text(input_cameras) 25 | elif input_cameras.ext == ".bin": 26 | cameras = rm.read_cameras_binary(input_cameras) 27 | else: 28 | raise ValueError("Unknown cameras file extension {}".format(input_cameras.ext)) 29 | cameras_rescaled = {} 30 | for i, c in cameras.items(): 31 | if output_width is not None: 32 | rescale = output_width / c.width 33 | else: 34 | rescale = output_rescale 35 | new_width, new_height = int(c.width * rescale), int(c.height * rescale) 36 | output_params = c.params.copy() 37 | single_focal = ('SIMPLE' in c.model) or ('RADIAL' in c.model) 38 | output_params[:3] *= rescale 39 | if not single_focal: 40 | output_params[3] *= rescale 41 | 42 | cameras_rescaled[i] = rm.Camera(id=c.id, model=c.model, 43 | width=new_width, height=new_height, 44 | params=output_params) 45 | 46 | rm.write_cameras_text(cameras_rescaled, output_cameras) 47 | 48 | 49 | if __name__ == '__main__': 50 | args = parser.parse_args() 51 | resize_cameras(input_cameras=args.input_cameras_colmap, 52 | output_cameras=args.output_cameras_colmap, 53 | output_width=args.width, 54 | output_rescale=args.rescale) 55 | -------------------------------------------------------------------------------- /split_dataset.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 2 | from path import Path 3 | import pandas as pd 4 | from tqdm import tqdm 5 | import numpy as np 6 | from scipy.spatial.transform import Rotation 7 | import random 8 | 9 | 10 | parser = ArgumentParser(description='Split a converted dataset into training scenes for unsupervised depth training', 11 | formatter_class=ArgumentDefaultsHelpFormatter) 12 | 13 | parser.add_argument('--dataset_dir', metavar='DIR', type=Path, required=True, 14 | help='folder containing the converted dataset') 15 | parser.add_argument('--output_dir', metavar='DIR', type=Path, required=True, 16 | help='output folder which will be used as a dataset for unsupervised depth training') 17 | parser.add_argument('--scenes_list', type=Path, default=None, 18 | help="List of folders containing videos to split, useful if some are used for testing") 19 | parser.add_argument('--verbose', '-v', action='count', default=0) 20 | parser.add_argument('--min_displacement', default=0, type=float, 21 | help='Minimum displacement between two frames') 22 | parser.add_argument('--max_rotation', default=1, type=float, 23 | help='Maximum rotation between two frames. If above, frame is discarded and video is split') 24 | parser.add_argument('--min_num_frames', default=5, type=int, 25 | help='Minimum number of frames in a video split. 
If below, split is removed') 26 | parser.add_argument('--allow_interpolated_frames', action='store_true', 27 | help='If set, will consider frames with interpolated odometry to be valid') 28 | parser.add_argument('--adapt_min_disp_to_depth', action='store_true') 29 | parser.add_argument('--train_split', type=float, default=0.9, 30 | help='Proportion of videos used for the training split; the other videos are used for validation') 31 | parser.add_argument('--seed', type=int, default=0) 32 | 33 | 34 | def sample_splits(sequence, min_displacement, max_rot, min_num_frames): 35 | def get_rotation(row): 36 | flat_matrix = row[["pose00", "pose01", "pose02", 37 | "pose10", "pose11", "pose12", 38 | "pose20", "pose21", "pose22"]].values 39 | rotation = Rotation.from_matrix(flat_matrix.reshape(3, 3)) 40 | return rotation 41 | current_split = [0] 42 | last_rot = None 43 | last_intrinsics = None 44 | for k in sample_frames(sequence, min_displacement): 45 | row = sequence.iloc[k] 46 | current_rot = get_rotation(row) 47 | current_intrinsics = row[["fx", "fy", "cx", "cy"]].values.astype(float) 48 | if last_rot is not None: 49 | rot_diff_mag = (current_rot.inv() * last_rot).magnitude() 50 | else: 51 | rot_diff_mag = 0 52 | if last_intrinsics is None: 53 | last_intrinsics = current_intrinsics 54 | 55 | if rot_diff_mag > max_rot or not np.allclose(current_intrinsics, last_intrinsics): 56 | if len(current_split) > min_num_frames: 57 | yield current_split 58 | current_split = [] 59 | last_rot = None 60 | last_intrinsics = None 61 | else: 62 | last_rot = current_rot 63 | current_split.append(k) 64 | if len(current_split) > min_num_frames: 65 | yield current_split 66 | 67 | 68 | def sample_frames(sequence, min_displacement): 69 | tvec = sequence[["pose03", "pose13", "pose23"]].values 70 | origin = tvec[0] 71 | current_disp = 0 72 | for j, pos in enumerate(tvec): 73 | current_disp = np.linalg.norm(pos - origin) 74 | if current_disp > min_displacement: 75 | yield j 76 | origin = pos 77 | 78 | 79 | def get_min_depth(row): 80 | depth_path = Path(row["image_path"]).stripext() + ".npy" 81 | depth = np.load(depth_path) 82 | return depth.min() 83 | 84 | 85 | def sample_frames_depth(sequence, min_displacement, min_depth): 86 | tvec = sequence[["pose03", "pose13", "pose23"]].values 87 | origin = tvec[0] 88 | current_disp = 0 89 | last_min_depth = get_min_depth(sequence.iloc[0]) 90 | for i, pos in enumerate(tvec): 91 | current_min_depth = get_min_depth(sequence.iloc[i]) 92 | if current_disp * min(last_min_depth, current_min_depth) / min_depth > min_displacement: 93 | yield i 94 | origin, last_min_depth = pos, current_min_depth 95 | current_disp = np.linalg.norm(pos - origin) 96 | 97 | 98 | def main(): 99 | args = parser.parse_args() 100 | random.seed(args.seed) 101 | args.output_dir.makedirs_p() 102 | total_scenes = 0 103 | total_frames = 0 104 | frames_per_video = {} 105 | dirs_per_video = {} 106 | if args.scenes_list is not None: 107 | with open(args.scenes_list, 'r') as f: 108 | dataset_folders = [args.dataset_dir / path[:-1] for path in f.readlines()] 109 | else: 110 | dataset_folders = [f for f in args.dataset_dir.walkdirs() if f.files("*.jpg")] 111 | for v in tqdm(dataset_folders): 112 | frames_per_video[v] = 0 113 | dirs_per_video[v] = [] 114 | metadata = pd.read_csv(v / "metadata.csv") 115 | metadata["full_image_path"] = [v / Path(f).basename() 116 | for f in metadata["image_path"].values] 117 | valid_odometry_frames = metadata["registered"] 118 | if not args.allow_interpolated_frames: 119 | valid_odometry_frames = valid_odometry_frames & 
~metadata["interpolated"] 120 | # Construct valid sequences 121 | valid_diff = valid_odometry_frames.astype(float).diff() 122 | invalidity_start = valid_diff.index[valid_diff == -1].tolist() 123 | validity_start = valid_diff.index[valid_diff == 1].tolist() 124 | if valid_odometry_frames.iloc[0]: 125 | validity_start = [0] + validity_start 126 | if valid_odometry_frames.iloc[-1]: 127 | invalidity_start.append(len(valid_odometry_frames)) 128 | valid_sequences = [metadata.iloc[s:e].copy() for s, e in zip(validity_start, invalidity_start)] 129 | for i, seq in enumerate(valid_sequences): 130 | splits = sample_splits(seq, args.min_displacement, args.max_rotation, args.min_num_frames) 131 | for j, s in enumerate(splits): 132 | total_scenes += 1 133 | total_frames += len(s) 134 | frames_per_video[v] += len(s) 135 | final_seq = seq.iloc[s] 136 | relative_dir = args.dataset_dir.relpathto(v) 137 | folder_tree = relative_dir.splitall()[1:] 138 | folder_name = '_'.join(folder_tree) + '_{:02d}_{:02d}'.format(i, j) 139 | output_dir = args.output_dir / folder_name 140 | dirs_per_video[v].append(folder_name) 141 | output_dir.makedirs_p() 142 | for _, row in final_seq.iterrows(): 143 | image_name = Path(row["image_path"]).basename() 144 | src_img_path = v / image_name 145 | tgt_img_path = output_dir / image_name 146 | src_depth_path = src_img_path.stripext() + '.npy' 147 | tgt_depth_path = tgt_img_path.stripext() + '.npy' 148 | src_img_path.copy(tgt_img_path) 149 | src_depth_path.copy(tgt_depth_path) 150 | 151 | poses = final_seq[["pose00", "pose01", "pose02", "pose03", 152 | "pose10", "pose11", "pose12", "pose13", 153 | "pose20", "pose21", "pose22", "pose23"]] 154 | np.savetxt(output_dir / "poses.txt", poses) 155 | final_seq.to_csv(output_dir / "metadata.csv") 156 | intrinsics = final_seq[["fx", "fy", "cx", "cy"]].values[0] 157 | cam = np.array([[intrinsics[0], 0, intrinsics[2]], 158 | [0, intrinsics[1], intrinsics[3]], 159 | [0, 0, 1]]) 160 | np.savetxt(output_dir / "cam.txt", cam) 161 | print("constructed a training set of {} frames in {} scenes".format(total_frames, total_scenes)) 162 | 163 | random.shuffle(dataset_folders) 164 | train_frames = total_frames * args.train_split 165 | cumulative_num_frames = np.cumsum(np.array([frames_per_video[v] for v in dataset_folders])) 166 | used_for_train = (cumulative_num_frames - train_frames) < 0 167 | train_folders = [dirs_per_video[dataset_folders[i]] for i in np.where(used_for_train)[0]] 168 | val_folders = [dirs_per_video[dataset_folders[i]] for i in np.where(~used_for_train)[0]] 169 | with open(args.output_dir / "train.txt", 'w') as f: 170 | f.writelines([scene + '\n' for tf in train_folders for scene in tf]) 171 | with open(args.output_dir / "val.txt", 'w') as f: 172 | f.writelines([scene + '\n' for vf in val_folders for scene in vf]) 173 | 174 | 175 | if __name__ == '__main__': 176 | main() 177 | -------------------------------------------------------------------------------- /wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .colmap import Colmap 2 | from .eth3d import ETH3D 3 | from .pdraw import PDraw 4 | from .ffmpeg import FFMpeg 5 | from .pcl_util import PCLUtil 6 | __all__ = ["Colmap", "ETH3D", "PDraw", "FFMPeg"] 7 | -------------------------------------------------------------------------------- /wrappers/cloudcompare.py: -------------------------------------------------------------------------------- 1 | from .default_wrapper import Wrapper 2 | 3 | 4 | class CloudCompare(Wrapper): 5 | 
def __init__(self, binary, logfile): 6 | super().__init__(binary) 7 | self.base_options = ["-SILENT", "-AUTO_SAVE", "OFF", "-C_EXPORT_FMT", "PLY"] 8 | 9 | def compute_normals_mst(self, output_file, input_file, mst_knn=6): 10 | options = ["-O", input_file, "-OCTREE_NORMALS", "auto", 11 | "-ORIENT_NORMS_MST", str(mst_knn), "-SAVE_CLOUDS", output_file] 12 | self.__call__(self.base_options + options) 13 | -------------------------------------------------------------------------------- /wrappers/colmap.py: -------------------------------------------------------------------------------- 1 | from .default_wrapper import Wrapper 2 | 3 | 4 | class Colmap(Wrapper): 5 | """docstring for Colmap""" 6 | 7 | def __init__(self, db, image_path, mask_path, dense_workspace, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | self.db = db 10 | self.image_path = image_path 11 | self.mask_path = mask_path 12 | self.dense_workspace = dense_workspace 13 | 14 | def extract_features(self, per_sub_folder=False, image_list=None, model="RADIAL", more=False): 15 | options = ["feature_extractor", "--database_path", self.db, 16 | "--image_path", self.image_path, "--ImageReader.mask_path", self.mask_path, 17 | "--ImageReader.camera_model", model] 18 | if per_sub_folder: 19 | options += ["--ImageReader.single_camera_per_folder", "1"] 20 | if image_list is not None: 21 | options += ["--image_list_path", image_list] 22 | if more: 23 | options += ["--SiftExtraction.domain_size_pooling", "1", 24 | "--SiftExtraction.estimate_affine_shape", "1"] 25 | else: 26 | # See issue https://github.com/colmap/colmap/issues/627 27 | # If COLMAP is updated to work better on newest driver, this should be removed 28 | # options += ["--SiftExtraction.use_gpu", "0"] 29 | pass 30 | self.__call__(options) 31 | 32 | def match(self, method="exhaustive", guided_matching=True, vocab_tree=None, max_num_matches=32768): 33 | options = ["{}_matcher".format(method), 34 | "--database_path", self.db, 35 | "--SiftMatching.guided_matching", "1" if guided_matching else "0", 36 | "--SiftMatching.max_num_matches", str(max_num_matches)] 37 | if method == "sequential": 38 | assert vocab_tree is not None 39 | options += ["--SequentialMatching.loop_detection", "1", 40 | "--SequentialMatching.vocab_tree_path", vocab_tree] 41 | if method == "vocab_tree": 42 | assert vocab_tree is not None 43 | options += ["--VocabTreeMatching.vocab_tree_path", vocab_tree] 44 | self.__call__(options) 45 | 46 | def map(self, output, input=None, multiple_models=True, start_frame_id=None): 47 | options = ["mapper", "--database_path", self.db, 48 | "--image_path", self.image_path, 49 | "--output_path", output, 50 | "--Mapper.ba_refine_principal_point", "1"] 51 | if start_frame_id is not None: 52 | options += ["--Mapper.init_image_id1", str(start_frame_id)] 53 | if not multiple_models: 54 | options += ["--Mapper.multiple_models", "0"] 55 | if input is not None: 56 | options += ["--input_path", input] 57 | options += ["--Mapper.fix_existing_images", "1"] 58 | self.__call__(options) 59 | 60 | def register_images(self, output, input): 61 | options = ["image_registrator", "--database_path", self.db, 62 | "--output_path", output, 63 | "--input_path", input] 64 | self.__call__(options) 65 | 66 | def adjust_bundle(self, output, input, refine_extra_params=False, num_iter=10): 67 | options = ["bundle_adjuster", 68 | "--output_path", output, 69 | "--input_path", input, 70 | "--BundleAdjustment.max_num_iterations", str(num_iter)] 71 | if not refine_extra_params: 72 | options += 
["--BundleAdjustment.refine_extra_params", "0"] 73 | self.__call__(options) 74 | 75 | def triangulate_points(self, output, input, clear_points=True): 76 | options = ["point_triangulator", 77 | "--database_path", self.db, 78 | "--image_path", self.image_path, 79 | "--output_path", output, 80 | "--input_path", input] 81 | if clear_points: 82 | options += ["--clear_points", "1"] 83 | self.__call__(options) 84 | 85 | def align_model(self, output, input, ref_images, max_error=5): 86 | options = ["model_aligner", "--input_path", input, 87 | "--output_path", output, "--ref_images_path", 88 | ref_images, "--robust_alignment_max_error", str(max_error)] 89 | self.__call__(options) 90 | 91 | def export_model(self, output, input, output_type="PLY"): 92 | options = ["model_converter", "--input_path", input, 93 | "--output_path", output, "--output_type", output_type] 94 | self.__call__(options) 95 | 96 | def undistort(self, input, max_image_size=1000): 97 | options = ["image_undistorter", "--image_path", self.image_path, 98 | "--input_path", input, "--output_path", self.dense_workspace, 99 | "--output_type", "COLMAP", "--max_image_size", str(max_image_size), 100 | "--min_scale", "1"] 101 | self.__call__(options) 102 | 103 | def dense_stereo(self, max_depth=None, min_depth=None): 104 | options = ["patch_match_stereo", "--workspace_path", self.dense_workspace, 105 | "--workspace_format", "COLMAP", 106 | "--PatchMatchStereo.geom_consistency", "1"] 107 | if min_depth is not None: 108 | options += ["--PatchMatchStereo.depth_min", str(min_depth)] 109 | if max_depth is not None: 110 | options += ["--PatchMatchStereo.depth_max", str(max_depth)] 111 | self.__call__(options) 112 | 113 | def stereo_fusion(self, output): 114 | options = ["stereo_fusion", "--workspace_path", self.dense_workspace, 115 | "--workspace_format", "COLMAP", 116 | "--input_type", "geometric", 117 | "--output_path", output] 118 | self.__call__(options) 119 | 120 | def delaunay_mesh(self, output, input_ply, input_vis=None): 121 | if input_vis is None: 122 | input_vis = input_ply + ".vis" 123 | 124 | fused = self.dense_workspace / "fused.ply" 125 | if fused != input_ply: 126 | fused.remove_p() 127 | (fused + ".vis").remove_p() 128 | input_ply.link(fused) 129 | input_vis.link(fused + ".vis") 130 | options = ["delaunay_mesher", "--input_type", "dense", "--input_path", self.dense_workspace, "--output_path", output] 131 | self.__call__(options) 132 | 133 | def merge_models(self, output, input1, input2): 134 | options = ["model_merger", "--output_path", output, 135 | "--input_path1", input1, 136 | "--input_path2", input2] 137 | self.__call__(options) 138 | 139 | def index_images(self, vocab_tree_output, vocab_tree_input): 140 | options = ["vocab_tree_retriever", "--database_path", self.db, 141 | "--vocab_tree_path", vocab_tree_input, "--output_index", vocab_tree_output] 142 | self.__call__(options) 143 | -------------------------------------------------------------------------------- /wrappers/default_wrapper.py: -------------------------------------------------------------------------------- 1 | from os import devnull 2 | from subprocess import check_call, STDOUT 3 | 4 | 5 | class Wrapper: 6 | def __init__(self, binary, verbose=2, logfile=None): 7 | self.binary = binary 8 | self.logfile = logfile 9 | self.verbose = verbose 10 | 11 | def tofile(self, command, file): 12 | with open(file, 'a') as f: 13 | check_call(command, stdout=f, stderr=STDOUT) 14 | 15 | def __call__(self, options): 16 | command = [self.binary, *options] 17 | if self.verbose > 0: 
18 | print("Calling command") 19 | print(" ".join(command)) 20 | if self.logfile is not None: 21 | self.tofile(command, self.logfile) 22 | elif self.verbose < 2: 23 | self.tofile(command, devnull) 24 | else: 25 | check_call(command) 26 | -------------------------------------------------------------------------------- /wrappers/eth3d.py: -------------------------------------------------------------------------------- 1 | from .default_wrapper import Wrapper 2 | 3 | 4 | class ETH3D(Wrapper): 5 | """docstring for Colmap""" 6 | 7 | def __init__(self, build_folder, image_path, max_occlusion_depth, splat_radius, *args, **kwargs): 8 | super().__init__(None, *args, **kwargs) 9 | self.build_folder = build_folder 10 | self.image_path = image_path 11 | self.max_occlusion_depth = max_occlusion_depth 12 | self.splat_radius = splat_radius 13 | 14 | def __call__(self, options): 15 | self.binary = self.build_folder / options[0] 16 | super().__call__(options[1:]) 17 | 18 | def align_with_ICP(self, input_model, output_model, scales=5): 19 | options = ["ICPScanAligner", "-i", input_model, 20 | "-o", output_model, "--number_of_scales", str(scales)] 21 | self.__call__(options) 22 | 23 | def clean_pointcloud(self, input_model, filter=(5, 10)): 24 | options = ["PointCloudCleaner", "--in", input_model, 25 | "--filter", "{},{}".format(*filter)] 26 | self.__call__(options) 27 | 28 | def compute_normals(self, output_ply, scan_meshlab, neighbor_count=None, neighbor_radius=0.2): 29 | options = ["NormalEstimator", "-i", scan_meshlab, "-o", output_ply] 30 | if neighbor_count is not None: 31 | options += ["--neighbor_count", str(neighbor_count)] 32 | elif neighbor_radius is not None: 33 | options += ["--neighbor_radius", str(neighbor_radius)] 34 | self.__call__(options) 35 | 36 | def create_splats(self, output_splats, pointnormals_ply, occlusion_ply, threshold=0.1, max_splat_size=None): 37 | options = ["SplatCreator", "--point_normal_cloud_path", pointnormals_ply, 38 | "--mesh_path", occlusion_ply, 39 | "--output_path", output_splats, 40 | "--distance_threshold", str(threshold)] 41 | if max_splat_size is not None: 42 | options += ["--max_splat_size", str(max_splat_size)] 43 | else: 44 | options += ["--max_splat_size", str(2.5 * threshold)] 45 | self.__call__(options) 46 | 47 | def create_ground_truth(self, scan_meshlab, colmap_model, output_folder, occlusions=None, splats=None, 48 | point_cloud=True, depth_maps=True, occlusion_maps=True): 49 | options = ["GroundTruthCreator", "--scan_alignment_path", scan_meshlab, 50 | "--image_base_path", self.image_path, "--state_path", colmap_model, 51 | "--output_folder_path", output_folder, 52 | "--max_occlusion_depth", str(self.max_occlusion_depth), 53 | "--splat_radius", str(self.splat_radius), 54 | "--write_point_cloud", "1" if point_cloud else "0", 55 | "--write_depth_maps", "1" if depth_maps else "0", 56 | "--write_occlusion_depth", "1" if occlusion_maps else "0", 57 | "--compress_depth_maps", "1"] 58 | if occlusions is not None: 59 | options += ["--occlusion_mesh_path", occlusions] 60 | if splats is not None: 61 | options += ["--occlusion_splats_path", splats] 62 | 63 | self.__call__(options) 64 | 65 | def inspect_dataset(self, scan_meshlab, colmap_model, occlusions=None, splats=None, image_path=None): 66 | if image_path is None: 67 | image_path = self.image_path 68 | options = ["DatasetInspector", "--scan_alignment_path", scan_meshlab, 69 | "--image_base_path", image_path, "--state_path", colmap_model, 70 | "--max_occlusion_depth", str(self.max_occlusion_depth), 71 | 
"--splat_radius", str(self.splat_radius)] 72 | if occlusions is not None: 73 | options += ["--occlusion_mesh_path", occlusions] 74 | if splats is not None: 75 | options += ["--occlusion_splats_path", splats] 76 | self.__call__(options) 77 | -------------------------------------------------------------------------------- /wrappers/ffmpeg.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE 2 | from .default_wrapper import Wrapper 3 | import json 4 | 5 | 6 | class FFMpeg(Wrapper): 7 | def __init__(self, binary="ffmpeg", probe="ffprobe", *args, **kwargs): 8 | super().__init__(binary, *args, **kwargs) 9 | self.probe = probe 10 | 11 | def extract_images(self, video_file, output_folder, fps=None): 12 | if fps is not None: 13 | fps_arg = ["-vf", "fps={}".format(fps)] 14 | else: 15 | fps_arg = [] 16 | 17 | self.__call__(["-y", "-i", str(video_file), "-vsync", "0", "-qscale:v", "2"] 18 | + fps_arg + [str(output_folder / output_folder.stem + "_%05d.jpg")]) 19 | return sorted(output_folder.files("*.jpg")) 20 | 21 | def extract_specific_frames(self, video_file, output_folder, frame_ids): 22 | ''' 23 | Typical command string : 24 | ffmpeg -i in.mp4 -vf select='eq(n\\,100)+eq(n\\,184)+eq(n\\,213)' -vsync 0 frames%d.jpg 25 | 26 | Note: Surprisingly, frames in the eq function are starting at 0, whereas the rest is starting at 1, 27 | so we need to decrement the frame index compared to what we would have got when extracting every frame 28 | ''' 29 | select_string = "select='" + '+'.join(['eq(n\\,{})'.format(f-1) for f in frame_ids]) + "'" 30 | frame_string = output_folder/(video_file.stem + "tmp_%05d.jpg") 31 | ffmpeg_options = ["-y", "-i", video_file, 32 | "-vf", select_string, "-vsync", "0", 33 | "-qscale:v", "2", frame_string] 34 | self.__call__(ffmpeg_options) 35 | frame_files = sorted(output_folder.files(video_file.stem + "tmp_*.jpg")) 36 | assert(len(frame_files) == len(frame_ids)), \ 37 | "error, got {} frame_ids, but got {} images extracted".format(len(frame_ids), len(frame_files)) 38 | 39 | for f, frame_id in zip(frame_files, frame_ids): 40 | f.rename(f.parent / (video_file.stem + "_{:05d}.jpg".format(frame_id))) 41 | return sorted(output_folder.files("*.jpg")) 42 | 43 | def get_size_and_framerate(self, video_file): 44 | probe_process = Popen([self.probe, "-show_entries", "stream=height,width,r_frame_rate,nb_frames", 45 | "-of", "json", "-select_streams", "v", str(video_file)], 46 | stdout=PIPE, stderr=PIPE) 47 | json_cam = json.loads(probe_process.communicate()[0])['streams'] 48 | if len(json_cam) > 1: 49 | print("Warning for video {0} : Multiple streams detected ({1}), only the first one will be considered, " 50 | "please split the file into {1} separate mp4 files to analyze everything".format(video_file.basename(), len(json_cam))) 51 | return (int(json_cam[0]["width"]), 52 | int(json_cam[0]["height"]), 53 | frac_to_float(json_cam[0]["r_frame_rate"]), 54 | int(json_cam[0]["nb_frames"])) 55 | 56 | def create_video(self, video_path, input_string, glob=True, fps=30): 57 | ffmpeg_options = ["-y", "-r", "{:.2f}".format(fps)] + \ 58 | (["-pattern_type", "glob"] if glob else []) + \ 59 | ["-i", input_string, video_path] 60 | self.__call__(ffmpeg_options) 61 | 62 | 63 | def frac_to_float(frac_str): 64 | try: 65 | return float(frac_str) 66 | except ValueError: 67 | num, denom = frac_str.split('/') 68 | return float(num) / float(denom) 69 | -------------------------------------------------------------------------------- 
/wrappers/pcl_util.py: -------------------------------------------------------------------------------- 1 | from .default_wrapper import Wrapper 2 | 3 | 4 | class PCLUtil(Wrapper): 5 | 6 | def __init__(self, build_folder, *args, **kwargs): 7 | super().__init__(None, *args, **kwargs) 8 | self.build_folder = build_folder 9 | 10 | def __call__(self, options): 11 | self.binary = self.build_folder / options[0] 12 | super().__call__(options[1:]) 13 | 14 | def subsample(self, input_file, output_file, resolution=0.05): 15 | options = ["PointCloudSubsampler", "--point_cloud_path", input_file, 16 | "--resolution", str(resolution), "--output", output_file] 17 | self.__call__(options) 18 | 19 | def triangulate_mesh(self, output_file, input_file, resolution=0.2): 20 | options = ["MeshTriangulator", "--point_normal_cloud_path", input_file, 21 | "--resolution", str(resolution), "--out_mesh", output_file] 22 | self.__call__(options) 23 | 24 | def register_reconstruction(self, georef, lidar, output_matrix, output_cloud=None, max_distance=1): 25 | options = ["CloudRegistrator", "--georef", georef, 26 | "--lidar", lidar, "--max_distance", str(max_distance), 27 | "--output_matrix", output_matrix] 28 | if output_cloud is not None: 29 | options += ["--output_cloud", output_cloud] 30 | self.__call__(options) 31 | 32 | def filter_cloud(self, output_file, input_file, knn=6, std=5, with_normals=False): 33 | options = ["CloudSOR", "--input", input_file, 34 | "--output", output_file, "--knn", str(knn), 35 | "--std", str(std)] 36 | if with_normals: 37 | options.append("-n") 38 | self.__call__(options) 39 | 40 | def create_vis_file(self, georef_dense, lidar, resolution=0.1, output=None): 41 | if output is None: 42 | output = lidar 43 | options = ["CreateVisFile", "--georef_dense", georef_dense, 44 | "--lidar", lidar, 45 | "--output_cloud", output, "--resolution", str(resolution)] 46 | self.__call__(options) 47 | -------------------------------------------------------------------------------- /wrappers/pdraw.py: -------------------------------------------------------------------------------- 1 | from .default_wrapper import Wrapper 2 | import tempfile 3 | import pandas as pd 4 | 5 | 6 | class PDraw(Wrapper): 7 | def vmeta_extract(self, video): 8 | temp = tempfile.NamedTemporaryFile() 9 | options = ["vmeta-extract", str(video), "--csv", temp.name] 10 | self.__call__(options) 11 | return pd.read_csv(temp.name, sep=" ") 12 | --------------------------------------------------------------------------------
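# Usage sketch for the PCLUtil wrapper (illustration only, not a repository file), mirroring the
# "create vis file" step of the main pipeline. The build folder and .ply paths are hypothetical;
# the binaries are assumed to have been compiled by build_pcl_util.sh.
from path import Path
from wrappers import PCLUtil

pcl = PCLUtil(Path("pcl_util/build"), verbose=1)

# Transfer per-point visibility from the dense COLMAP reconstruction to a subsampled copy of the
# lidar cloud, so that COLMAP's delaunay_mesher can later be run on it
pcl.create_vis_file(Path("workspace/Thorough/georef_reconstruction.ply"),
                    Path("workspace/Lidar/with_normals.ply"),
                    resolution=0.2,
                    output=Path("workspace/Lidar/with_normals_subsampled.ply"))

# Statistical outlier removal on a noisy cloud, keeping the normals
pcl.filter_cloud(output_file=Path("workspace/Lidar/filtered.ply"),
                 input_file=Path("workspace/Lidar/with_normals.ply"),
                 knn=6, std=5, with_normals=True)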