├── .gitignore ├── .DS_Store ├── assets ├── .DS_Store ├── process.png ├── teaser.png └── pipeline.png ├── README.md └── vrso └── database.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/.DS_Store -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/process.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/pipeline.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VRSO 2 | 3 | **VRSO**: **V**isual-Centric **R**econstruction for **S**tatic **O**bject Annotation 4 | 5 | [Arxiv](https://arxiv.org/pdf/2403.15026.pdf) | [Youtube](https://www.youtube.com/watch?v=hhfTyvTEYMM) | 
[Bilibili](https://www.bilibili.com/video/BV1tPiVeNEyX/?spm_id_from=333.999.0.0&vd_source=34df4267be146d2dde6e0bf98a2ce363) 6 | 7 | waymo example: 8 |

9 | 10 |

11 | Comparison between our proposed VRSO (green) and Waymo (red) annotations after reprojection (from 3D space to 2D images). All images are from the Waymo Open Dataset (WOD). We can easily observe the reprojection errors (false positives and false negatives) among the Waymo annotations. For instance, the traffic signs (in both full and zoomed regions) are ignored or wrongly labelled in (a) and (c), while the red boxes do not tightly cover the targets in (b) and (d). In contrast, VRSO yields consistent and accurate annotation among all images, even in low-resolution and poor-illumination conditions (b). 12 | 13 | ## Pipeline 14 |

15 | 16 | 17 |

18 | -------------------------------------------------------------------------------- /vrso/database.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | 30 | 31 | # This script is based on an original implementation by True Price. 
32 | 33 | import sys 34 | import sqlite3 35 | import numpy as np 36 | 37 | 38 | IS_PYTHON3 = sys.version_info[0] >= 3 39 | 40 | MAX_IMAGE_ID = 2 ** 31 - 1 41 | 42 | CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras ( 43 | camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 44 | model INTEGER NOT NULL, 45 | width INTEGER NOT NULL, 46 | height INTEGER NOT NULL, 47 | params BLOB, 48 | prior_focal_length INTEGER NOT NULL)""" 49 | 50 | CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors ( 51 | image_id INTEGER PRIMARY KEY NOT NULL, 52 | rows INTEGER NOT NULL, 53 | cols INTEGER NOT NULL, 54 | data BLOB, 55 | FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)""" 56 | 57 | CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images ( 58 | image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 59 | name TEXT NOT NULL UNIQUE, 60 | camera_id INTEGER NOT NULL, 61 | prior_qw REAL, 62 | prior_qx REAL, 63 | prior_qy REAL, 64 | prior_qz REAL, 65 | prior_tx REAL, 66 | prior_ty REAL, 67 | prior_tz REAL, 68 | CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}), 69 | FOREIGN KEY(camera_id) REFERENCES cameras(camera_id)) 70 | """.format( 71 | MAX_IMAGE_ID 72 | ) 73 | 74 | CREATE_TWO_VIEW_GEOMETRIES_TABLE = """ 75 | CREATE TABLE IF NOT EXISTS two_view_geometries ( 76 | pair_id INTEGER PRIMARY KEY NOT NULL, 77 | rows INTEGER NOT NULL, 78 | cols INTEGER NOT NULL, 79 | data BLOB, 80 | config INTEGER NOT NULL, 81 | F BLOB, 82 | E BLOB, 83 | H BLOB, 84 | qvec BLOB, 85 | tvec BLOB) 86 | """ 87 | 88 | CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints ( 89 | image_id INTEGER PRIMARY KEY NOT NULL, 90 | rows INTEGER NOT NULL, 91 | cols INTEGER NOT NULL, 92 | data BLOB, 93 | FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE) 94 | """ 95 | 96 | CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches ( 97 | pair_id INTEGER PRIMARY KEY NOT NULL, 98 | rows INTEGER NOT NULL, 99 | cols INTEGER NOT NULL, 100 | 
data BLOB)""" 101 | 102 | CREATE_NAME_INDEX = ( 103 | "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" 104 | ) 105 | 106 | CREATE_ALL = "; ".join( 107 | [ 108 | CREATE_CAMERAS_TABLE, 109 | CREATE_IMAGES_TABLE, 110 | CREATE_KEYPOINTS_TABLE, 111 | CREATE_DESCRIPTORS_TABLE, 112 | CREATE_MATCHES_TABLE, 113 | CREATE_TWO_VIEW_GEOMETRIES_TABLE, 114 | CREATE_NAME_INDEX, 115 | ] 116 | ) 117 | 118 | 119 | def image_ids_to_pair_id(image_id1, image_id2): 120 | if image_id1 > image_id2: 121 | image_id1, image_id2 = image_id2, image_id1 122 | return image_id1 * MAX_IMAGE_ID + image_id2 123 | 124 | 125 | def pair_id_to_image_ids(pair_id): 126 | image_id2 = pair_id % MAX_IMAGE_ID 127 | image_id1 = (pair_id - image_id2) / MAX_IMAGE_ID 128 | return image_id1, image_id2 129 | 130 | 131 | def array_to_blob(array): 132 | if IS_PYTHON3: 133 | return array.tostring() 134 | else: 135 | return np.getbuffer(array) 136 | 137 | 138 | def blob_to_array(blob, dtype, shape=(-1,)): 139 | if IS_PYTHON3: 140 | return np.fromstring(blob, dtype=dtype).reshape(*shape) 141 | else: 142 | return np.frombuffer(blob, dtype=dtype).reshape(*shape) 143 | 144 | 145 | class COLMAPDatabase(sqlite3.Connection): 146 | @staticmethod 147 | def connect(database_path): 148 | return sqlite3.connect(database_path, factory=COLMAPDatabase) 149 | 150 | def __init__(self, *args, **kwargs): 151 | super(COLMAPDatabase, self).__init__(*args, **kwargs) 152 | 153 | self.create_tables = lambda: self.executescript(CREATE_ALL) 154 | self.create_cameras_table = lambda: self.executescript( 155 | CREATE_CAMERAS_TABLE 156 | ) 157 | self.create_descriptors_table = lambda: self.executescript( 158 | CREATE_DESCRIPTORS_TABLE 159 | ) 160 | self.create_images_table = lambda: self.executescript( 161 | CREATE_IMAGES_TABLE 162 | ) 163 | self.create_two_view_geometries_table = lambda: self.executescript( 164 | CREATE_TWO_VIEW_GEOMETRIES_TABLE 165 | ) 166 | self.create_keypoints_table = lambda: self.executescript( 167 | 
CREATE_KEYPOINTS_TABLE 168 | ) 169 | self.create_matches_table = lambda: self.executescript( 170 | CREATE_MATCHES_TABLE 171 | ) 172 | self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX) 173 | 174 | def add_camera( 175 | self, 176 | model, 177 | width, 178 | height, 179 | params, 180 | prior_focal_length=False, 181 | camera_id=None, 182 | ): 183 | params = np.asarray(params, np.float64) 184 | cursor = self.execute( 185 | "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)", 186 | ( 187 | camera_id, 188 | model, 189 | width, 190 | height, 191 | array_to_blob(params), 192 | prior_focal_length, 193 | ), 194 | ) 195 | return cursor.lastrowid 196 | 197 | def add_image( 198 | self, 199 | name, 200 | camera_id, 201 | prior_q=np.full(4, np.NaN), 202 | prior_t=np.full(3, np.NaN), 203 | image_id=None, 204 | ): 205 | cursor = self.execute( 206 | "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", 207 | ( 208 | image_id, 209 | name, 210 | camera_id, 211 | prior_q[0], 212 | prior_q[1], 213 | prior_q[2], 214 | prior_q[3], 215 | prior_t[0], 216 | prior_t[1], 217 | prior_t[2], 218 | ), 219 | ) 220 | return cursor.lastrowid 221 | 222 | def add_keypoints(self, image_id, keypoints): 223 | assert len(keypoints.shape) == 2 224 | assert keypoints.shape[1] in [2, 4, 6] 225 | 226 | keypoints = np.asarray(keypoints, np.float32) 227 | self.execute( 228 | "INSERT INTO keypoints VALUES (?, ?, ?, ?)", 229 | (image_id,) + keypoints.shape + (array_to_blob(keypoints),), 230 | ) 231 | 232 | def add_descriptors(self, image_id, descriptors): 233 | descriptors = np.ascontiguousarray(descriptors, np.uint8) 234 | self.execute( 235 | "INSERT INTO descriptors VALUES (?, ?, ?, ?)", 236 | (image_id,) + descriptors.shape + (array_to_blob(descriptors),), 237 | ) 238 | 239 | def add_matches(self, image_id1, image_id2, matches): 240 | assert len(matches.shape) == 2 241 | assert matches.shape[1] == 2 242 | 243 | if image_id1 > image_id2: 244 | matches = matches[:, ::-1] 245 | 246 | pair_id 
= image_ids_to_pair_id(image_id1, image_id2) 247 | matches = np.asarray(matches, np.uint32) 248 | self.execute( 249 | "INSERT INTO matches VALUES (?, ?, ?, ?)", 250 | (pair_id,) + matches.shape + (array_to_blob(matches),), 251 | ) 252 | 253 | def add_two_view_geometry( 254 | self, 255 | image_id1, 256 | image_id2, 257 | matches, 258 | F=np.eye(3), 259 | E=np.eye(3), 260 | H=np.eye(3), 261 | qvec=np.array([1.0, 0.0, 0.0, 0.0]), 262 | tvec=np.zeros(3), 263 | config=2, 264 | ): 265 | assert len(matches.shape) == 2 266 | assert matches.shape[1] == 2 267 | 268 | if image_id1 > image_id2: 269 | matches = matches[:, ::-1] 270 | 271 | pair_id = image_ids_to_pair_id(image_id1, image_id2) 272 | matches = np.asarray(matches, np.uint32) 273 | F = np.asarray(F, dtype=np.float64) 274 | E = np.asarray(E, dtype=np.float64) 275 | H = np.asarray(H, dtype=np.float64) 276 | qvec = np.asarray(qvec, dtype=np.float64) 277 | tvec = np.asarray(tvec, dtype=np.float64) 278 | self.execute( 279 | "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", 280 | (pair_id,) 281 | + matches.shape 282 | + ( 283 | array_to_blob(matches), 284 | config, 285 | array_to_blob(F), 286 | array_to_blob(E), 287 | array_to_blob(H), 288 | array_to_blob(qvec), 289 | array_to_blob(tvec), 290 | ), 291 | ) 292 | 293 | 294 | def example_usage(): 295 | import os 296 | import argparse 297 | 298 | parser = argparse.ArgumentParser() 299 | parser.add_argument("--database_path", default="database.db") 300 | args = parser.parse_args() 301 | 302 | if os.path.exists(args.database_path): 303 | print("ERROR: database path already exists -- will not modify it.") 304 | return 305 | 306 | # Open the database. 307 | 308 | db = COLMAPDatabase.connect(args.database_path) 309 | 310 | # For convenience, try creating all the tables upfront. 311 | 312 | db.create_tables() 313 | 314 | # Create dummy cameras. 
315 | 316 | model1, width1, height1, params1 = ( 317 | 0, 318 | 1024, 319 | 768, 320 | np.array((1024.0, 512.0, 384.0)), 321 | ) 322 | model2, width2, height2, params2 = ( 323 | 2, 324 | 1024, 325 | 768, 326 | np.array((1024.0, 512.0, 384.0, 0.1)), 327 | ) 328 | 329 | camera_id1 = db.add_camera(model1, width1, height1, params1) 330 | camera_id2 = db.add_camera(model2, width2, height2, params2) 331 | 332 | # Create dummy images. 333 | 334 | image_id1 = db.add_image("image1.png", camera_id1) 335 | image_id2 = db.add_image("image2.png", camera_id1) 336 | image_id3 = db.add_image("image3.png", camera_id2) 337 | image_id4 = db.add_image("image4.png", camera_id2) 338 | 339 | # Create dummy keypoints. 340 | # 341 | # Note that COLMAP supports: 342 | # - 2D keypoints: (x, y) 343 | # - 4D keypoints: (x, y, theta, scale) 344 | # - 6D affine keypoints: (x, y, a_11, a_12, a_21, a_22) 345 | 346 | num_keypoints = 1000 347 | keypoints1 = np.random.rand(num_keypoints, 2) * (width1, height1) 348 | keypoints2 = np.random.rand(num_keypoints, 2) * (width1, height1) 349 | keypoints3 = np.random.rand(num_keypoints, 2) * (width2, height2) 350 | keypoints4 = np.random.rand(num_keypoints, 2) * (width2, height2) 351 | 352 | db.add_keypoints(image_id1, keypoints1) 353 | db.add_keypoints(image_id2, keypoints2) 354 | db.add_keypoints(image_id3, keypoints3) 355 | db.add_keypoints(image_id4, keypoints4) 356 | 357 | # Create dummy matches. 358 | 359 | M = 50 360 | matches12 = np.random.randint(num_keypoints, size=(M, 2)) 361 | matches23 = np.random.randint(num_keypoints, size=(M, 2)) 362 | matches34 = np.random.randint(num_keypoints, size=(M, 2)) 363 | 364 | db.add_matches(image_id1, image_id2, matches12) 365 | db.add_matches(image_id2, image_id3, matches23) 366 | db.add_matches(image_id3, image_id4, matches34) 367 | 368 | # Commit the data to the file. 369 | 370 | db.commit() 371 | 372 | # Read and check cameras. 
373 | 374 | rows = db.execute("SELECT * FROM cameras") 375 | 376 | camera_id, model, width, height, params, prior = next(rows) 377 | params = blob_to_array(params, np.float64) 378 | assert camera_id == camera_id1 379 | assert model == model1 and width == width1 and height == height1 380 | assert np.allclose(params, params1) 381 | 382 | camera_id, model, width, height, params, prior = next(rows) 383 | params = blob_to_array(params, np.float64) 384 | assert camera_id == camera_id2 385 | assert model == model2 and width == width2 and height == height2 386 | assert np.allclose(params, params2) 387 | 388 | # Read and check keypoints. 389 | 390 | keypoints = dict( 391 | (image_id, blob_to_array(data, np.float32, (-1, 2))) 392 | for image_id, data in db.execute("SELECT image_id, data FROM keypoints") 393 | ) 394 | 395 | assert np.allclose(keypoints[image_id1], keypoints1) 396 | assert np.allclose(keypoints[image_id2], keypoints2) 397 | assert np.allclose(keypoints[image_id3], keypoints3) 398 | assert np.allclose(keypoints[image_id4], keypoints4) 399 | 400 | # Read and check matches. 401 | 402 | pair_ids = [ 403 | image_ids_to_pair_id(*pair) 404 | for pair in ( 405 | (image_id1, image_id2), 406 | (image_id2, image_id3), 407 | (image_id3, image_id4), 408 | ) 409 | ] 410 | 411 | matches = dict( 412 | (pair_id_to_image_ids(pair_id), blob_to_array(data, np.uint32, (-1, 2))) 413 | for pair_id, data in db.execute("SELECT pair_id, data FROM matches") 414 | ) 415 | 416 | assert np.all(matches[(image_id1, image_id2)] == matches12) 417 | assert np.all(matches[(image_id2, image_id3)] == matches23) 418 | assert np.all(matches[(image_id3, image_id4)] == matches34) 419 | 420 | # Clean up. 421 | 422 | db.close() 423 | 424 | if os.path.exists(args.database_path): 425 | os.remove(args.database_path) 426 | 427 | 428 | if __name__ == "__main__": 429 | example_usage() 430 | --------------------------------------------------------------------------------