├── .gitignore ├── .DS_Store ├── assets ├── .DS_Store ├── process.png ├── teaser.png └── pipeline.png ├── README.md └── vrso └── database.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/.DS_Store -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/process.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaiYingFeng/VRSO/HEAD/assets/pipeline.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VRSO 2 | 3 | **VRSO**: **V**isual-Centric **R**econstruction for **S**tatic **O**bject Annotation 4 | 5 | [Arxiv](https://arxiv.org/pdf/2403.15026.pdf) | [Youtube](https://www.youtube.com/watch?v=hhfTyvTEYMM) | 
[Bilibili](https://www.bilibili.com/video/BV1tPiVeNEyX/?spm_id_from=333.999.0.0&vd_source=34df4267be146d2dde6e0bf98a2ce363) 6 | 7 | waymo example: 8 |

9 | 10 |

11 | Comparison between our proposed VRSO (green) and Waymo (red) annotations after reprojection (from 3D space to 2D images). All images are from the Waymo Open Dataset (WOD). We can easily observe the reprojection errors (false positives and false negatives) among the Waymo annotations. For instance, the traffic signs (in both full and zoomed regions) are ignored or wrongly labelled in (a) and (c), while the red boxes do not tightly cover the targets in (b) and (d). In contrast, VRSO yields consistent and accurate annotation among all images, even in low-resolution and poor-illumination conditions (b). 12 | 13 | ## Pipeline 14 |

15 | 16 | 17 |

18 | -------------------------------------------------------------------------------- /vrso/database.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023, ETH Zurich and UNC Chapel Hill. 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # * Redistributions of source code must retain the above copyright 8 | # notice, this list of conditions and the following disclaimer. 9 | # 10 | # * Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 15 | # its contributors may be used to endorse or promote products derived 16 | # from this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 22 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | # POSSIBILITY OF SUCH DAMAGE. 29 | 30 | 31 | # This script is based on an original implementation by True Price. 
32 | 33 | import sys 34 | import sqlite3 35 | import numpy as np 36 | 37 | 38 | IS_PYTHON3 = sys.version_info[0] >= 3 39 | 40 | MAX_IMAGE_ID = 2 ** 31 - 1 41 | 42 | CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras ( 43 | camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 44 | model INTEGER NOT NULL, 45 | width INTEGER NOT NULL, 46 | height INTEGER NOT NULL, 47 | params BLOB, 48 | prior_focal_length INTEGER NOT NULL)""" 49 | 50 | CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors ( 51 | image_id INTEGER PRIMARY KEY NOT NULL, 52 | rows INTEGER NOT NULL, 53 | cols INTEGER NOT NULL, 54 | data BLOB, 55 | FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)""" 56 | 57 | CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images ( 58 | image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 59 | name TEXT NOT NULL UNIQUE, 60 | camera_id INTEGER NOT NULL, 61 | prior_qw REAL, 62 | prior_qx REAL, 63 | prior_qy REAL, 64 | prior_qz REAL, 65 | prior_tx REAL, 66 | prior_ty REAL, 67 | prior_tz REAL, 68 | CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}), 69 | FOREIGN KEY(camera_id) REFERENCES cameras(camera_id)) 70 | """.format( 71 | MAX_IMAGE_ID 72 | ) 73 | 74 | CREATE_TWO_VIEW_GEOMETRIES_TABLE = """ 75 | CREATE TABLE IF NOT EXISTS two_view_geometries ( 76 | pair_id INTEGER PRIMARY KEY NOT NULL, 77 | rows INTEGER NOT NULL, 78 | cols INTEGER NOT NULL, 79 | data BLOB, 80 | config INTEGER NOT NULL, 81 | F BLOB, 82 | E BLOB, 83 | H BLOB, 84 | qvec BLOB, 85 | tvec BLOB) 86 | """ 87 | 88 | CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints ( 89 | image_id INTEGER PRIMARY KEY NOT NULL, 90 | rows INTEGER NOT NULL, 91 | cols INTEGER NOT NULL, 92 | data BLOB, 93 | FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE) 94 | """ 95 | 96 | CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches ( 97 | pair_id INTEGER PRIMARY KEY NOT NULL, 98 | rows INTEGER NOT NULL, 99 | cols INTEGER NOT NULL, 100 | 
data BLOB)""" 101 | 102 | CREATE_NAME_INDEX = ( 103 | "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" 104 | ) 105 | 106 | CREATE_ALL = "; ".join( 107 | [ 108 | CREATE_CAMERAS_TABLE, 109 | CREATE_IMAGES_TABLE, 110 | CREATE_KEYPOINTS_TABLE, 111 | CREATE_DESCRIPTORS_TABLE, 112 | CREATE_MATCHES_TABLE, 113 | CREATE_TWO_VIEW_GEOMETRIES_TABLE, 114 | CREATE_NAME_INDEX, 115 | ] 116 | ) 117 | 118 | 119 | def image_ids_to_pair_id(image_id1, image_id2): 120 | if image_id1 > image_id2: 121 | image_id1, image_id2 = image_id2, image_id1 122 | return image_id1 * MAX_IMAGE_ID + image_id2 123 | 124 | 125 | def pair_id_to_image_ids(pair_id): 126 | image_id2 = pair_id % MAX_IMAGE_ID 127 | image_id1 = (pair_id - image_id2) / MAX_IMAGE_ID 128 | return image_id1, image_id2 129 | 130 | 131 | def array_to_blob(array): 132 | if IS_PYTHON3: 133 | return array.tostring() 134 | else: 135 | return np.getbuffer(array) 136 | 137 | 138 | def blob_to_array(blob, dtype, shape=(-1,)): 139 | if IS_PYTHON3: 140 | return np.fromstring(blob, dtype=dtype).reshape(*shape) 141 | else: 142 | return np.frombuffer(blob, dtype=dtype).reshape(*shape) 143 | 144 | 145 | class COLMAPDatabase(sqlite3.Connection): 146 | @staticmethod 147 | def connect(database_path): 148 | return sqlite3.connect(database_path, factory=COLMAPDatabase) 149 | 150 | def __init__(self, *args, **kwargs): 151 | super(COLMAPDatabase, self).__init__(*args, **kwargs) 152 | 153 | self.create_tables = lambda: self.executescript(CREATE_ALL) 154 | self.create_cameras_table = lambda: self.executescript( 155 | CREATE_CAMERAS_TABLE 156 | ) 157 | self.create_descriptors_table = lambda: self.executescript( 158 | CREATE_DESCRIPTORS_TABLE 159 | ) 160 | self.create_images_table = lambda: self.executescript( 161 | CREATE_IMAGES_TABLE 162 | ) 163 | self.create_two_view_geometries_table = lambda: self.executescript( 164 | CREATE_TWO_VIEW_GEOMETRIES_TABLE 165 | ) 166 | self.create_keypoints_table = lambda: self.executescript( 167 | 
CREATE_KEYPOINTS_TABLE 168 | ) 169 | self.create_matches_table = lambda: self.executescript( 170 | CREATE_MATCHES_TABLE 171 | ) 172 | self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX) 173 | 174 | def add_camera( 175 | self, 176 | model, 177 | width, 178 | height, 179 | params, 180 | prior_focal_length=False, 181 | camera_id=None, 182 | ): 183 | params = np.asarray(params, np.float64) 184 | cursor = self.execute( 185 | "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)", 186 | ( 187 | camera_id, 188 | model, 189 | width, 190 | height, 191 | array_to_blob(params), 192 | prior_focal_length, 193 | ), 194 | ) 195 | return cursor.lastrowid 196 | 197 | def add_image( 198 | self, 199 | name, 200 | camera_id, 201 | prior_q=np.full(4, np.NaN), 202 | prior_t=np.full(3, np.NaN), 203 | image_id=None, 204 | ): 205 | cursor = self.execute( 206 | "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", 207 | ( 208 | image_id, 209 | name, 210 | camera_id, 211 | prior_q[0], 212 | prior_q[1], 213 | prior_q[2], 214 | prior_q[3], 215 | prior_t[0], 216 | prior_t[1], 217 | prior_t[2], 218 | ), 219 | ) 220 | return cursor.lastrowid 221 | 222 | def add_keypoints(self, image_id, keypoints): 223 | assert len(keypoints.shape) == 2 224 | assert keypoints.shape[1] in [2, 4, 6] 225 | 226 | keypoints = np.asarray(keypoints, np.float32) 227 | self.execute( 228 | "INSERT INTO keypoints VALUES (?, ?, ?, ?)", 229 | (image_id,) + keypoints.shape + (array_to_blob(keypoints),), 230 | ) 231 | 232 | def add_descriptors(self, image_id, descriptors): 233 | descriptors = np.ascontiguousarray(descriptors, np.uint8) 234 | self.execute( 235 | "INSERT INTO descriptors VALUES (?, ?, ?, ?)", 236 | (image_id,) + descriptors.shape + (array_to_blob(descriptors),), 237 | ) 238 | 239 | def add_matches(self, image_id1, image_id2, matches): 240 | assert len(matches.shape) == 2 241 | assert matches.shape[1] == 2 242 | 243 | if image_id1 > image_id2: 244 | matches = matches[:, ::-1] 245 | 246 | pair_id 
= image_ids_to_pair_id(image_id1, image_id2) 247 | matches = np.asarray(matches, np.uint32) 248 | self.execute( 249 | "INSERT INTO matches VALUES (?, ?, ?, ?)", 250 | (pair_id,) + matches.shape + (array_to_blob(matches),), 251 | ) 252 | 253 | def add_two_view_geometry( 254 | self, 255 | image_id1, 256 | image_id2, 257 | matches, 258 | F=np.eye(3), 259 | E=np.eye(3), 260 | H=np.eye(3), 261 | qvec=np.array([1.0, 0.0, 0.0, 0.0]), 262 | tvec=np.zeros(3), 263 | config=2, 264 | ): 265 | assert len(matches.shape) == 2 266 | assert matches.shape[1] == 2 267 | 268 | if image_id1 > image_id2: 269 | matches = matches[:, ::-1] 270 | 271 | pair_id = image_ids_to_pair_id(image_id1, image_id2) 272 | matches = np.asarray(matches, np.uint32) 273 | F = np.asarray(F, dtype=np.float64) 274 | E = np.asarray(E, dtype=np.float64) 275 | H = np.asarray(H, dtype=np.float64) 276 | qvec = np.asarray(qvec, dtype=np.float64) 277 | tvec = np.asarray(tvec, dtype=np.float64) 278 | self.execute( 279 | "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", 280 | (pair_id,) 281 | + matches.shape 282 | + ( 283 | array_to_blob(matches), 284 | config, 285 | array_to_blob(F), 286 | array_to_blob(E), 287 | array_to_blob(H), 288 | array_to_blob(qvec), 289 | array_to_blob(tvec), 290 | ), 291 | ) 292 | 293 | 294 | def example_usage(): 295 | import os 296 | import argparse 297 | 298 | parser = argparse.ArgumentParser() 299 | parser.add_argument("--database_path", default="database.db") 300 | args = parser.parse_args() 301 | 302 | if os.path.exists(args.database_path): 303 | print("ERROR: database path already exists -- will not modify it.") 304 | return 305 | 306 | # Open the database. 307 | 308 | db = COLMAPDatabase.connect(args.database_path) 309 | 310 | # For convenience, try creating all the tables upfront. 311 | 312 | db.create_tables() 313 | 314 | # Create dummy cameras. 
315 | 316 | model1, width1, height1, params1 = ( 317 | 0, 318 | 1024, 319 | 768, 320 | np.array((1024.0, 512.0, 384.0)), 321 | ) 322 | model2, width2, height2, params2 = ( 323 | 2, 324 | 1024, 325 | 768, 326 | np.array((1024.0, 512.0, 384.0, 0.1)), 327 | ) 328 | 329 | camera_id1 = db.add_camera(model1, width1, height1, params1) 330 | camera_id2 = db.add_camera(model2, width2, height2, params2) 331 | 332 | # Create dummy images. 333 | 334 | image_id1 = db.add_image("image1.png", camera_id1) 335 | image_id2 = db.add_image("image2.png", camera_id1) 336 | image_id3 = db.add_image("image3.png", camera_id2) 337 | image_id4 = db.add_image("image4.png", camera_id2) 338 | 339 | # Create dummy keypoints. 340 | # 341 | # Note that COLMAP supports: 342 | # - 2D keypoints: (x, y) 343 | # - 4D keypoints: (x, y, theta, scale) 344 | # - 6D affine keypoints: (x, y, a_11, a_12, a_21, a_22) 345 | 346 | num_keypoints = 1000 347 | keypoints1 = np.random.rand(num_keypoints, 2) * (width1, height1) 348 | keypoints2 = np.random.rand(num_keypoints, 2) * (width1, height1) 349 | keypoints3 = np.random.rand(num_keypoints, 2) * (width2, height2) 350 | keypoints4 = np.random.rand(num_keypoints, 2) * (width2, height2) 351 | 352 | db.add_keypoints(image_id1, keypoints1) 353 | db.add_keypoints(image_id2, keypoints2) 354 | db.add_keypoints(image_id3, keypoints3) 355 | db.add_keypoints(image_id4, keypoints4) 356 | 357 | # Create dummy matches. 358 | 359 | M = 50 360 | matches12 = np.random.randint(num_keypoints, size=(M, 2)) 361 | matches23 = np.random.randint(num_keypoints, size=(M, 2)) 362 | matches34 = np.random.randint(num_keypoints, size=(M, 2)) 363 | 364 | db.add_matches(image_id1, image_id2, matches12) 365 | db.add_matches(image_id2, image_id3, matches23) 366 | db.add_matches(image_id3, image_id4, matches34) 367 | 368 | # Commit the data to the file. 369 | 370 | db.commit() 371 | 372 | # Read and check cameras. 
373 | 374 | rows = db.execute("SELECT * FROM cameras") 375 | 376 | camera_id, model, width, height, params, prior = next(rows) 377 | params = blob_to_array(params, np.float64) 378 | assert camera_id == camera_id1 379 | assert model == model1 and width == width1 and height == height1 380 | assert np.allclose(params, params1) 381 | 382 | camera_id, model, width, height, params, prior = next(rows) 383 | params = blob_to_array(params, np.float64) 384 | assert camera_id == camera_id2 385 | assert model == model2 and width == width2 and height == height2 386 | assert np.allclose(params, params2) 387 | 388 | # Read and check keypoints. 389 | 390 | keypoints = dict( 391 | (image_id, blob_to_array(data, np.float32, (-1, 2))) 392 | for image_id, data in db.execute("SELECT image_id, data FROM keypoints") 393 | ) 394 | 395 | assert np.allclose(keypoints[image_id1], keypoints1) 396 | assert np.allclose(keypoints[image_id2], keypoints2) 397 | assert np.allclose(keypoints[image_id3], keypoints3) 398 | assert np.allclose(keypoints[image_id4], keypoints4) 399 | 400 | # Read and check matches. 401 | 402 | pair_ids = [ 403 | image_ids_to_pair_id(*pair) 404 | for pair in ( 405 | (image_id1, image_id2), 406 | (image_id2, image_id3), 407 | (image_id3, image_id4), 408 | ) 409 | ] 410 | 411 | matches = dict( 412 | (pair_id_to_image_ids(pair_id), blob_to_array(data, np.uint32, (-1, 2))) 413 | for pair_id, data in db.execute("SELECT pair_id, data FROM matches") 414 | ) 415 | 416 | assert np.all(matches[(image_id1, image_id2)] == matches12) 417 | assert np.all(matches[(image_id2, image_id3)] == matches23) 418 | assert np.all(matches[(image_id3, image_id4)] == matches34) 419 | 420 | # Clean up. 421 | 422 | db.close() 423 | 424 | if os.path.exists(args.database_path): 425 | os.remove(args.database_path) 426 | 427 | 428 | if __name__ == "__main__": 429 | example_usage() 430 | --------------------------------------------------------------------------------