├── .nojekyll ├── 2D_info_extract.py ├── README.md ├── arguments ├── __init__.py └── __pycache__ │ ├── __init__.cpython-310.pyc │ └── __init__.cpython-37.pyc ├── gaussian_render ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── network_gui.cpython-37.pyc └── network_gui.py ├── index.html ├── scene ├── BIRCH_quantize.py ├── __init__.py ├── __pycache__ │ ├── BIRCH_quantize.cpython-37.pyc │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-37.pyc │ ├── cameras.cpython-310.pyc │ ├── cameras.cpython-37.pyc │ ├── colmap_loader.cpython-310.pyc │ ├── colmap_loader.cpython-37.pyc │ ├── dataset_readers.cpython-310.pyc │ ├── dataset_readers.cpython-37.pyc │ ├── gaussian_model.cpython-310.pyc │ ├── gaussian_model.cpython-37.pyc │ ├── kmeans_quantize.cpython-37.pyc │ └── kmeans_quantize_ablation.cpython-37.pyc ├── cameras.py ├── colmap_loader.py ├── dataset_readers.py └── gaussian_model.py └── static ├── css ├── bulma-carousel.min.css ├── bulma-slider.min.css ├── bulma.css.map.txt ├── bulma.min.css ├── fontawesome.all.min.css └── index.css ├── images ├── Downstream tasks.jpg ├── Method_Overview.jpg ├── Object grounding on LERF.jpg ├── carousel1.jpg ├── carousel2.jpg ├── carousel3.jpg ├── carousel4.jpg └── favicon.ico ├── js ├── bulma-carousel.js ├── bulma-carousel.min.js ├── bulma-slider.js ├── bulma-slider.min.js ├── fontawesome.all.min.js └── index.js ├── pdfs └── sample.pdf └── videos ├── banner_video.mp4 ├── carousel1.mp4 ├── carousel2.mp4 ├── carousel3.mp4 ├── overview_video_raw.mp4 └── scannet0000_compress.mp4 /.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /2D_info_extract.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import sys 4 | import json 5 | import random 6 | import argparse 7 | import cv2 8 | import copy 9 | import numpy as np 10 | from tqdm import tqdm 11 | from PIL import Image 12 | from dataclasses import dataclass, field 13 | from typing import Tuple, Type 14 | import open3d as o3d 15 | from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer 16 | import torch 17 | import torchvision 18 | from torch import nn 19 | from loguru import logger 20 | try: 21 | import open_clip 22 | except ImportError: 23 | assert False, "open_clip is not installed, install it with `pip install open-clip-torch`" 24 | 25 | from submodules.segment_anything.sam2.build_sam import build_sam2 26 | from submodules.segment_anything.sam2.automatic_mask_generator_2 import SAM2AutomaticMaskGenerator 27 | from submodules.segment_anything.sam2.sam2_image_predictor import SAM2ImagePredictor 28 | from submodules.groundingdino.groundingdino.util.inference import Model 29 | from submodules.llava.utils import disable_torch_init 30 | from submodules.llava.model.builder import load_pretrained_model 31 | from submodules.llava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path 32 | from submodules.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN 33 | from submodules.llava.conversation import conv_templates 34 | 35 | @dataclass 36 | class OpenCLIPNetworkConfig: 37 | _target: Type = field(default_factory=lambda: OpenCLIPNetwork) 38 | clip_model_type: str = "ViT-B-16" 39 | clip_model_pretrained: str = "/home/wangxihan/LangSplat/LangSplat-main/submodules/open_clip/open_clip_pytorch_model.bin" 40 | clip_n_dims: int = 512 41 | 42 | class 
OpenCLIPNetwork(nn.Module): 43 | def __init__(self, config: OpenCLIPNetworkConfig): 44 | super().__init__() 45 | self.config = config 46 | self.process = torchvision.transforms.Compose( 47 | [ 48 | torchvision.transforms.Resize((224, 224)), 49 | torchvision.transforms.Normalize( 50 | mean=[0.48145466, 0.4578275, 0.40821073], 51 | std=[0.26862954, 0.26130258, 0.27577711], 52 | ), 53 | ] 54 | ) 55 | 56 | model, _, _ = open_clip.create_model_and_transforms( 57 | self.config.clip_model_type, 58 | self.config.clip_model_pretrained, 59 | precision="fp16", 60 | ) 61 | model.eval() 62 | self.tokenizer = open_clip.get_tokenizer(self.config.clip_model_type) 63 | self.model = model.to(args.device) 64 | self.clip_n_dims = self.config.clip_n_dims 65 | 66 | @property 67 | def name(self) -> str: 68 | return "openclip_{}_{}".format(self.config.clip_model_type, self.config.clip_model_pretrained) 69 | 70 | @property 71 | def embedding_dim(self) -> int: 72 | return self.config.clip_n_dims 73 | 74 | def encode_image(self, input): 75 | processed_input = self.process(input).half() 76 | return self.model.encode_image(processed_input) 77 | 78 | def encode_texts(self, class_ids, classes): 79 | with torch.no_grad(): 80 | tokenized_texts = torch.cat([self.tokenizer(classes[class_id]) for class_id in class_ids]).to(args.device) 81 | text_feats = self.model.encode_text(tokenized_texts) 82 | text_feats /= text_feats.norm(dim=-1, keepdim=True) 83 | return text_feats 84 | 85 | class LLaVaChat(): 86 | # Model Constants 87 | IGNORE_INDEX = -100 88 | IMAGE_TOKEN_INDEX = -200 89 | DEFAULT_IMAGE_TOKEN = "" 90 | DEFAULT_IMAGE_PATCH_TOKEN = "" 91 | DEFAULT_IM_START_TOKEN = "" 92 | DEFAULT_IM_END_TOKEN = "" 93 | IMAGE_PLACEHOLDER = "" 94 | 95 | def __init__(self, model_path): 96 | disable_torch_init() 97 | 98 | self.model_name = get_model_name_from_path(model_path) 99 | self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model( 100 | model_path, None, self.model_name, device="cuda") 101 | 102 | if "llama-2" in self.model_name.lower(): 103 | self.conv_mode = "llava_llama_2" 104 | elif "mistral" in self.model_name.lower(): 105 | self.conv_mode = "mistral_instruct" 106 | elif "v1.6-34b" in self.model_name.lower(): 107 | self.conv_mode = "chatml_direct" 108 | elif "v1" in self.model_name.lower(): 109 | self.conv_mode = "llava_v1" 110 | elif "mpt" in self.model_name.lower(): 111 | self.conv_mode = "mpt" 112 | else: 113 | self.conv_mode = "llava_v0" 114 | 115 | def preprocess_image(self, images): 116 | x = process_images( 117 | images, 118 | self.image_processor, 119 | self.model.config) 120 | 121 | return x.to(self.model.device, dtype=torch.float16) 122 | 123 | def __call__(self, query, image_features, image_sizes): 124 | # Given this query, and the image_featurese, prompt LLaVA with the query, 125 | # using the image_features as context. 
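        # Flow of the generation call below: copy the conversation template chosen
        # by self.conv_mode, prepend the image token(s) to the text query, tokenize
        # the prompt with IMAGE_TOKEN_INDEX marking the image slot, then call
        # model.generate() greedily (temperature 0, do_sample=False) with the
        # pre-processed image tensor passed via `images=`.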
126 | 127 | conv = conv_templates[self.conv_mode].copy() 128 | 129 | if self.model.config.mm_use_im_start_end: 130 | inp = LLaVaChat.DEFAULT_IM_START_TOKEN +\ 131 | LLaVaChat.DEFAULT_IMAGE_TOKEN +\ 132 | LLaVaChat.DEFAULT_IM_END_TOKEN + '\n' + query 133 | else: 134 | inp = LLaVaChat.DEFAULT_IMAGE_TOKEN + '\n' + query 135 | conv.append_message(conv.roles[0], inp) 136 | 137 | conv.append_message(conv.roles[1], None) 138 | prompt = conv.get_prompt() 139 | 140 | input_ids = tokenizer_image_token( 141 | prompt, self.tokenizer, LLaVaChat.IMAGE_TOKEN_INDEX, 142 | return_tensors='pt').unsqueeze(0).to("cuda") 143 | streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True) 144 | 145 | self.temperature = 0 146 | self.max_new_tokens = 512 147 | with torch.inference_mode(): 148 | output_ids = self.model.generate( 149 | input_ids, 150 | images=image_features, 151 | image_sizes=image_sizes, 152 | do_sample=True if self.temperature > 0 else False, 153 | temperature=self.temperature, 154 | max_new_tokens=self.max_new_tokens, 155 | streamer=streamer, 156 | use_cache=True) 157 | 158 | outputs = self.tokenizer.decode(output_ids[0]).strip() 159 | return outputs 160 | 161 | def describe_LLAVA(mask_id, image, chat:LLaVaChat, class_i, class_j, Cord_i, Cord_j, mode): 162 | 163 | ### caption 164 | image_sizes = [image.size] 165 | image_tensor = chat.preprocess_image([image]).to("cuda", dtype=torch.float16) 166 | template = {} 167 | 168 | if mode == "category": 169 | query_base = """Identify and list only the main object categories clearly visible in the image.""" 170 | 171 | query_tail = """ 172 | Provide only the category names, separated by commas. 173 | Only list the main object categories in the image. 174 | Maximum 10 categories, focus on clear, foreground objects 175 | Each category should be listed only once, even if multiple instances of the same category are present. 176 | Avoid overly specific or recursive descriptions. 177 | Do not include descriptions, explanations, or duplicates. 178 | Do not include quotes, brackets, or any additional formatting in the output. 179 | Examples: 180 | Chair, Table, Window 181 | """ 182 | query = query_base + "\n" + query_tail 183 | text = chat(query=query, image_features=image_tensor, image_sizes=image_sizes) 184 | template["categories"] = re.sub(r'\s+', ' ', text.replace("", "").replace("", "").replace("-", "").strip()) 185 | 186 | if mode == "captions": 187 | query_base = """Describe the visible object in front of you, 188 | focusing on its spatial dimensions, visual attributes, and material properties.""" 189 | 190 | query_tail = """ 191 | The object is typically found in indoor scenes and its category is {class_i}. 192 | Briefly describe the object within ten word. Keep the description concise. 193 | Focus on the object's appearance, geometry, and material. Do not describe the background or unrelated details. 194 | Ensure the description is specific and avoids vague terms. 195 | Examples: 196 | a closed wooden door with a glass panel; 197 | a pillow with a floral pattern; 198 | a wooden table; 199 | a gray wall. 
200 | """ 201 | query = query_base + "\n" + query_tail 202 | text = chat(query=query.format(class_i=class_i), image_features=image_tensor, image_sizes=image_sizes) 203 | template["id"] = mask_id 204 | template["description"] = text.replace("", "").replace("", "").strip() 205 | 206 | elif mode == "relationships": 207 | query_base = """There are two objects with category and 2D coordinate, 208 | paying close attention to the positional relationship between two selected objects.""" 209 | query_tail = """ 210 | You are capable of analyzing spatial relationships between objects in an image. 211 | 212 | In the given image, there are two boxed objects: 213 | - The object selected by the red box is [{class_i}], and its bounding box coordinates are {bbox1}. 214 | - The object selected by the blue box is [{class_j}], and its bounding box coordinates are {bbox2}. 215 | 216 | Note: The bounding box coordinates are in the format (x_min, y_min, x_max, y_max), where (x_min, y_min) represents the top-left corner of the box and (x_max, y_max) represents the bottom-right corner of the box. 217 | 218 | The spatial relationship between [{class_i}] and [{class_j}] may include, but is not limited to, the following types: 219 | - "Above" means Object A is located higher in vertical position (y_min smaller). 220 | - "Below" means Object A is located lower in vertical position (y_min larger). 221 | - "Left" means Object A's x_min is smaller than Object B's x_min. 222 | - "Right" means Object A's x_min is larger than Object B's x_min. 223 | - "Inside" means Object A's bounding box is fully contained within Object B's bounding box. 224 | - "Contains" means Object A's bounding box fully contains Object B's bounding box. 225 | - "Next to" means the distance between boxes is very small, without overlap. 226 | 227 | Please provide the output in the following format: 228 | Coarse: The spatial relationship between {class_i} and {class_j}; Fine: A detailed description of the relationship (optional). 229 | 230 | Example output: 231 | Coarse: The cup is on the table; Fine: The cup is resting near the center of the table, with its handle facing outward. 232 | Coarse: The book is under the lamp; Fine: The book lies directly beneath the lamp, slightly tilted, as if recently placed. 233 | Coarse: The cat is next to the sofa; Fine: The cat is sitting closely beside the sofa's left armrest, partially leaning on it. 234 | """ 235 | query = query_base + "\n" + query_tail 236 | text = chat(query=query.format(class_i=class_i, class_j=class_j, bbox1=Cord_i, bbox2=Cord_j), image_features=image_tensor, image_sizes=image_sizes) 237 | template["id_pair"] = mask_id 238 | template["relationship"] = text.replace("", "").replace("", "").strip() 239 | 240 | return template 241 | 242 | def mask_nms(masks, scores, iou_thr=0.7, score_thr=0.1, inner_thr=0.2, **kwargs): 243 | """ 244 | Perform mask non-maximum suppression (NMS) on a set of masks based on their scores. 245 | 246 | Args: 247 | masks (torch.Tensor): has shape (num_masks, H, W) 248 | scores (torch.Tensor): The scores of the masks, has shape (num_masks,) 249 | iou_thr (float, optional): The threshold for IoU. 250 | score_thr (float, optional): The threshold for the mask scores. 251 | inner_thr (float, optional): The threshold for the overlap rate. 252 | **kwargs: Additional keyword arguments. 253 | Returns: 254 | selected_idx (torch.Tensor): A tensor representing the selected indices of the masks after NMS. 
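        Example (illustrative usage, with boolean masks of shape (num_masks, H, W)
        and per-mask scores of shape (num_masks,)):
            keep = mask_nms(seg_pred, scores, iou_thr=0.7, score_thr=0.1, inner_thr=0.2)
            kept_masks = seg_pred[keep]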
255 | """ 256 | 257 | scores, idx = scores.sort(0, descending=True) 258 | num_masks = idx.shape[0] 259 | 260 | masks_ord = masks[idx.view(-1), :] 261 | masks_area = torch.sum(masks_ord, dim=(1, 2), dtype=torch.float) 262 | 263 | iou_matrix = torch.zeros((num_masks,) * 2, dtype=torch.float, device=masks.device) 264 | inner_iou_matrix = torch.zeros((num_masks,) * 2, dtype=torch.float, device=masks.device) 265 | for i in range(num_masks): 266 | for j in range(i, num_masks): 267 | intersection = torch.sum(torch.logical_and(masks_ord[i], masks_ord[j]), dtype=torch.float) 268 | union = torch.sum(torch.logical_or(masks_ord[i], masks_ord[j]), dtype=torch.float) 269 | iou = intersection / union 270 | iou_matrix[i, j] = iou 271 | # select mask pairs that may have a severe internal relationship 272 | if intersection / masks_area[i] < 0.5 and intersection / masks_area[j] >= 0.85: 273 | inner_iou = 1 - (intersection / masks_area[j]) * (intersection / masks_area[i]) 274 | inner_iou_matrix[i, j] = inner_iou 275 | if intersection / masks_area[i] >= 0.85 and intersection / masks_area[j] < 0.5: 276 | inner_iou = 1 - (intersection / masks_area[j]) * (intersection / masks_area[i]) 277 | inner_iou_matrix[j, i] = inner_iou 278 | 279 | iou_matrix.triu_(diagonal=1) 280 | iou_max, _ = iou_matrix.max(dim=0) 281 | inner_iou_matrix_u = torch.triu(inner_iou_matrix, diagonal=1) 282 | inner_iou_max_u, _ = inner_iou_matrix_u.max(dim=0) 283 | inner_iou_matrix_l = torch.tril(inner_iou_matrix, diagonal=1) 284 | inner_iou_max_l, _ = inner_iou_matrix_l.max(dim=0) 285 | 286 | keep = iou_max <= iou_thr 287 | keep_conf = scores > score_thr 288 | keep_inner_u = inner_iou_max_u <= 1 - inner_thr 289 | keep_inner_l = inner_iou_max_l <= 1 - inner_thr 290 | 291 | # If there are no masks with scores above threshold, the top 3 masks are selected 292 | if keep_conf.sum() == 0: 293 | index = scores.topk(3).indices 294 | keep_conf[index, 0] = True 295 | if keep_inner_u.sum() == 0: 296 | index = scores.topk(3).indices 297 | keep_inner_u[index, 0] = True 298 | if keep_inner_l.sum() == 0: 299 | index = scores.topk(3).indices 300 | keep_inner_l[index, 0] = True 301 | keep *= keep_conf 302 | keep *= keep_inner_u 303 | keep *= keep_inner_l 304 | 305 | selected_idx = idx[keep] 306 | return selected_idx 307 | 308 | def masks_update(*args, **kwargs): 309 | # remove redundant masks based on the scores and overlap rate between masks 310 | masks_new = () 311 | for masks_lvl in (args): 312 | if not masks_lvl: 313 | masks_new += ([],) # 或者其他适当的处理 314 | continue 315 | seg_pred = torch.from_numpy(np.stack([m['segmentation'] for m in masks_lvl], axis=0)) 316 | iou_pred = torch.from_numpy(np.stack([m['predicted_iou'] for m in masks_lvl], axis=0)) 317 | stability = torch.from_numpy(np.stack([m['stability_score'] for m in masks_lvl], axis=0)) 318 | 319 | scores = stability * iou_pred 320 | keep_mask_nms = mask_nms(seg_pred, scores, **kwargs) 321 | masks_lvl = filter(keep_mask_nms, masks_lvl) 322 | 323 | masks_new += (masks_lvl,) 324 | return masks_new 325 | 326 | def get_seg_img(mask, image): 327 | image = image.copy() 328 | image[mask['segmentation']==0] = np.array([0, 0, 0], dtype=np.uint8) 329 | x,y,w,h = np.int32(mask['bbox']) 330 | seg_img = image[y:y+h, x:x+w, ...] 
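    # seg_img is the bbox-tight crop of the object; pixels outside the SAM mask
    # were zeroed above, so only the segmented object remains visible in the crop.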
331 | return seg_img 332 | 333 | def pad_img(img): 334 | h, w, _ = img.shape 335 | l = max(w,h) 336 | pad = np.zeros((l,l,3), dtype=np.uint8) 337 | if h > w: 338 | pad[:,(h-w)//2:(h-w)//2 + w, :] = img 339 | else: 340 | pad[(w-h)//2:(w-h)//2 + h, :, :] = img 341 | return pad 342 | 343 | def compute_iou_matrix(generated_masks, seg_map, unique_mask_indices): 344 | """ 345 | 计算所有生成的掩码与seg_map中所有掩码的IoU 346 | :param generated_masks: (num_masks, H, W),生成的掩码数组 347 | :param seg_map: (H, W), 全图的seg_map 348 | :param unique_mask_indices: seg_map中唯一的掩码索引 349 | :return: (num_masks, num_seg_masks) 的IoU矩阵 350 | """ 351 | num_seg_masks = len(unique_mask_indices) 352 | generated_masks = generated_masks.astype(np.bool_) 353 | # 初始化一个空的IoU矩阵 354 | iou_matrix = np.zeros((1, num_seg_masks)) 355 | 356 | # 逐个计算IoU 357 | for i, mask_index in enumerate(unique_mask_indices): 358 | if mask_index == -1: # 跳过背景 359 | continue 360 | 361 | # 获取seg_map中当前掩码的区域 362 | seg_mask = (seg_map == mask_index) # (H, W) 363 | 364 | # 计算交集和并集 365 | intersection = np.sum(generated_masks & seg_mask) # (num_masks, H, W) 与 (H, W) 计算交集 366 | union = np.sum(generated_masks | seg_mask) # (num_masks, H, W) 与 (H, W) 计算并集 367 | 368 | # 计算IoU 369 | iou_matrix[:, i] = intersection / (union + 1e-6) # 防止除以零,1e-6为小常数 370 | 371 | return iou_matrix 372 | 373 | def get_bbox_img(box, image): 374 | image = image.copy() 375 | x_min, y_min, x_max, y_max = map(int, box) 376 | # 从图像中截取框内区域 377 | seg_img = image[y_min:y_max, x_min:x_max] 378 | return seg_img 379 | 380 | def sam_predictor(seg_map, image, detections): 381 | 382 | with torch.inference_mode(), torch.autocast('cuda', dtype=torch.bfloat16): 383 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 384 | predictor_sam.set_image(image=image) 385 | seg_img_list = [] 386 | classes_list = [] 387 | mask_indices = {} 388 | 389 | # 获取所有模式下唯一的掩码索引(减少冗余计算) 390 | unique_mask_indices_cache = {} 391 | for mode in ['default', 's', 'm', 'l']: 392 | unique_mask_indices_cache[mode] = np.unique(seg_map[mode]) 393 | 394 | for i, box in enumerate(detections.xyxy): 395 | category_id = detections.class_id[i] 396 | classes_list.append(category_id) 397 | masks, scores, logits = predictor_sam.predict(box=box, multimask_output=True) 398 | index = np.argmax(scores) 399 | generated_mask = masks[index] 400 | 401 | mode_mask_indices = {} 402 | for mode, unique_mask_indices in unique_mask_indices_cache.items(): 403 | 404 | # 计算IoU矩阵 405 | iou_matrix = compute_iou_matrix(generated_mask[None, :, :], seg_map[mode], unique_mask_indices) 406 | best_mask_index = unique_mask_indices[np.argmax(iou_matrix)] 407 | mode_mask_indices[mode] = best_mask_index 408 | 409 | mask_indices[i] = mode_mask_indices 410 | 411 | seg_img = get_bbox_img(box, image) 412 | pad_seg_img = cv2.resize(pad_img(seg_img), (224,224)) 413 | seg_img_list.append(pad_seg_img) 414 | 415 | if len(classes_list) > 0: 416 | catogories = torch.from_numpy(np.stack(classes_list, axis=0)) 417 | seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3 418 | seg_imgs = (torch.from_numpy(seg_imgs.astype("float32")).permute(0,3,1,2) / 255.0).to(args.device) 419 | 420 | return seg_imgs, catogories, mask_indices 421 | 422 | def sam_encoder(image): 423 | 424 | # pre-compute masks 425 | masks_default, masks_s, masks_m, masks_l = mask_generator.generate(image) 426 | # pre-compute postprocess 427 | #masks_default, masks_s, masks_m, masks_l = masks_update(masks_default, masks_s, masks_m, masks_l, iou_thr=0.7, score_thr=0.6, inner_thr=0.5) 428 | 429 | def mask2segmap(masks, image): 430 | 
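        # For every SAM mask: zero the background, crop to the mask's bbox, pad to a
        # square and resize to 224x224 (CLIP input size); seg_map records, per pixel,
        # the index of the covering mask (-1 where no mask applies).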
seg_img_list = [] 431 | seg_map = -np.ones(image.shape[:2], dtype=np.int32) 432 | for i in range(len(masks)): 433 | mask = masks[i] 434 | seg_img = get_seg_img(mask, image) 435 | pad_seg_img = cv2.resize(pad_img(seg_img), (224,224)) 436 | seg_img_list.append(pad_seg_img) 437 | 438 | seg_map[masks[i]['segmentation']] = i 439 | seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3 440 | 441 | seg_imgs = (torch.from_numpy(seg_imgs.astype("float32")).permute(0,3,1,2) / 255.0).to('cuda') 442 | 443 | return seg_imgs, seg_map 444 | seg_images, seg_maps = {}, {} 445 | seg_images['default'], seg_maps['default'] = mask2segmap(masks_default, image) 446 | if len(masks_s) != 0: 447 | seg_images['s'], seg_maps['s'] = mask2segmap(masks_s, image) 448 | if len(masks_m) != 0: 449 | seg_images['m'], seg_maps['m'] = mask2segmap(masks_m, image) 450 | if len(masks_l) != 0: 451 | seg_images['l'], seg_maps['l'] = mask2segmap(masks_l, image) 452 | 453 | # 0:default 1:s 2:m 3:l 454 | return seg_images, seg_maps 455 | 456 | def is_overlapping(box1, box2): 457 | """ 458 | Check if two bounding boxes overlap. 459 | 460 | Args: 461 | box1 (list or array): Coordinates of the first box [x1, y1, x2, y2]. 462 | box2 (list or array): Coordinates of the second box [x1, y1, x2, y2]. 463 | 464 | Returns: 465 | bool: True if the boxes overlap, False otherwise. 466 | """ 467 | x1_min, y1_min, x1_max, y1_max = box1 468 | x2_min, y2_min, x2_max, y2_max = box2 469 | 470 | # Check if there is no overlap 471 | if x1_max < x2_min or x2_max < x1_min or y1_max < y2_min or y2_max < y1_min: 472 | return False 473 | return True 474 | 475 | def object_pairs(box1, box2): 476 | x1_min, y1_min, x1_max, y1_max = box1 477 | x2_min, y2_min, x2_max, y2_max = box2 478 | 479 | center1 = torch.tensor([(box1[0] + box1[2]) / 2, (box1[1] + box1[3]) / 2]) 480 | center2 = torch.tensor([(box2[0] + box2[2]) / 2, (box2[1] + box2[3]) / 2]) 481 | dist = torch.norm(center1 - center2, p=2) 482 | 483 | # Check if there is no overlap 484 | if x1_max < x2_min or x2_max < x1_min or y1_max < y2_min or y2_max < y1_min: 485 | overlapping = False 486 | else: 487 | overlapping = True 488 | 489 | return overlapping, dist 490 | 491 | def crop_and_blackout(image, bbox1, bbox2, padding): 492 | """ 493 | 从图像中截取指定索引的两个矩形框区域,并将其余部分设为黑色。 494 | 495 | 参数: 496 | image: 输入图像 (H, W, C) 497 | detections: 检测框列表,每个元素是一个 [x1, y1, x2, y2] 498 | idx1: 第一个矩形框的索引 499 | idx2: 第二个矩形框的索引 500 | 501 | 返回: 502 | cropped_image: 包含两个矩形框的图像,其他区域为黑色 503 | """ 504 | height, width = image.shape[:2] 505 | # 复制图像,初始化为黑色图像 506 | cropped_image = np.zeros_like(image) 507 | 508 | # 获取第一个矩形框的坐标 509 | x1, y1, x2, y2 = map(int, bbox1) 510 | # 扩充裁剪区域,确保不会超出图像边界 511 | x1 = max(0, x1 - padding) 512 | y1 = max(0, y1 - padding) 513 | x2 = min(width, x2 + padding) 514 | y2 = min(height, y2 + padding) 515 | # 将第一个矩形框区域复制到黑色图像中 516 | cropped_image[y1:y2, x1:x2] = image[y1:y2, x1:x2] 517 | 518 | # 获取第二个矩形框的坐标 519 | x1, y1, x2, y2 = map(int, bbox2) 520 | # 扩充裁剪区域,确保不会超出图像边界 521 | x1 = max(0, x1 - padding) 522 | y1 = max(0, y1 - padding) 523 | x2 = min(width, x2 + padding) 524 | y2 = min(height, y2 + padding) 525 | # 将第二个矩形框区域复制到黑色图像中 526 | cropped_image[y1:y2, x1:x2] = image[y1:y2, x1:x2] 527 | 528 | return cropped_image 529 | 530 | def draw_bounding_boxes(image, bbox1, bbox2, color1=(0, 255, 0), color2=(0, 0, 255), thickness=2): 531 | """ 532 | 在图像上绘制两个矩形框。 533 | 534 | 参数: 535 | image: 输入图像 (H, W, C) 536 | bbox1: 第一个矩形框的坐标 [x1, y1, x2, y2] 537 | bbox2: 第二个矩形框的坐标 [x1, y1, x2, y2] 538 | color1: 第一个矩形框的颜色 (B, G, R) 539 | 
color2: 第二个矩形框的颜色 (B, G, R) 540 | thickness: 矩形框的线条粗细 541 | """ 542 | x1_min, y1_min, x1_max, y1_max = map(int, bbox1) 543 | x2_min, y2_min, x2_max, y2_max = map(int, bbox2) 544 | 545 | # 绘制第一个矩形框 546 | cv2.rectangle(image, (x1_min, y1_min), (x1_max, y1_max), color1, thickness) 547 | 548 | # 绘制第二个矩形框 549 | cv2.rectangle(image, (x2_min, y2_min), (x2_max, y2_max), color2, thickness) 550 | 551 | def graph_construct(image_path, sam_predictor, sam_encoder, llava_chat, classes_set): 552 | 553 | image_pil = Image.open(image_path).convert("RGB") 554 | image = cv2.imread(image_path) 555 | resolution = (800, 800) 556 | image = cv2.resize(image, resolution) 557 | image_pil = image_pil.resize((resolution[1], resolution[0]), Image.ANTIALIAS) 558 | 559 | seg_images, seg_map = sam_encoder(np.array(image_pil)) 560 | 561 | clip_embeds = {} 562 | for mode in ['default', 's', 'm', 'l']: 563 | tiles = seg_images[mode] 564 | tiles = tiles.to("cuda") 565 | with torch.no_grad(): 566 | clip_embed = clip_model.encode_image(tiles) 567 | clip_embed /= clip_embed.norm(dim=-1, keepdim=True) 568 | clip_embeds[mode] = clip_embed.detach().cpu().half() 569 | 570 | graph_dict = {} 571 | print(image_path, '******************') 572 | with torch.no_grad(): 573 | classes_info = describe_LLAVA(mask_id=None, image=image_pil, chat=llava_chat, 574 | class_i=None, class_j=None, Cord_i=None, Cord_j=None, mode='category') 575 | classes = list(set(classes_info['categories'].strip('"').split(','))) 576 | classes = [item.strip().replace(',', '') for item in classes] 577 | print(classes, 'class') 578 | classes_set.update(classes) 579 | 580 | # grounding_dino detector 581 | if len(classes) > 0: 582 | classes = classes 583 | else: 584 | assert len(classes) == 0, "Error: No target detected in the image!" 585 | 586 | graph_dict['classes'] = classes 587 | 588 | detections = grounding_dino_model.predict_with_classes( 589 | image=image, # This function expects a BGR image... 590 | classes=classes, 591 | box_threshold=0.5, 592 | text_threshold=0.4, 593 | ) 594 | 595 | if len(detections.class_id) > 0: 596 | ### Non-maximum suppression ### 597 | # print(f"Before NMS: {len(detections.xyxy)} boxes") 598 | nms_idx = torchvision.ops.nms( 599 | torch.from_numpy(detections.xyxy), 600 | torch.from_numpy(detections.confidence), 601 | args.nms_threshold 602 | ).numpy().tolist() 603 | # print(f"After NMS: {len(detections.xyxy)} boxes") 604 | 605 | detections.xyxy = detections.xyxy[nms_idx] 606 | detections.confidence = detections.confidence[nms_idx] 607 | detections.class_id = detections.class_id[nms_idx] 608 | 609 | # Somehow some detections will have class_id=-1, remove them 610 | valid_idx = detections.class_id != -1 611 | detections.xyxy = detections.xyxy[valid_idx] 612 | detections.confidence = detections.confidence[valid_idx] 613 | detections.class_id = detections.class_id[valid_idx] 614 | 615 | else: 616 | detections = grounding_dino_model.predict_with_classes( 617 | image=image, # This function expects a BGR image... 618 | classes=classes, 619 | box_threshold=0.2, 620 | text_threshold=0.2, 621 | ) 622 | 623 | if len(detections.class_id) == 0: 624 | assert len(detections.class_id) == 0, "Error: No target detected in the image!" 
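    # Drop detections whose class_id could not be resolved (-1), then prompt SAM
    # with each surviving box, match the predicted mask against the precomputed
    # SAM2 seg_maps of every scale by IoU, and embed the box crops with CLIP.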
625 | 626 | valid_idx = detections.class_id != -1 627 | detections.xyxy = detections.xyxy[valid_idx] 628 | detections.confidence = detections.confidence[valid_idx] 629 | detections.class_id = detections.class_id[valid_idx] 630 | 631 | # sam segmentation 632 | seg_bbox, categories, match_indices = sam_predictor(seg_map, image, detections) 633 | 634 | # clip 635 | tiles = seg_bbox.to(args.device) 636 | categories = categories.to(args.device) 637 | 638 | # captions of foreground objects 639 | descriptions = [] 640 | for idx, fore_box in enumerate(detections.xyxy): 641 | cropped_image = np.zeros_like(image_pil) 642 | 643 | # 获取矩形框的坐标 644 | x1, y1, x2, y2 = map(int, fore_box) 645 | cropped_image[y1:y2, x1:x2] = np.array(image_pil)[y1:y2, x1:x2] 646 | cropped_image = Image.fromarray(cropped_image) 647 | 648 | match_idx = {} 649 | for mode in ['default', 's', 'm', 'l']: 650 | match_idx[mode] = int(match_indices[idx][mode]) 651 | 652 | class_i = classes[detections.class_id[idx]] 653 | description = describe_LLAVA(mask_id=match_idx, image=cropped_image, chat=llava_chat, 654 | class_i=class_i, class_j=None, Cord_i=None, Cord_j=None, mode='captions') 655 | descriptions.append(description) 656 | 657 | graph_dict['captions'] = descriptions 658 | 659 | image_embed = clip_model.encode_image(tiles) 660 | image_embed /= image_embed.norm(dim=-1, keepdim=True) 661 | # text_embed = clip_model.encode_texts(categories, classes) 662 | 663 | # generate relation 664 | relations = [] 665 | for idx_i, bbox_i in enumerate(detections.xyxy): 666 | for idx_j, bbox_j in enumerate(detections.xyxy[idx_i + 1:], start=idx_i + 1): 667 | if idx_i == idx_j: 668 | continue 669 | torch.cuda.empty_cache() 670 | # 计算特征相似度 671 | #similarity = torch.cosine_similarity(image_embed[idx_i].unsqueeze(0), image_embed[idx_j].unsqueeze(0), dim=1).item() 672 | 673 | inter, dist = object_pairs(detections.xyxy[idx_i], detections.xyxy[idx_j]) 674 | 675 | class_i, class_j = classes[detections.class_id[idx_i]], classes[detections.class_id[idx_j]] 676 | 677 | image_height, image_width = image.shape[:2] 678 | image_diag = torch.sqrt(torch.tensor(image_width ** 2 + image_height ** 2)) 679 | 680 | if inter or dist < 0.3 * image_diag: 681 | match_idx_i = {} 682 | match_idx_j = {} 683 | for mode in ['default', 's', 'm', 'l']: 684 | match_idx_i[mode] = int(match_indices[idx_i][mode]) 685 | match_idx_j[mode] = int(match_indices[idx_j][mode]) 686 | image_copy = image.copy() 687 | draw_bounding_boxes(image_copy, detections.xyxy[idx_i], detections.xyxy[idx_j]) 688 | boxed_image = Image.fromarray(cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)) 689 | output_path = os.path.join(args.output_dir, f"object_i{idx_i}_j{idx_j}.png") 690 | boxed_image.save(output_path) 691 | relation_info = describe_LLAVA((match_idx_i, match_idx_j), boxed_image, llava_chat, 692 | class_i, class_j, detections.xyxy[idx_i], detections.xyxy[idx_j], mode='relationships') 693 | print(relation_info) 694 | relations.append(relation_info) 695 | 696 | graph_dict['relations'] = relations 697 | 698 | return clip_embeds, seg_map, graph_dict 699 | 700 | def create(args, img_folder, save_folder): 701 | data_list = os.listdir(img_folder) 702 | data_list.sort() 703 | assert len(data_list) is not None, "image_list must be provided to generate features" 704 | timer = 0 705 | embed_size=512 706 | seg_maps = [] 707 | total_lengths = [] 708 | timer = 0 709 | img_embeds = torch.zeros((len(data_list), 100, embed_size)) 710 | seg_maps = torch.zeros((len(data_list), 4, 800, 800)) 711 | llava_chat = 
LLaVaChat(args.llava_ckpt) 712 | classes_set = set() 713 | mask_generator.predictor.model 714 | 715 | for i, data_path in tqdm(enumerate(data_list), desc="Embedding images", leave=False): 716 | timer += 1 717 | torch.cuda.empty_cache() 718 | image_path = os.path.join(img_folder, data_path) 719 | 720 | img_embed, seg_map, graph_dict = graph_construct(image_path, sam_predictor, sam_encoder, llava_chat, classes_set) 721 | 722 | lengths = [len(v) for k, v in img_embed.items()] 723 | total_length = sum(lengths) 724 | total_lengths.append(total_length) 725 | 726 | if total_length > img_embeds.shape[1]: 727 | pad = total_length - img_embeds.shape[1] 728 | img_embeds = torch.cat([ 729 | img_embeds, 730 | torch.zeros((len(data_list), pad, embed_size)) 731 | ], dim=1) 732 | img_embed = torch.cat([v for k, v in img_embed.items()], dim=0) 733 | assert img_embed.shape[0] == total_length 734 | 735 | img_embeds[i, :total_length] = img_embed 736 | seg_map_tensor = [] 737 | lengths_cumsum = lengths.copy() 738 | for j in range(1, len(lengths)): 739 | lengths_cumsum[j] += lengths_cumsum[j-1] 740 | for j, (k, v) in enumerate(seg_map.items()): 741 | if j == 0: 742 | seg_map_tensor.append(torch.from_numpy(v)) 743 | continue 744 | assert v.max() == lengths[j] - 1, f"{j}, {v.max()}, {lengths[j]-1}" 745 | v[v != -1] += lengths_cumsum[j-1] 746 | seg_map_tensor.append(torch.from_numpy(v)) 747 | seg_map = torch.stack(seg_map_tensor, dim=0) 748 | seg_maps[i] = seg_map 749 | 750 | # 保存每个图像的 img_embed, seg_map和rel_info 751 | save_path = os.path.join(save_folder, os.path.splitext(os.path.basename(image_path))[0]) 752 | 753 | # 确保 seg_map 的最大值与长度一致 754 | assert total_lengths[i] == int(seg_maps[i].max() + 1) 755 | curr = { 756 | 'feature': img_embeds[i, :total_lengths[i]], 757 | 'seg_maps': seg_maps[i], 758 | 'graph': graph_dict 759 | } 760 | 761 | sava_numpy(save_path, curr) 762 | 763 | def sava_numpy(save_path, data): 764 | save_path_s = save_path + '_s.npy' 765 | save_path_f = save_path + '_f.npy' 766 | save_path_r = save_path + '_r.json' 767 | np.save(save_path_s, data['seg_maps'].numpy()) 768 | np.save(save_path_f, data['feature'].numpy()) 769 | with open(save_path_r, 'w') as f: 770 | json.dump(data['graph'], f) 771 | 772 | if __name__ == '__main__': 773 | 774 | parser = argparse.ArgumentParser() 775 | parser.add_argument('--config', default="sam2.1_hiera_l.yaml") 776 | parser.add_argument('--sam_ckpt', default="../submodules/segment_anything/checkpoints/sam2.1_hiera_large.pt") 777 | parser.add_argument('--dataset_path', type=str, default="/home/a_datasets1/wangxihan/ScanNet/scene0000_00/") 778 | parser.add_argument('--gsa_config', default="../submodules/groundingdino/groundingdino/config/GroundingDINO_SwinT_OGC.py") 779 | parser.add_argument('--gsa_ckpt', type=str, default="../submodules/groundingdino/groundingdino_swint_ogc.pth") 780 | parser.add_argument('--llava_ckpt', type=str, default="../submodules/llava/llava-next/llava_1.6") 781 | parser.add_argument("--box_threshold", type=float, default=0.2) 782 | parser.add_argument("--text_threshold", type=float, default=0.2) 783 | parser.add_argument("--nms_threshold", type=float, default=0.2) 784 | parser.add_argument('--resolution', type=int, default=-1) 785 | parser.add_argument('--output_dir', type=str, default="../vis/pairs") 786 | parser.add_argument('--device', type=str, default="cuda:0") 787 | args = parser.parse_args() 788 | torch.set_default_dtype(torch.float32) 789 | 790 | dataset_path = args.dataset_path 791 | 792 | # 判断路径是否存在 793 | if 
os.path.exists(os.path.join(dataset_path, 'color')): 794 | img_folder = os.path.join(dataset_path, 'color') 795 | elif os.path.exists(os.path.join(dataset_path, 'images')): 796 | img_folder = os.path.join(dataset_path, 'images') 797 | else: 798 | raise ValueError('Image folder not found') 799 | 800 | clip_model = OpenCLIPNetwork(OpenCLIPNetworkConfig) 801 | grounding_dino_model = Model(model_config_path=args.gsa_config, model_checkpoint_path=args.gsa_ckpt, device=args.device) 802 | sam = build_sam2(args.config, args.sam_ckpt, args.device, apply_postprocessing=False) 803 | predictor_sam = SAM2ImagePredictor(sam_model=sam) 804 | mask_generator = SAM2AutomaticMaskGenerator( 805 | model=sam) 806 | WARNED = False 807 | 808 | save_folder = os.path.join(dataset_path, 'language_features') 809 | os.makedirs(save_folder, exist_ok=True) 810 | create(args, img_folder, save_folder) 811 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GaussianGraph: 3D Gaussian-based Scene Graph Generation for Open-world Scene Understanding 2 | -------------------------------------------------------------------------------- /arguments/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | from argparse import ArgumentParser, Namespace 13 | import sys 14 | import os 15 | 16 | class GroupParams: 17 | pass 18 | 19 | class ParamGroup: 20 | def __init__(self, parser: ArgumentParser, name : str, fill_none = False): 21 | group = parser.add_argument_group(name) 22 | for key, value in vars(self).items(): 23 | shorthand = False 24 | if key.startswith("_"): 25 | shorthand = True 26 | key = key[1:] 27 | t = type(value) 28 | value = value if not fill_none else None 29 | if shorthand: 30 | if t == bool: 31 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, action="store_true") 32 | else: 33 | group.add_argument("--" + key, ("-" + key[0:1]), default=value, type=t) 34 | else: 35 | if t == bool: 36 | group.add_argument("--" + key, default=value, action="store_true") 37 | else: 38 | group.add_argument("--" + key, default=value, type=t) 39 | 40 | def extract(self, args): 41 | group = GroupParams() 42 | for arg in vars(args).items(): 43 | if arg[0] in vars(self) or ("_" + arg[0]) in vars(self): 44 | setattr(group, arg[0], arg[1]) 45 | return group 46 | 47 | class ModelParams(ParamGroup): 48 | def __init__(self, parser, sentinel=False): 49 | self.sh_degree = 3 50 | self._source_path = "" 51 | self._model_path = "" 52 | self._images = "images" 53 | self._resolution = -1 54 | self._white_background = False 55 | self.data_device = "cuda" 56 | self.eval = False 57 | super().__init__(parser, "Loading Parameters", sentinel) 58 | 59 | def extract(self, args): 60 | g = super().extract(args) 61 | g.source_path = os.path.abspath(g.source_path) 62 | return g 63 | 64 | class PipelineParams(ParamGroup): 65 | def __init__(self, parser): 66 | self.convert_SHs_python = False 67 | self.compute_cov3D_python = False 68 | self.debug = False 69 | super().__init__(parser, "Pipeline Parameters") 70 | 71 | class OptimizationParams(ParamGroup): 72 | def 
__init__(self, parser): 73 | self.update_fr = 100 # coarse-level codebook update frequency 74 | self.ins_feat_dim = 6 75 | self.position_lr_init = 0.00016 76 | self.position_lr_final = 0.0000016 77 | self.position_lr_delay_mult = 0.01 78 | self.position_lr_max_steps = 30_000 79 | self.feature_lr = 0.0025 80 | self.ins_feat_lr = 0.001 81 | self.opacity_lr = 0.05 82 | self.scaling_lr = 0.005 83 | self.rotation_lr = 0.001 84 | self.percent_dense = 0.01 85 | self.lambda_dssim = 0.2 86 | self.densification_interval = 100 87 | self.opacity_reset_interval = 3000 88 | self.densify_from_iter = 500 89 | self.densify_until_iter = 10_000 90 | self.densify_grad_threshold = 0.0002 91 | self.random_background = False 92 | 93 | parser.add_argument('--pos_weight', type=float, default=1.0) # position weight for coarse codebook 94 | parser.add_argument('--loss_weight', type=float, default=0.1) # loss_cohesion weight 95 | 96 | parser.add_argument('--iterations', type=int, default=70_000) # default 7w, scannet 9w 97 | parser.add_argument('--start_ins_feat_iter', type=int, default=30_000) # default 3w 98 | parser.add_argument('--start_control_cb_iter', type=int, default=40_000) # default 4w, scannet 5w 99 | parser.add_argument('--start_follow_cb_iter', type=int, default=50_000) # default 5w, scannet 7w 100 | 101 | # note: Freeze the position of the initial point, do not densify. for ScanNet 102 | parser.add_argument('--frozen_init_pts', action='store_true', default=False) 103 | parser.add_argument('--sam_level', type=int, default=3) 104 | 105 | parser.add_argument('--save_memory', action='store_true', default=False) 106 | super().__init__(parser, "Optimization Parameters") 107 | 108 | def extract(self, args): 109 | g = super().extract(args) 110 | g.pos_weight = args.pos_weight 111 | g.loss_weight = args.loss_weight 112 | g.frozen_init_pts = args.frozen_init_pts 113 | g.sam_level = args.sam_level 114 | g.iterations = args.iterations 115 | g.start_ins_feat_iter = args.start_ins_feat_iter 116 | g.start_control_cb_iter = args.start_control_cb_iter 117 | g.start_follow_cb_iter = args.start_follow_cb_iter 118 | g.save_memory = args.save_memory 119 | 120 | return g 121 | 122 | def get_combined_args(parser : ArgumentParser): 123 | cmdlne_string = sys.argv[1:] 124 | cfgfile_string = "Namespace()" 125 | args_cmdline = parser.parse_args(cmdlne_string) 126 | 127 | try: 128 | cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args") 129 | print("Looking for config file in", cfgfilepath) 130 | with open(cfgfilepath) as cfg_file: 131 | print("Config file found: {}".format(cfgfilepath)) 132 | cfgfile_string = cfg_file.read() 133 | except TypeError: 134 | print("Config file not found at") 135 | pass 136 | args_cfgfile = eval(cfgfile_string) 137 | 138 | merged_dict = vars(args_cfgfile).copy() 139 | for k,v in vars(args_cmdline).items(): 140 | if v != None: 141 | merged_dict[k] = v 142 | return Namespace(**merged_dict) 143 | -------------------------------------------------------------------------------- /arguments/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/arguments/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /arguments/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/arguments/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /gaussian_render/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import math 14 | # from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer 15 | from ashawkey_diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer 16 | from scene.gaussian_model import GaussianModel 17 | from utils.sh_utils import eval_sh 18 | from utils.opengs_utlis import * 19 | # from sklearn.neighbors import NearestNeighbors 20 | import pytorch3d.ops 21 | 22 | def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, iteration, 23 | scaling_modifier = 1.0, override_color = None, visible_mask = None, mask_num=0, 24 | control_cluster_idx=None, # per-point cluster id (control stage) 25 | follow_cluster_idx=None, # per-point cluster id (follow stage) 26 | rescale=True, # re-scale (for enhance ins_feat) 27 | origin_feat=False, # origin ins_feat (not quantized) 28 | render_feat_map=True, # render image-level feat map 29 | render_color=True, # render rgb image 30 | render_cluster=False, # render cluster, stage 2.2 31 | better_vis=False, # filter some points 32 | selected_cluster_id=None, # coarse-level cluster id 33 | pre_mask=None, 34 | seg_rgb=False, # render cluster rgb, not feat 35 | post_process=False, # post 36 | control_indices=None, 37 | cluster_num=None, 38 | control_points=False 39 | ): 40 | """ 41 | Render the scene. 42 | 43 | Background tensor (bg_color) must be on GPU! 44 | """ 45 | 46 | # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means 47 | screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0 48 | try: 49 | screenspace_points.retain_grad() 50 | except: 51 | pass 52 | 53 | # Set up rasterization configuration 54 | tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) 55 | tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) 56 | 57 | raster_settings = GaussianRasterizationSettings( 58 | image_height=int(viewpoint_camera.image_height), 59 | image_width=int(viewpoint_camera.image_width), 60 | tanfovx=tanfovx, 61 | tanfovy=tanfovy, 62 | bg=bg_color, 63 | scale_modifier=scaling_modifier, 64 | viewmatrix=viewpoint_camera.world_view_transform, 65 | projmatrix=viewpoint_camera.full_proj_transform, 66 | sh_degree=pc.active_sh_degree, 67 | campos=viewpoint_camera.camera_center, 68 | prefiltered=False, 69 | debug=pipe.debug 70 | ) 71 | 72 | rasterizer = GaussianRasterizer(raster_settings=raster_settings) 73 | 74 | means3D = pc.get_xyz 75 | means2D = screenspace_points 76 | opacity = pc.get_opacity 77 | 78 | # If precomputed 3d covariance is provided, use it. If not, then it will be computed from 79 | # scaling / rotation by the rasterizer. 
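    # With pipe.compute_cov3D_python, the 3D covariance is assembled in Python via
    # pc.get_covariance() and passed as cov3D_precomp; otherwise raw scales and
    # rotations are handed to the CUDA rasterizer, which builds the covariance itself.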
80 | scales = None 81 | rotations = None 82 | cov3D_precomp = None 83 | if pipe.compute_cov3D_python: 84 | cov3D_precomp = pc.get_covariance(scaling_modifier) 85 | else: 86 | scales = pc.get_scaling 87 | rotations = pc.get_rotation 88 | 89 | # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors 90 | # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer. 91 | shs = None 92 | colors_precomp = None 93 | if override_color is None: 94 | if pipe.convert_SHs_python: 95 | shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree+1)**2) 96 | dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1)) 97 | dir_pp_normalized = dir_pp/dir_pp.norm(dim=1, keepdim=True) 98 | sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized) 99 | colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0) 100 | else: 101 | shs = pc.get_features 102 | else: 103 | colors_precomp = override_color 104 | 105 | if render_color: 106 | rendered_image, radii, rendered_depth, rendered_alpha = rasterizer( 107 | means3D = means3D, 108 | means2D = means2D, 109 | shs = shs, 110 | colors_precomp = colors_precomp, 111 | opacities = opacity, 112 | scales = scales, 113 | rotations = rotations, 114 | cov3D_precomp = cov3D_precomp) 115 | else: 116 | rendered_image, radii, rendered_depth, rendered_alpha = None, None, None, None 117 | 118 | # ################################################################ 119 | # [Stage 1, Stage 2.1] Render image-level instance feature map # 120 | # - rendered_ins_feat: image-level feat map # 121 | # ################################################################ 122 | # probabilistically rescale 123 | prob = torch.rand(1) 124 | rescale_factor = torch.tensor(1.0, dtype=torch.float32).cuda() 125 | if prob > 0.5 and rescale: 126 | rescale_factor = torch.rand(1).cuda() 127 | if render_feat_map: 128 | # get feature 129 | ins_feat = (pc.get_ins_feat(origin=origin_feat) + 1) / 2 # pseudo -> norm, else -> raw 130 | # first three channels 131 | if control_points: 132 | 133 | rendered_ins_feat, _, _, _ = rasterizer( 134 | means3D = means3D[control_indices], 135 | means2D = means2D[control_indices], 136 | shs = None, 137 | colors_precomp = ins_feat[control_indices, :3], # render features as pre-computed colors 138 | opacities = opacity[control_indices], 139 | scales = scales * rescale_factor, 140 | rotations = rotations[control_indices], 141 | cov3D_precomp = cov3D_precomp) 142 | 143 | # last three channels 144 | if ins_feat.shape[-1] > 3: 145 | rendered_ins_feat2, _, _, _ = rasterizer( 146 | means3D = means3D[control_indices], 147 | means2D = means2D[control_indices], 148 | shs = None, 149 | colors_precomp = ins_feat[control_indices, 3:6], # render features as pre-computed colors 150 | opacities = opacity[control_indices], 151 | scales = scales[control_indices] * rescale_factor, 152 | rotations = rotations[control_indices], 153 | cov3D_precomp = cov3D_precomp) 154 | 155 | rendered_ins_feat = torch.cat((rendered_ins_feat, rendered_ins_feat2), dim=0) 156 | # mask 157 | _, _, _, silhouette = rasterizer( 158 | means3D = means3D[control_indices], 159 | means2D = means2D[control_indices], 160 | shs = shs[control_indices], 161 | colors_precomp = colors_precomp, 162 | opacities = opacity[control_indices], 163 | scales = scales[control_indices] * rescale_factor, 164 | # opacities = opacity*0+1.0, # 165 | # scales = scales*0+0.001, # *0.1 166 | rotations = rotations[control_indices], 167 | 
cov3D_precomp = cov3D_precomp) 168 | else: 169 | ins_feat = (pc.get_ins_feat(origin=origin_feat) + 1) / 2 # pseudo -> norm, else -> raw 170 | # first three channels 171 | rendered_ins_feat, _, _, _ = rasterizer( 172 | means3D = means3D, 173 | means2D = means2D, 174 | shs = None, 175 | colors_precomp = ins_feat[:, :3], # render features as pre-computed colors 176 | opacities = opacity, 177 | scales = scales * rescale_factor, 178 | 179 | rotations = rotations, 180 | cov3D_precomp = cov3D_precomp) 181 | # last three channels 182 | if ins_feat.shape[-1] > 3: 183 | rendered_ins_feat2, _, _, _ = rasterizer( 184 | means3D = means3D, 185 | means2D = means2D, 186 | shs = None, 187 | colors_precomp = ins_feat[:, 3:6], # render features as pre-computed colors 188 | opacities = opacity, 189 | scales = scales * rescale_factor, 190 | 191 | rotations = rotations, 192 | cov3D_precomp = cov3D_precomp) 193 | rendered_ins_feat = torch.cat((rendered_ins_feat, rendered_ins_feat2), dim=0) 194 | # mask 195 | _, _, _, silhouette = rasterizer( 196 | means3D = means3D, 197 | means2D = means2D, 198 | shs = shs, 199 | colors_precomp = colors_precomp, 200 | opacities = opacity, 201 | scales = scales * rescale_factor, 202 | # opacities = opacity*0+1.0, # 203 | # scales = scales*0+0.001, # *0.1 204 | rotations = rotations, 205 | cov3D_precomp = cov3D_precomp) 206 | else: 207 | rendered_ins_feat, silhouette = None, None 208 | 209 | 210 | # ######################################################################## 211 | # [Preprocessing for Stage 2.2]: render (control) cluster-level feat map # 212 | # - rendered_clusters: feat map of the control clusters # 213 | # - rendered_cluster_silhouettes: cluster mask # 214 | # ######################################################################## 215 | # 需要先得到控制点的全局索引,再得到控制点中每个聚类的索引,从而得到每个聚类对应的全局索引 216 | viewed_pts = radii > 0 # ignore the invisible points 217 | if control_cluster_idx is not None: 218 | num_cluster = control_cluster_idx.max() 219 | cluster_occur = torch.zeros(num_cluster).to(torch.bool) # [num_cluster], bool 220 | else: 221 | cluster_occur = None 222 | if render_cluster and control_cluster_idx is not None and viewed_pts.sum() != 0: 223 | ins_feat = (pc.get_ins_feat(origin=origin_feat) + 1) / 2 # pseudo -> norm, else -> raw 224 | rendered_clusters = [] 225 | rendered_cluster_silhouettes = [] 226 | scale_filter = (scales < 0.5).all(dim=1) # filter 227 | for idx in range(1, num_cluster): 228 | if not better_vis and idx != selected_cluster_id: 229 | continue 230 | 231 | # ignore the invisible coarse-level cluster 232 | if viewpoint_camera.bClusterOccur is not None and viewpoint_camera.bClusterOccur[idx] == False: 233 | continue 234 | 235 | # NOTE: Render only the idx-th coarse cluster 236 | filter_idx = control_cluster_idx == idx 237 | filter_idx = filter_idx & viewed_pts 238 | # todo: filter 239 | if better_vis: 240 | filter_idx = filter_idx & scale_filter 241 | if filter_idx.sum() < 100: 242 | continue 243 | 244 | # render cluster-level feat map 245 | rendered_cluster, _, _, cluster_silhouette = rasterizer( 246 | means3D = means3D[filter_idx], 247 | means2D = means2D[filter_idx], 248 | shs = None, # feat 249 | colors_precomp = ins_feat[:, :3][filter_idx], # feat 250 | # shs = shs[filter_idx], # rgb 251 | # colors_precomp = None, # rgb 252 | opacities = opacity[filter_idx], 253 | scales = scales[filter_idx] * rescale_factor, 254 | rotations = rotations[filter_idx], 255 | cov3D_precomp = cov3D_precomp) 256 | if ins_feat.shape[-1] > 3: 257 | rendered_cluster2, _, _, 
cluster_silhouette = rasterizer( 258 | means3D = means3D[filter_idx], 259 | means2D = means2D[filter_idx], 260 | shs = None, # feat 261 | colors_precomp = ins_feat[:, 3:][filter_idx], # feat 262 | # shs = shs[filter_idx], # rgb 263 | # colors_precomp = None, # rgb 264 | opacities = opacity[filter_idx], 265 | scales = scales[filter_idx] * rescale_factor, 266 | rotations = rotations[filter_idx], 267 | cov3D_precomp = cov3D_precomp) 268 | rendered_cluster = torch.cat((rendered_cluster, rendered_cluster2), dim=0) 269 | 270 | # alpha --> mask 271 | if cluster_silhouette.max() > 0.8: 272 | cluster_occur[idx] = True 273 | rendered_clusters.append(rendered_cluster) 274 | rendered_cluster_silhouettes.append(cluster_silhouette) 275 | if len(rendered_cluster_silhouettes) != 0: 276 | rendered_cluster_silhouettes = torch.vstack(rendered_cluster_silhouettes) 277 | else: 278 | rendered_clusters, rendered_cluster_silhouettes = None, None 279 | 280 | 281 | # ############################################################### 282 | # [Stage 2.2 & Stage 3] render (follow) cluster-level feat map # 283 | # - rendered_follow_clusters: feat map of the follow clusters # 284 | # - rendered_follow_cluster_silhouettes: follow cluster mask # 285 | # - occured_follow_id: visible follow cluster id # 286 | # ############################################################### 287 | if follow_cluster_idx is not None and follow_cluster_idx.numel() > 0: 288 | ins_feat = (pc.get_ins_feat(origin=origin_feat) + 1) / 2 # pseudo -> norm, else -> raw 289 | # todo: rescale 290 | scale_filter = (scales < 0.1).all(dim=1) 291 | # scale_filter = (scales < 0.1).all(dim=1) & (opacity > 0.1).squeeze(-1) 292 | re_scale_factor = torch.ones_like(opacity) # not used 293 | 294 | rendered_follow_clusters = [] 295 | rendered_follow_cluster_silhouettes = [] 296 | occured_follow_id = [] 297 | 298 | filter_idx = (follow_cluster_idx.unsqueeze(1) == selected_cluster_id).any(dim=1) 299 | 300 | # pre-mask 301 | if pre_mask is not None: 302 | filter_idx = filter_idx & pre_mask 303 | 304 | filter_idx = filter_idx & viewed_pts 305 | # filter 306 | if better_vis: 307 | filter_idx = filter_idx & scale_filter 308 | 309 | # TODO post process (for 3D object selection) 310 | # pre_count = filter_idx.sum() 311 | max_time = 5 312 | if post_process and max_time > 0: 313 | nearest_k_distance = pytorch3d.ops.knn_points( 314 | means3D[filter_idx].unsqueeze(0), 315 | means3D[filter_idx].unsqueeze(0), 316 | # K=int(filter_idx.sum()**0.5), 317 | K=int(filter_idx.sum()**0.5), 318 | ).dists 319 | mean_nearest_k_distance, std_nearest_k_distance = nearest_k_distance.mean(), nearest_k_distance.std() 320 | # print(std_nearest_k_distance, "std_nearest_k_distance") 321 | 322 | mask = nearest_k_distance.mean(dim = -1) < mean_nearest_k_distance + std_nearest_k_distance 323 | # mask = nearest_k_distance.mean(dim = -1) < mean_nearest_k_distance + 0.1 * std_nearest_k_distance 324 | 325 | mask = mask.squeeze() 326 | if filter_idx is not None: 327 | filter_idx[filter_idx != 0] = mask 328 | max_time -= 1 329 | 330 | # record the fine cluster id appears in the current view. 331 | occured_follow_id.append(selected_cluster_id) 332 | 333 | # note: render cluster rgb or feat. 
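            # seg_rgb=True  -> rasterize the selected cluster with its SH colors (RGB preview);
            # seg_rgb=False -> rasterize the 6-D instance feature in two 3-channel passes
            #                  (channels 0:3 and 3:6), concatenated after the second pass.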
334 | if seg_rgb: 335 | _shs = shs[filter_idx] 336 | _colors_precomp1 = None 337 | _colors_precomp2 = None 338 | else: 339 | _shs = None 340 | _colors_precomp1 = ins_feat[:, :3][filter_idx] 341 | _colors_precomp2 = ins_feat[:, 3:][filter_idx] 342 | 343 | rendered_follow_cluster, _, _, follow_cluster_silhouette = rasterizer( 344 | means3D = means3D[filter_idx], 345 | means2D = means2D[filter_idx], 346 | shs = _shs, # rgb or feat 347 | colors_precomp = _colors_precomp1, # rgb or feat 348 | opacities = opacity[filter_idx], 349 | scales = (scales * re_scale_factor)[filter_idx], 350 | rotations = rotations[filter_idx], 351 | cov3D_precomp = cov3D_precomp) 352 | if ins_feat.shape[-1] > 3: 353 | rendered_follow_cluster2, _, _, _ = rasterizer( 354 | means3D = means3D[filter_idx], 355 | means2D = means2D[filter_idx], 356 | shs = _shs, # rgb or feat 357 | colors_precomp = _colors_precomp2, # rgb or feat 358 | opacities = opacity[filter_idx], 359 | scales = (scales * re_scale_factor)[filter_idx], 360 | rotations = rotations[filter_idx], 361 | cov3D_precomp = cov3D_precomp) 362 | rendered_follow_cluster = torch.cat((rendered_follow_cluster, rendered_follow_cluster2), dim=0) 363 | rendered_follow_clusters.append(rendered_follow_cluster) 364 | rendered_follow_cluster_silhouettes.append(follow_cluster_silhouette) 365 | 366 | if len(rendered_follow_cluster_silhouettes) != 0: 367 | rendered_follow_cluster_silhouettes = torch.vstack(rendered_follow_cluster_silhouettes) 368 | else: 369 | rendered_follow_clusters = None 370 | rendered_follow_cluster_silhouettes = None 371 | occured_leaf_id = None 372 | 373 | # Those Gaussians that were frustum culled or had a radius of 0 were not visible. 374 | # They will be excluded from value updates used in the splitting criteria. 375 | return {"render": rendered_image, 376 | "alpha": rendered_alpha, 377 | "depth": rendered_depth, # not used 378 | "silhouette": silhouette, 379 | "ins_feat": rendered_ins_feat, # image-level feat map 380 | "cluster_imgs": rendered_clusters, # coarse cluster feat map/image 381 | "cluster_silhouettes": rendered_cluster_silhouettes, # coarse cluster mask 382 | "follow_cluster_imgs": rendered_follow_clusters, # fine cluster feat map/image 383 | "follow_cluster_silhouettes": rendered_follow_cluster_silhouettes, # fine cluster mask 384 | "cluster_occur": cluster_occur, # coarse cluster 385 | "viewspace_points": screenspace_points, 386 | "visibility_filter" : radii > 0, 387 | "radii": radii} -------------------------------------------------------------------------------- /gaussian_render/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/gaussian_render/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /gaussian_render/__pycache__/network_gui.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/gaussian_render/__pycache__/network_gui.cpython-37.pyc -------------------------------------------------------------------------------- /gaussian_render/network_gui.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 
5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import traceback 14 | import socket 15 | import json 16 | from scene.cameras import MiniCam 17 | 18 | host = "127.0.0.1" 19 | port = 6009 20 | 21 | conn = None 22 | addr = None 23 | 24 | listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | 26 | def init(wish_host, wish_port): 27 | global host, port, listener 28 | host = wish_host 29 | port = wish_port 30 | listener.bind((host, port)) 31 | listener.listen() 32 | listener.settimeout(0) 33 | 34 | def try_connect(): 35 | global conn, addr, listener 36 | try: 37 | conn, addr = listener.accept() 38 | print(f"\nConnected by {addr}") 39 | conn.settimeout(None) 40 | except Exception as inst: 41 | pass 42 | 43 | def read(): 44 | global conn 45 | messageLength = conn.recv(4) 46 | messageLength = int.from_bytes(messageLength, 'little') 47 | message = conn.recv(messageLength) 48 | return json.loads(message.decode("utf-8")) 49 | 50 | def send(message_bytes, verify): 51 | global conn 52 | if message_bytes != None: 53 | conn.sendall(message_bytes) 54 | conn.sendall(len(verify).to_bytes(4, 'little')) 55 | conn.sendall(bytes(verify, 'ascii')) 56 | 57 | def receive(): 58 | message = read() 59 | 60 | width = message["resolution_x"] 61 | height = message["resolution_y"] 62 | 63 | if width != 0 and height != 0: 64 | try: 65 | do_training = bool(message["train"]) 66 | fovy = message["fov_y"] 67 | fovx = message["fov_x"] 68 | znear = message["z_near"] 69 | zfar = message["z_far"] 70 | do_shs_python = bool(message["shs_python"]) 71 | do_rot_scale_python = bool(message["rot_scale_python"]) 72 | keep_alive = bool(message["keep_alive"]) 73 | scaling_modifier = message["scaling_modifier"] 74 | world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda() 75 | world_view_transform[:,1] = -world_view_transform[:,1] 76 | world_view_transform[:,2] = -world_view_transform[:,2] 77 | full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda() 78 | full_proj_transform[:,1] = -full_proj_transform[:,1] 79 | custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform) 80 | except Exception as e: 81 | print("") 82 | traceback.print_exc() 83 | raise e 84 | return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier 85 | else: 86 | return None, None, None, None, None, None -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | GaussianGraph: 3D Gaussian-based Scene Graph Generation for Open-world Scene Understanding 28 | 30 | 31 | 32 | 33 | 34 | 35 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 |
52 |
53 |
54 |

GaussianGraph: 3D Gaussian-based Scene Graph Generation for Open-world Scene Understanding

55 |
56 | 57 | 58 | Xihan Wang, 59 | 60 | Dianyi Yang, 61 | 62 | Yu Gao 63 | 64 |
65 | 66 |
67 | Beijing Institute of Technology
IROS2025
68 |
69 | 70 |
71 | 116 |
117 |
118 |
119 |
120 |
121 |
122 | 123 | 124 | 125 |
126 |
127 |
128 | 133 |

134 | The motivation, core methods, and visualization results of GaussianGraph. 135 |

136 |
137 |
138 |
139 | 140 | 141 | 142 |
143 |
144 |
145 |
146 |

Abstract

147 |
148 |

149 | Recent advancements in 3D Gaussian Splatting (3DGS) have significantly improved semantic scene understanding, enabling natural language queries to localize objects within a scene. However, existing methods primarily focus on embedding compressed CLIP features into 3D Gaussians, which suffers from low object segmentation accuracy and lacks spatial reasoning capabilities. To address these limitations, we propose GaussianGraph, a novel framework that enhances 3DGS-based scene understanding by integrating adaptive semantic clustering and scene graph generation. We introduce a "Control-Follow" clustering strategy, which dynamically adapts to scene scale and feature distribution, avoiding feature compression and significantly improving segmentation accuracy. Additionally, we enrich the scene representation by integrating object attributes and spatial relations extracted from 2D foundation models. To address inaccuracies in spatial relationships, we propose 3D correction modules that filter implausible relations through spatial consistency verification, ensuring reliable scene graph construction. Extensive experiments on three datasets demonstrate that GaussianGraph outperforms state-of-the-art methods in both semantic segmentation and object grounding tasks, providing a robust solution for complex scene understanding and interaction. 150 |
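For intuition, the following is a minimal, hypothetical sketch of the kind of spatial-consistency test a 3D correction step could apply when filtering an implausible relation such as "A on B". It uses plain axis-aligned bounding boxes and is not the code from this repository; the function name, arguments, and tolerance are assumptions.

import numpy as np

def on_relation_plausible(box_a, box_b, tol=0.05):
    # box_* = (min_xyz, max_xyz) pairs of np.ndarray; z is treated as the up axis
    (a_min, a_max), (b_min, b_max) = box_a, box_b
    # horizontal footprints of A and B must overlap
    overlap_xy = np.all(a_min[:2] <= b_max[:2]) and np.all(b_min[:2] <= a_max[:2])
    # the bottom of A should sit near the top of B
    touching = abs(a_min[2] - b_max[2]) <= tol
    # keep the relation "A on B" only if both geometric conditions hold
    return bool(overlap_xy and touching)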

151 |
152 |
153 |
154 |
155 | 156 | 157 | 158 |
159 |
160 |
161 | 162 | 163 |
164 | 165 |
166 | 167 |

168 | The goal of GaussianGraph is to construct a 3D scene graph of open-world scenes for downstream tasks. First, we extract 2D information including CLIP features, segmentation masks, captions, and relations. Foreground objects and object pairs are fed to LLaVA with prompts to generate captions and relations, which are combined with the CLIP features and segmentation by mask index. Second, given posed multi-view images, we use 3DGS to reconstruct the scene and apply the "Control-Follow" clustering strategy to generate Gaussian clusters. Third, after 3D Gaussian clustering, we build the 3D scene graph by rendering each cluster to multi-view images and matching them with the CLIP features, captions, and relations. Finally, 3D correction modules with four sub-modules are used to refine the scene graph. 169 |
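As a rough illustration of the two-stage idea (not the repository's implementation, which lives in scene/BIRCH_quantize.py as the Cf_cluster class), the sketch below clusters a sparse set of control points on joint semantic-spatial features and then lets every remaining Gaussian follow its nearest control cluster; the random sampling step (a stand-in for FPFH keypoint selection) and all parameter values are simplifying assumptions.

import numpy as np
from sklearn.cluster import Birch

def control_follow_sketch(xyz, feat, n_control=5000, feat_scale=1.0):
    # Stage 1 ("control"): BIRCH-cluster a sparse subset on [scaled feature | position].
    joint = np.concatenate([feat * feat_scale, xyz], axis=1)
    control = np.random.choice(xyz.shape[0], size=min(n_control, xyz.shape[0]), replace=False)
    birch = Birch(threshold=0.5, n_clusters=None).fit(joint[control])
    centers = birch.subcluster_centers_
    # Stage 2 ("follow"): every Gaussian is assigned to its nearest control cluster center.
    dists = ((joint[:, None, :] - centers[None, :, :]) ** 2).sum(-1)
    return dists.argmin(axis=1)  # per-Gaussian cluster id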

170 |
171 |
172 | 173 | 174 |
175 | 176 |
177 | 178 |

179 | Object grounding on the LERF dataset. Our GaussianGraph infers the correct object category with fewer artifacts and less noise. 180 |

181 |
182 |
183 | 184 | 185 |
186 | 187 |
188 | 189 |

190 | Downstream tasks including visual question answering and object grounding. The model needs to accurately identify the object attributes (blue) and spatial relationships (red) contained in the query and infer the correct objects. In the object grounding task, our model effectively mitigates the interference caused by similar objects in adjacent areas. 191 |

192 |
193 |
194 | 195 |
196 |
197 |
198 | 199 | 200 | 201 | 202 |
203 |
204 |
205 |

More Results on Replica and ScanNet Datasets

206 | 207 | 208 |
209 | 213 |
214 | 215 |
216 |
217 |
218 | 219 | 220 | 221 | 222 |
223 |
224 |

BibTeX

225 |
@misc{wang2025gaussiangraph3dgaussianbasedscene,
226 |       title={GaussianGraph: 3D Gaussian-based Scene Graph Generation for Open-world Scene Understanding}, 
227 |       author={Xihan Wang and Dianyi Yang and Yu Gao and Yufeng Yue and Yi Yang and Mengyin Fu},
228 |       year={2025},
229 |       eprint={2503.04034},
230 |       archivePrefix={arXiv},
231 |       primaryClass={cs.CV},
232 |       url={https://arxiv.org/abs/2503.04034}, 
233 | }
234 |
235 |
236 | 237 | 238 | 239 |
240 |
241 |
242 |
243 |
244 | 245 |

246 |

247 | 248 |
249 |
250 |
251 |
252 |
253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | -------------------------------------------------------------------------------- /scene/BIRCH_quantize.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/wangxihan/OpenGaussian/') 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | import open3d as o3d 7 | from open3d import visualization 8 | from sklearn.cluster import DBSCAN 9 | from plyfile import PlyData 10 | from utils.birch_utils import Birch 11 | from gaussian_renderer import GaussianModel 12 | from argparse import ArgumentParser, Namespace 13 | from arguments import ModelParams, PipelineParams, OptimizationParams 14 | 15 | class Cf_cluster(): 16 | 17 | def __init__(self, feat_scale=1, voxel_size=0.02, radius=0.1, max_nn=50, control_points_num=5000, branching_factor=100): 18 | self.feat_scale = feat_scale 19 | self.voxel_size = voxel_size 20 | self.radius = radius 21 | self.max_nn = max_nn 22 | self.control_points_num = control_points_num 23 | self.branching_factor = branching_factor 24 | self.centers = torch.empty(0) 25 | self.cls_ids = torch.empty(0) 26 | self.follow_cls_ids = torch.empty(0) 27 | 28 | def control_points(self, gaussians): 29 | # 采样稀疏控制点需要考虑语义梯度和空间位置,目前只包括空间位置 30 | # 需要对比最远点采样和FPFS采样 31 | #_, self.denoise_indices = self.denoise(gaussians._ins_feat) 32 | #sample_points, sample_indices = self.fpfh_sampling(gaussians._xyz[self.denoise_indices].cpu().numpy()) 33 | self.device = gaussians._xyz.device 34 | sample_points, sample_indices = self.fpfh_sampling(gaussians._xyz.detach().cpu().numpy()) 35 | self.sample_points = torch.from_numpy(np.asarray(sample_points.points)).to(self.device) 36 | sample_indices = torch.from_numpy(sample_indices).to(self.device) 37 | #self.control_indices = self.denoise_indices[sample_indices] 38 | self.control_indices = sample_indices 39 | self.vec_dim = 9 40 | self.sample_features = gaussians._ins_feat[self.control_indices].to(self.device) 41 | 42 | self.all_cluster_ids = torch.zeros(gaussians._xyz.shape[0], dtype=torch.long, device=self.device) 43 | all_indices = torch.arange(gaussians._xyz.shape[0], device=self.device) 44 | self.remain_indices = all_indices[~torch.isin(all_indices, self.control_indices)] 45 | self.remain_points = gaussians._xyz[self.remain_indices].to(self.device) 46 | self.remain_features = gaussians._ins_feat[self.remain_indices].to(self.device) 47 | 48 | def cluster_group(self, mode, iteration, opt): 49 | 50 | if mode == "control": 51 | if iteration == opt.start_control_cb_iter + 1: 52 | self.partial = False 53 | feat_std = self.sample_features.std(dim=0).mean().item() # 取所有特征的平均标准差 54 | xyz_std = self.sample_points.std(dim=0).mean().item() # 取空间坐标的标准差平均值 55 | threshold = (feat_std + xyz_std) / 2 56 | self.birch_model = Birch(threshold=threshold, branching_factor=self.branching_factor, n_clusters=None) 57 | else: 58 | self.partial = True 59 | cluster_ids, cluster_centers, birch_model = self.birch(self.sample_features, self.sample_points, self.partial, self.birch_model) 60 | self.all_cluster_ids[self.control_indices] = cluster_ids + 1 61 | self.control_cls_ids = self.all_cluster_ids 62 | self.control_centers = cluster_centers 63 | self.cluster_num = max(cluster_ids) + 1 64 | self.birch_model = birch_model 65 | 66 | # 可视化采样点BIRCH聚类结果 67 | #self.visualize_clusters(sample_points, cluster_ids) 68 | 69 | elif mode == "follow": 70 | # BIRCH.predict是否可以替代match_remain_points??? 
71 | if iteration == opt.start_follow_cb_iter + 1: 72 | self.cluster_centers = self.control_centers 73 | self.cluster_centers, cluster_num, remain_cluster_ids = self.match_remain_points(torch.cat([self.remain_features*self.feat_scale, self.remain_points],dim=1), self.cluster_centers) 74 | self.all_cluster_ids[self.remain_indices] = remain_cluster_ids 75 | self.follow_cls_ids = self.all_cluster_ids 76 | self.follow_centers = self.cluster_centers 77 | #self.visualize_clusters(gaussians._xyz, all_cluster_ids) 78 | self.cluster_num = cluster_num 79 | 80 | def denoise(self, features): 81 | # Convert the PyTorch tensor to a NumPy array for DBSCAN 82 | features_np = features.cpu().numpy() 83 | 84 | # Cluster with DBSCAN 85 | db = DBSCAN(eps=0.5, min_samples=10) 86 | labels = db.fit_predict(features_np) 87 | 88 | # In the labels, -1 marks noise points, which need to be filtered out 89 | noise_points_mask = labels == -1 90 | 91 | # Extract the denoised data (noise points removed) 92 | clean_points = features[~torch.tensor(noise_points_mask).cuda()] 93 | # Get the indices of the denoised data 94 | clean_indices = torch.nonzero(~torch.tensor(noise_points_mask).cuda()).squeeze() 95 | 96 | return clean_points, clean_indices 97 | 98 | def fpfh_sampling(self, points): 99 | """ 100 | Sample salient keypoints from the given point cloud via FPFH features and return the sampled points together with their indices. 101 | 102 | Args: 103 | points (numpy.ndarray): input point cloud of shape (N, 3). 104 | (the number of sampled points is controlled by self.control_points_num) 105 | 106 | Returns: 107 | keypoints_cloud (open3d.geometry.PointCloud): the sampled keypoint cloud. 108 | keypoints_indices (numpy.ndarray): indices of the sampled points in the voxel-downsampled cloud. 109 | """ 110 | # Convert the input point cloud to an open3d point cloud object 111 | pcd = o3d.geometry.PointCloud() 112 | pcd.points = o3d.utility.Vector3dVector(points) 113 | 114 | # Downsample the point cloud 115 | voxel_down_pcd = pcd.voxel_down_sample(self.voxel_size) 116 | 117 | # Estimate normals 118 | voxel_down_pcd.estimate_normals( 119 | search_param=o3d.geometry.KDTreeSearchParamHybrid(self.radius, self.max_nn)) 120 | 121 | # Compute FPFH features 122 | fpfh = o3d.pipelines.registration.compute_fpfh_feature( 123 | voxel_down_pcd, 124 | search_param=o3d.geometry.KDTreeSearchParamHybrid(self.radius, self.max_nn)) 125 | 126 | # Get the keypoint indices 127 | keypoints_indices = np.argpartition(np.asarray(fpfh.data).sum(axis=0), -self.control_points_num)[-self.control_points_num:] 128 | keypoints_cloud = voxel_down_pcd.select_by_index(keypoints_indices) 129 | 130 | return keypoints_cloud, keypoints_indices 131 | 132 | def birch(self, feat, xyz, partial, birch_model): 133 | 134 | cluster_feature = torch.cat([feat*self.feat_scale, xyz], dim=1) 135 | birch_model.fit(cluster_feature, partial) 136 | 137 | # Get cluster assignments for control clusters 138 | cls_ids = torch.tensor(birch_model.predict(cluster_feature), dtype=torch.long).to(self.device) # [num_pts] 139 | # Extract control cluster centers (centroids) 140 | centers = torch.tensor(birch_model.subcluster_centers_, dtype=torch.float32).to(self.device) # [k1, 9] 141 | 142 | return cls_ids, centers, birch_model 143 | 144 | def get_dist(self, features, cluster_feats, mode='sq_euclidean_chunk'): 145 | """ 146 | Compute the distance between the given features and the cluster centers. 147 | 148 | features: features to be processed (batch_size, feature_dim) 149 | cluster_feats: known cluster-center features (cluster_num, feature_dim) 150 | mode: distance metric ('sq_euclidean_chunk' or 'cosine') 151 | 152 | Returns: 153 | dist: distances between the features and the cluster centers (batch_size, cluster_num) 154 | """ 155 | if mode == 'sq_euclidean_chunk': 156 | # Squared Euclidean distance 157 | dist = torch.cdist(features, cluster_feats, p=2) ** 2 158 | elif mode == 'cosine': 159 | # Cosine distance 160 | features_norm = F.normalize(features, p=2, dim=-1) 161 | cluster_feats_norm = F.normalize(cluster_feats, p=2, dim=-1) 162 | dist = 1 - torch.mm(features_norm, cluster_feats_norm.T) 163 | return dist 164 | 165 | def
update_centers_(self, old_features, new_features, dist, curr_nn_index, cluster_weight = 0.9): 166 | """ 167 | Update the cluster centers. 168 | 169 | new_features: features being assigned in this update (batch_size, feature_dim) 170 | dist: one-hot style assignment of the features to the cluster centers (batch_size, cluster_num) 171 | curr_nn_index: index of the current nearest cluster center 172 | cluster_weight: blending weight between the old center and the mean of the newly assigned features 173 | 174 | Returns: 175 | updated_centers: updated cluster centers (cluster_num, feature_dim) 176 | """ 177 | updated_centers = [] 178 | cluster_num = dist.shape[1] # number of cluster centers 179 | for i in range(cluster_num): 180 | # Gather all features assigned to this cluster 181 | new_cluster_features = new_features[dist[:, i] == 1] 182 | if new_cluster_features.size(0) > 0: 183 | 184 | # Use the blended mean as the new cluster center 185 | updated_center = cluster_weight * old_features[i] + (1 - cluster_weight) * new_cluster_features.mean(dim=0) 186 | updated_centers.append(updated_center) 187 | else: 188 | # If the cluster received no points, keep the original center 189 | updated_centers.append(old_features[i]) 190 | 191 | return torch.stack(updated_centers) 192 | 193 | def match_remain_points(self, remain_feats, cluster_feats, threshold=0.1, chunk=10000): 194 | """ 195 | Match the feature points to clusters, update the cluster centers, and form new clusters. 196 | 197 | remain_feats: features of all remaining follow points [N, feature_dim] 198 | cluster_feats: known cluster-center features [cluster_num, feature_dim] 199 | threshold: distance threshold; points below it are assigned to an existing cluster, otherwise they form a new cluster 200 | chunk: chunk size used to reduce memory usage 201 | 202 | Returns: 203 | - updated_cluster_feats: updated cluster centers 204 | - updated_cluster_num: updated number of clusters 205 | - cluster_ids: cluster id assigned to each remaining point 206 | """ 207 | updated_feats = remain_feats 208 | updated_cluster_feats = cluster_feats 209 | updated_cluster_num = cluster_feats.shape[0] 210 | remain_num = remain_feats.size(0) 211 | cluster_ids = torch.zeros(remain_num, dtype=torch.long).to(remain_feats.device) 212 | 213 | for i in range(0, remain_num, chunk): 214 | # Process each chunk 215 | end_idx = min(i + chunk, remain_num) 216 | chunk_feats = remain_feats[i:end_idx] 217 | 218 | # Distance between this chunk's features and the cluster centers 219 | dist = self.get_dist(chunk_feats, updated_cluster_feats, mode='sq_euclidean_chunk') 220 | curr_nn_index = torch.argmin(dist, dim=-1) + 1 # shape: [chunk_size] 221 | # TODO: convert the assignment to one-hot encoding?
222 | 223 | # Normalize distances to [0, 1] 224 | dist_min = dist.min() 225 | dist_max = dist.max() 226 | normalized_dist = (dist - dist_min) / (dist_max - dist_min) 227 | 228 | # Check the assignment condition 229 | # Points whose distance is below the threshold join an existing cluster 230 | cluster_ids[i:end_idx][normalized_dist.min(dim=-1).values <= threshold] = curr_nn_index[normalized_dist.min(dim=-1).values <= threshold] 231 | # Points whose distance exceeds the threshold form new clusters 232 | new_feats_mask = normalized_dist.min(dim=-1).values > threshold 233 | new_feats = chunk_feats[new_feats_mask] 234 | 235 | # Check whether the new clusters can be merged 236 | if new_feats.size(0) > 0: 237 | # Create new clusters (one id per point) 238 | new_cluster_ids = torch.arange(updated_cluster_num + 1, updated_cluster_num + new_feats.size(0) + 1, device=remain_feats.device) 239 | cluster_ids[i:end_idx][new_feats_mask] = new_cluster_ids # assign new cluster IDs to the new points 240 | 241 | updated_cluster_feats = torch.cat([updated_cluster_feats, new_feats], dim=0) 242 | 243 | # Distance matrix between the new clusters 244 | new_dist = self.get_dist(new_feats, new_feats, mode='sq_euclidean_chunk') 245 | 246 | # Normalize distances to [0, 1] 247 | dist_min = new_dist.min() 248 | dist_max = new_dist.max() 249 | normalized_dist = (new_dist - dist_min) / (dist_max - dist_min) 250 | 251 | # Merge condition: new clusters closer than the threshold can be merged 252 | merge_mask = normalized_dist <= threshold 253 | # Flags marking cluster centers that have been removed 254 | deleted = torch.zeros(updated_cluster_feats.shape[0], dtype=torch.bool) 255 | visited = torch.zeros([updated_cluster_feats.shape[0], updated_cluster_feats.shape[0]], dtype=torch.bool) 256 | 257 | # Merge clusters when the merge condition holds 258 | if merge_mask.any(): 259 | # Find the cluster pairs to merge 260 | merge_indices = torch.where(merge_mask) # cluster pairs that need merging 261 | 262 | for idx1, idx2 in zip(*merge_indices): 263 | if idx1 != idx2: # avoid merging a cluster with itself 264 | idx1, idx2 = idx1+cluster_feats.shape[0]+1, idx2+cluster_feats.shape[0]+1 265 | 266 | # Make sure each pair of cluster centers is merged only once 267 | if visited[idx1][idx2] or visited[idx2][idx1]: 268 | continue 269 | 270 | # Merge the points belonging to idx2 into idx1 271 | cluster_ids[(cluster_ids == idx2) | (cluster_ids == idx1)] = min(idx1, idx2) 272 | 273 | # Update the cluster center to the size-weighted mean of the two 274 | updated_cluster_feats[min(idx1, idx2)] = (updated_cluster_feats[idx1] * len(cluster_ids[cluster_ids == idx1]) 275 | + updated_cluster_feats[idx2] * len(cluster_ids[cluster_ids == idx2]) 276 | ) / (len(cluster_ids[cluster_ids == idx1]) + len(cluster_ids[cluster_ids == idx2])) 277 | 278 | # Remove the idx2 cluster center 279 | deleted[max(idx1,idx2)] = True 280 | visited[idx1][idx2] = True 281 | visited[idx2][idx1] = True 282 | 283 | updated_cluster_feats = updated_cluster_feats[~deleted] 284 | 285 | updated_cluster_num = updated_cluster_feats.shape[0] 286 | 287 | return updated_cluster_feats, updated_cluster_num, cluster_ids 288 | 289 | def sigmoid(x): 290 | """Sigmoid function.""" 291 | return 1 / (1 + np.exp(-x)) 292 | 293 | def visualize_ply(self, ply_path): 294 | # Load the PLY file 295 | ply_data = PlyData.read(ply_path) 296 | vertex_data = ply_data['vertex'].data 297 | 298 | # Extract the point cloud attributes 299 | points = np.array([vertex_data['x'], vertex_data['y'], vertex_data['z']]).T 300 | colors = np.array([vertex_data['red'], vertex_data['green'], vertex_data['blue']]).T / 255.0 301 | opacity = vertex_data['opacity'] 302 | 303 | # Apply the opacity filter 304 | sigmoid_opacity = self.sigmoid(opacity) 305 | filtered_indices = sigmoid_opacity >= 0.1 306 | filtered_points = points[filtered_indices] 307 | filtered_colors = colors[filtered_indices] 308 | 309 | # Create an Open3D PointCloud object 310 | pcd = o3d.geometry.PointCloud() 311 | pcd.points = o3d.utility.Vector3dVector(filtered_points) 312 | pcd.colors = o3d.utility.Vector3dVector(filtered_colors)
313 | 314 | # Visualize the point cloud 315 | o3d.visualization.draw_geometries([pcd]) 316 | 317 | def visualize_clusters(xyz, cls_ids): 318 | """ 319 | Visualize the clustering result. 320 | :param xyz: point-cloud coordinates, shape: [num_pts, 3] 321 | :param cls_ids: cluster labels of the points, shape: [num_pts] 322 | :param centers: cluster centers, shape: [num_clusters, 3] 323 | """ 324 | # Convert the data to an open3d point cloud object 325 | xyz_np = xyz.cpu().numpy() # convert to a NumPy array 326 | cls_ids = cls_ids.cpu().numpy() 327 | 328 | # Get the number of classes and build a color map 329 | unique_cls_ids = np.unique(cls_ids) 330 | n_cls = len(unique_cls_ids) 331 | 332 | # Generate a random color for each class (RGB values in [0, 1]) 333 | np.random.seed(42) # fixed seed so the same random colors are generated every run 334 | random_colors = np.random.rand(n_cls, 3) # random RGB colors of shape [n_cls, 3] 335 | 336 | # Assign a color to each point 337 | colors = np.array([random_colors[np.where(unique_cls_ids == cls_id)[0][0]] for cls_id in cls_ids]) 338 | 339 | pcd = o3d.geometry.PointCloud() 340 | pcd.points = o3d.utility.Vector3dVector(xyz_np) 341 | pcd.colors = o3d.utility.Vector3dVector(colors) 342 | 343 | # Visualize the point cloud 344 | o3d.visualization.draw_geometries([pcd]) 345 | 346 | 347 | def forward(self, gaussian, mode, iteration, opt): 348 | 349 | # TODO: re-select the control points every round, or keep using the same control points? 350 | # self.control_points(gaussians=gaussian) 351 | self.cluster_group(mode, iteration, opt) 352 | 353 | if mode == "control": 354 | centers = self.control_centers 355 | self.nn_index = self.control_cls_ids - 1 356 | elif mode == "follow": 357 | centers = self.follow_centers 358 | self.nn_index = self.follow_cls_ids - 1 359 | 360 | # Check whether nn_index is out of range 361 | out_of_bounds_mask = (self.nn_index < -1) | (self.nn_index >= centers.shape[0]) 362 | 363 | if torch.any(out_of_bounds_mask): 364 | # Print the out-of-range index values 365 | out_of_bounds_indices = self.nn_index[out_of_bounds_mask] 366 | print("Out of bounds indices:", out_of_bounds_indices) 367 | 368 | # Raise an AssertionError 369 | raise AssertionError("nn_index is out of the bounds of centers.") 370 | 371 | if mode == "control": 372 | valid_mask = self.nn_index != -1 373 | sampled_centers = torch.zeros((self.nn_index.shape[0], self.vec_dim), device=self.device) 374 | sampled_centers[valid_mask] = torch.gather(centers, 0, self.nn_index[valid_mask].unsqueeze(-1).repeat(1, self.vec_dim)) 375 | elif mode == "follow": 376 | sampled_centers = torch.gather(centers, 0, self.nn_index.unsqueeze(-1).repeat(1, self.vec_dim)) 377 | 378 | # NOTE: "During backpropagation, the gradients of the quantized features are copied to the instance features", mentioned in the paper. 379 | # _ins_feat_q is numerically equal to sampled_centers[:, :6], but during backpropagation the gradient flows directly to the original _ins_feat 380 | gaussian._ins_feat_q = gaussian._ins_feat - gaussian._ins_feat.detach() + sampled_centers[:,:6] 381 | # During backpropagation, make sure gradients are only computed for valid indices; gradients for the rest are not propagated 382 | # gaussian._ins_feat_q = gaussian._ins_feat_q * valid_mask.unsqueeze(-1).float() # compute gradients only for valid indices 383 | 384 | 385 | 386 | -------------------------------------------------------------------------------- /scene/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file.
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import random 14 | import json 15 | from utils.system_utils import searchForMaxIteration 16 | from scene.dataset_readers import sceneLoadTypeCallbacks 17 | from scene.gaussian_model import GaussianModel 18 | from arguments import ModelParams 19 | from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON 20 | 21 | class Scene: 22 | 23 | gaussians : GaussianModel 24 | 25 | def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0]): 26 | """b 27 | :param path: Path to colmap scene main folder. 28 | """ 29 | self.model_path = args.model_path 30 | self.loaded_iter = None 31 | self.gaussians = gaussians 32 | 33 | if load_iteration: 34 | if load_iteration == -1: 35 | self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud")) 36 | else: 37 | self.loaded_iter = load_iteration 38 | print("Loading trained model at iteration {}".format(self.loaded_iter)) 39 | 40 | self.train_cameras = {} 41 | self.test_cameras = {} 42 | if os.path.exists(os.path.join(args.source_path, "sparse")): 43 | scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval) 44 | elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")): 45 | print("Found transforms_train.json file, assuming Blender data set!") 46 | scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.eval) 47 | else: 48 | assert False, "Could not recognize scene type!" 49 | 50 | if not self.loaded_iter: 51 | with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file: 52 | dest_file.write(src_file.read()) 53 | json_cams = [] 54 | camlist = [] 55 | if scene_info.test_cameras: 56 | camlist.extend(scene_info.test_cameras) 57 | if scene_info.train_cameras: 58 | camlist.extend(scene_info.train_cameras) 59 | for id, cam in enumerate(camlist): 60 | json_cams.append(camera_to_JSON(id, cam)) 61 | with open(os.path.join(self.model_path, "cameras.json"), 'w') as file: 62 | json.dump(json_cams, file) 63 | 64 | if shuffle: 65 | random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling 66 | random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling 67 | 68 | self.cameras_extent = scene_info.nerf_normalization["radius"] 69 | 70 | for resolution_scale in resolution_scales: 71 | print("Resolution: ", resolution_scale) 72 | print("Loading Training Cameras") 73 | self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args) 74 | print("Loading Test Cameras") 75 | self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args) 76 | 77 | if self.loaded_iter: 78 | self.gaussians.load_ply(os.path.join(self.model_path, 79 | "point_cloud", 80 | "iteration_" + str(self.loaded_iter), 81 | "point_cloud.ply")) 82 | else: 83 | self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent) 84 | 85 | def save(self, iteration, save_q=[]): 86 | point_cloud_path = os.path.join(self.model_path, "point_cloud/iteration_{}".format(iteration)) 87 | self.gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply"), save_q) 88 | 89 | def getTrainCameras(self, scale=1.0): 90 | return self.train_cameras[scale] 91 | 92 | def getTestCameras(self, scale=1.0): 93 | return self.test_cameras[scale] 
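For orientation, here is a small, hypothetical driver showing how the Scene and GaussianModel classes above are typically wired together by the training scripts; the command-line flags, paths, and the sh_degree value are assumptions and may differ from the repository's actual entry points.

import os
from argparse import ArgumentParser
from arguments import ModelParams
from scene import Scene, GaussianModel

parser = ArgumentParser()
lp = ModelParams(parser)  # registers --source_path, --model_path, etc.
args = parser.parse_args(["--source_path", "data/scene0000", "--model_path", "output/scene0000"])
os.makedirs(args.model_path, exist_ok=True)

gaussians = GaussianModel(sh_degree=3)
scene = Scene(lp.extract(args), gaussians)  # loads COLMAP or Blender data and builds the camera lists
train_cams = scene.getTrainCameras()        # cameras at resolution scale 1.0
scene.save(iteration=30000)                 # writes point_cloud/iteration_30000/point_cloud.ply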
-------------------------------------------------------------------------------- /scene/__pycache__/BIRCH_quantize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/BIRCH_quantize.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /scene/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/cameras.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/cameras.cpython-310.pyc -------------------------------------------------------------------------------- /scene/__pycache__/cameras.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/cameras.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/colmap_loader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/colmap_loader.cpython-310.pyc -------------------------------------------------------------------------------- /scene/__pycache__/colmap_loader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/colmap_loader.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/dataset_readers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/dataset_readers.cpython-310.pyc -------------------------------------------------------------------------------- /scene/__pycache__/dataset_readers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/dataset_readers.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/gaussian_model.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/gaussian_model.cpython-310.pyc -------------------------------------------------------------------------------- /scene/__pycache__/gaussian_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/gaussian_model.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/kmeans_quantize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/kmeans_quantize.cpython-37.pyc -------------------------------------------------------------------------------- /scene/__pycache__/kmeans_quantize_ablation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/scene/__pycache__/kmeans_quantize_ablation.cpython-37.pyc -------------------------------------------------------------------------------- /scene/cameras.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | from torch import nn 14 | import numpy as np 15 | from utils.graphics_utils import getWorld2View2, getProjectionMatrix 16 | 17 | class Camera(nn.Module): 18 | def __init__(self, colmap_id, R, T, FoVx, FoVy, cx, cy, image, depth, gt_alpha_mask, 19 | gt_sam_mask, gt_mask_feat, 20 | image_name, uid, 21 | trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" 22 | ): 23 | super(Camera, self).__init__() 24 | 25 | self.uid = uid 26 | self.colmap_id = colmap_id 27 | self.R = R 28 | self.T = T 29 | self.FoVx = FoVx 30 | self.FoVy = FoVy 31 | # modify ----- 32 | self.cx = cx 33 | self.cy = cy 34 | # modify ----- 35 | self.image_name = image_name 36 | 37 | try: 38 | self.data_device = torch.device(data_device) 39 | except Exception as e: 40 | print(e) 41 | print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) 42 | self.data_device = torch.device("cuda") 43 | 44 | self.data_on_gpu = True # note 45 | self.original_image = image.clamp(0.0, 1.0).to(self.data_device) 46 | # modify ----- 47 | self.original_mask = gt_alpha_mask.to(self.data_device) if gt_alpha_mask is not None else None 48 | 49 | # modify ----- 50 | self.original_sam_mask = gt_sam_mask.to(self.data_device) if gt_sam_mask is not None else None 51 | self.original_mask_feat = gt_mask_feat.to(self.data_device) if gt_mask_feat is not None else None 52 | self.pesudo_ins_feat = None 53 | self.pesudo_mask_bool = None 54 | self.cluster_masks = None 55 | self.bClusterOccur = None 56 | self.image_width = self.original_image.shape[2] 57 | self.image_height = self.original_image.shape[1] 58 | 59 | if gt_alpha_mask is not None: 60 | self.original_image *= gt_alpha_mask.to(self.data_device) 61 | else: 62 | self.original_image *= torch.ones((1, 
self.image_height, self.image_width), device=self.data_device) 63 | 64 | self.zfar = 100.0 65 | self.znear = 0.01 66 | 67 | self.trans = trans 68 | self.scale = scale 69 | 70 | self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() 71 | self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() 72 | self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) 73 | self.camera_center = self.world_view_transform.inverse()[3, :3] 74 | 75 | # modify ----- 76 | def to_gpu(self): 77 | for attr_name in dir(self): 78 | attr = getattr(self, attr_name) 79 | if isinstance(attr, torch.Tensor) and not attr.is_cuda: 80 | setattr(self, attr_name, attr.to('cuda')) 81 | self.data_on_gpu = True 82 | 83 | # modify ----- 84 | def to_cpu(self): 85 | for attr_name in dir(self): 86 | attr = getattr(self, attr_name) 87 | if isinstance(attr, torch.Tensor) and attr.is_cuda: 88 | setattr(self, attr_name, attr.to('cpu')) 89 | self.data_on_gpu = False 90 | 91 | class MiniCam: 92 | def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): 93 | self.image_width = width 94 | self.image_height = height 95 | self.FoVy = fovy 96 | self.FoVx = fovx 97 | self.znear = znear 98 | self.zfar = zfar 99 | self.world_view_transform = world_view_transform 100 | self.full_proj_transform = full_proj_transform 101 | view_inv = torch.inverse(self.world_view_transform) 102 | self.camera_center = view_inv[3][:3] 103 | 104 | -------------------------------------------------------------------------------- /scene/colmap_loader.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 
8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import numpy as np 13 | import collections 14 | import struct 15 | 16 | CameraModel = collections.namedtuple( 17 | "CameraModel", ["model_id", "model_name", "num_params"]) 18 | Camera = collections.namedtuple( 19 | "Camera", ["id", "model", "width", "height", "params"]) 20 | BaseImage = collections.namedtuple( 21 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 22 | Point3D = collections.namedtuple( 23 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 24 | CAMERA_MODELS = { 25 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 26 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 27 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 28 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 29 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 30 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 31 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 32 | CameraModel(model_id=7, model_name="FOV", num_params=5), 33 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 34 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 35 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 36 | } 37 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) 38 | for camera_model in CAMERA_MODELS]) 39 | CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) 40 | for camera_model in CAMERA_MODELS]) 41 | 42 | 43 | def qvec2rotmat(qvec): 44 | return np.array([ 45 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 46 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 47 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 48 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 49 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 50 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 51 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 52 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 53 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 54 | 55 | def rotmat2qvec(R): 56 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 57 | K = np.array([ 58 | [Rxx - Ryy - Rzz, 0, 0, 0], 59 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 60 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 61 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 62 | eigvals, eigvecs = np.linalg.eigh(K) 63 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 64 | if qvec[0] < 0: 65 | qvec *= -1 66 | return qvec 67 | 68 | class Image(BaseImage): 69 | def qvec2rotmat(self): 70 | return qvec2rotmat(self.qvec) 71 | 72 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 73 | """Read and unpack the next bytes from a binary file. 74 | :param fid: 75 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 76 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 77 | :param endian_character: Any of {@, =, <, >, !} 78 | :return: Tuple of read and unpacked values. 
79 | """ 80 | data = fid.read(num_bytes) 81 | return struct.unpack(endian_character + format_char_sequence, data) 82 | 83 | def read_points3D_text(path): 84 | """ 85 | see: src/base/reconstruction.cc 86 | void Reconstruction::ReadPoints3DText(const std::string& path) 87 | void Reconstruction::WritePoints3DText(const std::string& path) 88 | """ 89 | xyzs = None 90 | rgbs = None 91 | errors = None 92 | num_points = 0 93 | with open(path, "r") as fid: 94 | while True: 95 | line = fid.readline() 96 | if not line: 97 | break 98 | line = line.strip() 99 | if len(line) > 0 and line[0] != "#": 100 | num_points += 1 101 | 102 | 103 | xyzs = np.empty((num_points, 3)) 104 | rgbs = np.empty((num_points, 3)) 105 | errors = np.empty((num_points, 1)) 106 | count = 0 107 | with open(path, "r") as fid: 108 | while True: 109 | line = fid.readline() 110 | if not line: 111 | break 112 | line = line.strip() 113 | if len(line) > 0 and line[0] != "#": 114 | elems = line.split() 115 | xyz = np.array(tuple(map(float, elems[1:4]))) 116 | rgb = np.array(tuple(map(int, elems[4:7]))) 117 | error = np.array(float(elems[7])) 118 | xyzs[count] = xyz 119 | rgbs[count] = rgb 120 | errors[count] = error 121 | count += 1 122 | 123 | return xyzs, rgbs, errors 124 | 125 | def read_points3D_binary(path_to_model_file): 126 | """ 127 | see: src/base/reconstruction.cc 128 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 129 | void Reconstruction::WritePoints3DBinary(const std::string& path) 130 | """ 131 | 132 | 133 | with open(path_to_model_file, "rb") as fid: 134 | num_points = read_next_bytes(fid, 8, "Q")[0] 135 | 136 | xyzs = np.empty((num_points, 3)) 137 | rgbs = np.empty((num_points, 3)) 138 | errors = np.empty((num_points, 1)) 139 | 140 | for p_id in range(num_points): 141 | binary_point_line_properties = read_next_bytes( 142 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 143 | xyz = np.array(binary_point_line_properties[1:4]) 144 | rgb = np.array(binary_point_line_properties[4:7]) 145 | error = np.array(binary_point_line_properties[7]) 146 | track_length = read_next_bytes( 147 | fid, num_bytes=8, format_char_sequence="Q")[0] 148 | track_elems = read_next_bytes( 149 | fid, num_bytes=8*track_length, 150 | format_char_sequence="ii"*track_length) 151 | xyzs[p_id] = xyz 152 | rgbs[p_id] = rgb 153 | errors[p_id] = error 154 | return xyzs, rgbs, errors 155 | 156 | def read_intrinsics_text(path): 157 | """ 158 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 159 | """ 160 | cameras = {} 161 | with open(path, "r") as fid: 162 | while True: 163 | line = fid.readline() 164 | if not line: 165 | break 166 | line = line.strip() 167 | if len(line) > 0 and line[0] != "#": 168 | elems = line.split() 169 | camera_id = int(elems[0]) 170 | model = elems[1] 171 | assert model == "PINHOLE", "While the loader support other types, the rest of the code assumes PINHOLE" 172 | width = int(elems[2]) 173 | height = int(elems[3]) 174 | params = np.array(tuple(map(float, elems[4:]))) 175 | cameras[camera_id] = Camera(id=camera_id, model=model, 176 | width=width, height=height, 177 | params=params) 178 | return cameras 179 | 180 | def read_extrinsics_binary(path_to_model_file): 181 | """ 182 | see: src/base/reconstruction.cc 183 | void Reconstruction::ReadImagesBinary(const std::string& path) 184 | void Reconstruction::WriteImagesBinary(const std::string& path) 185 | """ 186 | images = {} 187 | with open(path_to_model_file, "rb") as fid: 188 | num_reg_images = read_next_bytes(fid, 8, 
"Q")[0] 189 | for _ in range(num_reg_images): 190 | binary_image_properties = read_next_bytes( 191 | fid, num_bytes=64, format_char_sequence="idddddddi") 192 | image_id = binary_image_properties[0] 193 | qvec = np.array(binary_image_properties[1:5]) 194 | tvec = np.array(binary_image_properties[5:8]) 195 | camera_id = binary_image_properties[8] 196 | image_name = "" 197 | current_char = read_next_bytes(fid, 1, "c")[0] 198 | while current_char != b"\x00": # look for the ASCII 0 entry 199 | image_name += current_char.decode("utf-8") 200 | current_char = read_next_bytes(fid, 1, "c")[0] 201 | num_points2D = read_next_bytes(fid, num_bytes=8, 202 | format_char_sequence="Q")[0] 203 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 204 | format_char_sequence="ddq"*num_points2D) 205 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 206 | tuple(map(float, x_y_id_s[1::3]))]) 207 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 208 | images[image_id] = Image( 209 | id=image_id, qvec=qvec, tvec=tvec, 210 | camera_id=camera_id, name=image_name, 211 | xys=xys, point3D_ids=point3D_ids) 212 | return images 213 | 214 | 215 | def read_intrinsics_binary(path_to_model_file): 216 | """ 217 | see: src/base/reconstruction.cc 218 | void Reconstruction::WriteCamerasBinary(const std::string& path) 219 | void Reconstruction::ReadCamerasBinary(const std::string& path) 220 | """ 221 | cameras = {} 222 | with open(path_to_model_file, "rb") as fid: 223 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 224 | for _ in range(num_cameras): 225 | camera_properties = read_next_bytes( 226 | fid, num_bytes=24, format_char_sequence="iiQQ") 227 | camera_id = camera_properties[0] 228 | model_id = camera_properties[1] 229 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 230 | width = camera_properties[2] 231 | height = camera_properties[3] 232 | num_params = CAMERA_MODEL_IDS[model_id].num_params 233 | params = read_next_bytes(fid, num_bytes=8*num_params, 234 | format_char_sequence="d"*num_params) 235 | cameras[camera_id] = Camera(id=camera_id, 236 | model=model_name, 237 | width=width, 238 | height=height, 239 | params=np.array(params)) 240 | assert len(cameras) == num_cameras 241 | return cameras 242 | 243 | 244 | def read_extrinsics_text(path): 245 | """ 246 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py 247 | """ 248 | images = {} 249 | with open(path, "r") as fid: 250 | while True: 251 | line = fid.readline() 252 | if not line: 253 | break 254 | line = line.strip() 255 | if len(line) > 0 and line[0] != "#": 256 | elems = line.split() 257 | image_id = int(elems[0]) 258 | qvec = np.array(tuple(map(float, elems[1:5]))) 259 | tvec = np.array(tuple(map(float, elems[5:8]))) 260 | camera_id = int(elems[8]) 261 | image_name = elems[9] 262 | elems = fid.readline().split() 263 | xys = np.column_stack([tuple(map(float, elems[0::3])), 264 | tuple(map(float, elems[1::3]))]) 265 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 266 | images[image_id] = Image( 267 | id=image_id, qvec=qvec, tvec=tvec, 268 | camera_id=camera_id, name=image_name, 269 | xys=xys, point3D_ids=point3D_ids) 270 | return images 271 | 272 | 273 | def read_colmap_bin_array(path): 274 | """ 275 | Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_dense.py 276 | 277 | :param path: path to the colmap binary file. 
278 | :return: nd array with the floating point values in the value 279 | """ 280 | with open(path, "rb") as fid: 281 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 282 | usecols=(0, 1, 2), dtype=int) 283 | fid.seek(0) 284 | num_delimiter = 0 285 | byte = fid.read(1) 286 | while True: 287 | if byte == b"&": 288 | num_delimiter += 1 289 | if num_delimiter >= 3: 290 | break 291 | byte = fid.read(1) 292 | array = np.fromfile(fid, np.float32) 293 | array = array.reshape((width, height, channels), order="F") 294 | return np.transpose(array, (1, 0, 2)).squeeze() 295 | -------------------------------------------------------------------------------- /scene/dataset_readers.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import os 13 | import sys 14 | import json 15 | from PIL import Image 16 | from typing import NamedTuple 17 | from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \ 18 | read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text 19 | from utils.graphics_utils import getWorld2View2, focal2fov, fov2focal 20 | import numpy as np 21 | import json 22 | import random 23 | from tqdm import tqdm 24 | from pathlib import Path 25 | from plyfile import PlyData, PlyElement 26 | from utils.sh_utils import SH2RGB 27 | from scene.gaussian_model import BasicPointCloud 28 | 29 | class CameraInfo(NamedTuple): 30 | uid: int 31 | R: np.array 32 | T: np.array 33 | FovY: np.array 34 | FovX: np.array 35 | cx: np.array 36 | cy: np.array 37 | image: np.array 38 | depth: np.array # not used 39 | sam_mask: np.array # modify ----- 40 | mask_feat: np.array # modify ----- 41 | image_path: str 42 | image_name: str 43 | width: int 44 | height: int 45 | 46 | class SceneInfo(NamedTuple): 47 | point_cloud: BasicPointCloud 48 | train_cameras: list 49 | test_cameras: list 50 | nerf_normalization: dict 51 | ply_path: str 52 | 53 | def getNerfppNorm(cam_info): 54 | def get_center_and_diag(cam_centers): 55 | cam_centers = np.hstack(cam_centers) 56 | avg_cam_center = np.mean(cam_centers, axis=1, keepdims=True) 57 | center = avg_cam_center 58 | dist = np.linalg.norm(cam_centers - center, axis=0, keepdims=True) 59 | diagonal = np.max(dist) 60 | return center.flatten(), diagonal 61 | 62 | cam_centers = [] 63 | 64 | for cam in cam_info: 65 | W2C = getWorld2View2(cam.R, cam.T) 66 | C2W = np.linalg.inv(W2C) 67 | cam_centers.append(C2W[:3, 3:4]) 68 | 69 | center, diagonal = get_center_and_diag(cam_centers) 70 | radius = diagonal * 1.1 71 | 72 | translate = -center 73 | 74 | return {"translate": translate, "radius": radius} 75 | 76 | def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder): 77 | cam_infos = [] 78 | 79 | for idx, key in enumerate(cam_extrinsics): 80 | sys.stdout.write('\r') 81 | # the exact output you're looking for: 82 | sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics))) 83 | sys.stdout.flush() 84 | 85 | extr = cam_extrinsics[key] 86 | intr = cam_intrinsics[extr.camera_id] 87 | height = intr.height 88 | width = intr.width 89 | 90 | uid = intr.id 91 | R = np.transpose(qvec2rotmat(extr.qvec)) 92 | T = np.array(extr.tvec) 93 
| 94 | if intr.model=="SIMPLE_PINHOLE": 95 | focal_length_x = intr.params[0] 96 | FovY = focal2fov(focal_length_x, height) 97 | FovX = focal2fov(focal_length_x, width) 98 | elif intr.model=="PINHOLE": 99 | focal_length_x = intr.params[0] 100 | focal_length_y = intr.params[1] 101 | FovY = focal2fov(focal_length_y, height) 102 | FovX = focal2fov(focal_length_x, width) 103 | else: 104 | assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!" 105 | 106 | image_path = os.path.join(images_folder, os.path.basename(extr.name)) 107 | if not os.path.exists(image_path): 108 | # modify ----- 109 | base, ext = os.path.splitext(image_path) 110 | if ext.lower() == ".jpg": 111 | image_path = base + ".png" 112 | elif ext.lower() == ".png": 113 | image_path = base + ".jpg" 114 | if not os.path.exists(image_path): 115 | continue 116 | # modify ---- 117 | 118 | image_name = os.path.basename(image_path).split(".")[0] 119 | image = Image.open(image_path) 120 | 121 | # NOTE: load SAM mask and CLIP feat. [OpenGaussian] 122 | mask_seg_path = os.path.join(images_folder[:-6], "language_features/" + extr.name.split('/')[-1][:-4] + "_s.npy") 123 | mask_feat_path = os.path.join(images_folder[:-6], "language_features/" + extr.name.split('/')[-1][:-4] + "_f.npy") 124 | 125 | if os.path.exists(mask_seg_path): 126 | sam_mask = np.load(mask_seg_path) # [level=4, H, W] 127 | else: 128 | sam_mask = None 129 | if mask_feat_path is not None and os.path.exists(mask_feat_path): 130 | mask_feat = np.load(mask_feat_path) # [level=4, H, W] 131 | else: 132 | mask_feat = None 133 | # modify ----- 134 | 135 | cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, cx=width/2, cy=height/2, image=image, 136 | depth=None, sam_mask=sam_mask, mask_feat=mask_feat, 137 | image_path=image_path, image_name=image_name, width=width, height=height) 138 | cam_infos.append(cam_info) 139 | sys.stdout.write('\n') 140 | return cam_infos 141 | 142 | def fetchPly(path): 143 | plydata = PlyData.read(path) 144 | vertices = plydata['vertex'] 145 | positions = np.vstack([vertices['x'], vertices['y'], vertices['z']]).T 146 | if {'red', 'green', 'blue'}.issubset(vertices.data.dtype.names): 147 | colors = np.vstack([vertices['red'], vertices['green'], vertices['blue']]).T / 255.0 148 | else: 149 | colors = np.random.rand(positions.shape[0], 3) 150 | if {'nx', 'ny', 'nz'}.issubset(vertices.data.dtype.names): 151 | normals = np.vstack([vertices['nx'], vertices['ny'], vertices['nz']]).T 152 | else: 153 | normals = np.random.rand(positions.shape[0], 3) 154 | 155 | return BasicPointCloud(points=positions, colors=colors, normals=normals) 156 | 157 | def storePly(path, xyz, rgb): 158 | # Define the dtype for the structured array 159 | dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), 160 | ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'), 161 | ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')] 162 | 163 | normals = np.zeros_like(xyz) 164 | 165 | elements = np.empty(xyz.shape[0], dtype=dtype) 166 | attributes = np.concatenate((xyz, normals, rgb), axis=1) 167 | elements[:] = list(map(tuple, attributes)) 168 | 169 | # Create the PlyData object and write to file 170 | vertex_element = PlyElement.describe(elements, 'vertex') 171 | ply_data = PlyData([vertex_element]) 172 | ply_data.write(path) 173 | 174 | def readColmapSceneInfo(path, images, eval, llffhold=8): 175 | try: 176 | cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt") 177 | cameras_intrinsic_file = os.path.join(path, "sparse/0", 
"cameras.txt") 178 | cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file) 179 | cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file) 180 | 181 | except: 182 | cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin") 183 | cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin") 184 | cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file) 185 | cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file) 186 | 187 | reading_dir = "images" if images == None else images 188 | cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir)) 189 | cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : x.image_name) 190 | 191 | if eval: 192 | train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0] 193 | test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold == 0] 194 | else: 195 | train_cam_infos = cam_infos 196 | test_cam_infos = [] 197 | 198 | nerf_normalization = getNerfppNorm(train_cam_infos) 199 | 200 | ply_path = os.path.join(path, "sparse/0/points3D.ply") 201 | bin_path = os.path.join(path, "sparse/0/points3D.bin") 202 | txt_path = os.path.join(path, "sparse/0/points3D.txt") 203 | if not os.path.exists(ply_path): 204 | print("Converting point3d.bin to .ply, will happen only the first time you open the scene.") 205 | try: 206 | xyz, rgb, _ = read_points3D_binary(bin_path) 207 | except: 208 | xyz, rgb, _ = read_points3D_text(txt_path) 209 | storePly(ply_path, xyz, rgb) 210 | try: 211 | pcd = fetchPly(ply_path) 212 | except: 213 | pcd = None 214 | 215 | scene_info = SceneInfo(point_cloud=pcd, 216 | train_cameras=train_cam_infos, 217 | test_cameras=test_cam_infos, 218 | nerf_normalization=nerf_normalization, 219 | ply_path=ply_path) 220 | return scene_info 221 | 222 | def readCamerasFromTransforms(path, transformsfile, white_background, extension=".png"): 223 | cam_infos = [] 224 | 225 | with open(os.path.join(path, transformsfile)) as json_file: 226 | contents = json.load(json_file) 227 | 228 | # ----- modify ----- 229 | if "camera_angle_x" not in contents.keys(): 230 | fovx = None 231 | else: 232 | fovx = contents["camera_angle_x"] 233 | # ----- modify ----- 234 | 235 | # modify ----- 236 | cx, cy = -1, -1 237 | if "cx" in contents.keys(): 238 | cx = contents["cx"] 239 | cy = contents["cy"] 240 | elif "h" in contents.keys(): 241 | cx = contents["w"] / 2 242 | cy = contents["h"] / 2 243 | # modify ----- 244 | 245 | frames = contents["frames"] 246 | # for idx, frame in enumerate(frames): 247 | for idx, frame in tqdm(enumerate(frames), total=len(frames), desc="load images"): 248 | cam_name = os.path.join(path, frame["file_path"] + extension) 249 | 250 | # NeRF 'transform_matrix' is a camera-to-world transform 251 | c2w = np.array(frame["transform_matrix"]) 252 | # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward) 253 | c2w[:3, 1:3] *= -1 # TODO 254 | 255 | # get the world-to-camera transform and set R, T 256 | w2c = np.linalg.inv(c2w) 257 | R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code 258 | T = w2c[:3, 3] 259 | 260 | image_path = os.path.join(path, cam_name) 261 | if not os.path.exists(image_path): 262 | # modify ----- 263 | base, ext = os.path.splitext(image_path) 264 | if ext.lower() == ".jpg": 265 | image_path = base + ".png" 266 | elif ext.lower() == ".png": 267 | image_path = base + ".jpg" 268 | if not os.path.exists(image_path): 269 
| continue 270 | # modify ---- 271 | 272 | image_name = Path(cam_name).stem 273 | image = Image.open(image_path) 274 | 275 | im_data = np.array(image.convert("RGBA")) 276 | 277 | bg = np.array([1,1,1]) if white_background else np.array([0, 0, 0]) 278 | 279 | norm_data = im_data / 255.0 280 | arr = norm_data[:,:,:3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4]) 281 | image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") 282 | 283 | # NOTE: load SAM mask and CLIP feat. [OpenGaussian] 284 | mask_seg_path = os.path.join(path, "language_features/" + frame["file_path"].split('/')[-1] + "_s.npy") 285 | mask_feat_path = os.path.join(path, "language_features/" + frame["file_path"].split('/')[-1] + "_f.npy") 286 | 287 | if os.path.exists(mask_seg_path): 288 | sam_mask = np.load(mask_seg_path) # [level=4, H, W] 289 | else: 290 | sam_mask = None 291 | if os.path.exists(mask_feat_path): 292 | mask_feat = np.load(mask_feat_path) # [num_mask, dim=512] 293 | else: 294 | mask_feat = None 295 | # modify ----- 296 | 297 | # ----- modify ----- 298 | if "K" in frame.keys(): 299 | cx = frame["K"][0][2] 300 | cy = frame["K"][1][2] 301 | if cx == -1: 302 | cx = image.size[0] / 2 303 | cy = image.size[1] / 2 304 | # ----- modify ----- 305 | 306 | # ----- modify ----- 307 | if fovx == None: 308 | if "K" in frame.keys(): 309 | focal_length = frame["K"][0][0] 310 | if "fl_x" in contents.keys(): 311 | focal_length = contents["fl_x"] 312 | if "fl_x" in frame.keys(): 313 | focal_length = frame["fl_x"] 314 | FovY = focal2fov(focal_length, image.size[1]) 315 | FovX = focal2fov(focal_length, image.size[0]) 316 | else: 317 | fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) 318 | FovY = fovx 319 | FovX = fovy 320 | # ----- modify ----- 321 | 322 | cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, cx=cx, cy=cy, image=image, 323 | depth=None, sam_mask=sam_mask, mask_feat=mask_feat, 324 | image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1])) 325 | 326 | return cam_infos 327 | 328 | def readNerfSyntheticInfo(path, white_background, eval, extension=".png"): 329 | print("Reading Training Transforms") 330 | train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", white_background, extension) 331 | print("Reading Test Transforms") 332 | if os.path.exists(os.path.join(path, "transforms_test.json")): 333 | test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", white_background, extension) 334 | else: 335 | test_cam_infos = train_cam_infos 336 | 337 | if not eval: 338 | train_cam_infos.extend(test_cam_infos) 339 | test_cam_infos = [] 340 | 341 | nerf_normalization = getNerfppNorm(train_cam_infos) 342 | 343 | ply_path = os.path.join(path, "points3d.ply") 344 | if not os.path.exists(ply_path): 345 | # Since this data set has no colmap data, we start with random points 346 | num_pts = 100_000 347 | print(f"Generating random point cloud ({num_pts})...") 348 | 349 | # We create random points inside the bounds of the synthetic Blender scenes 350 | xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3 351 | shs = np.random.random((num_pts, 3)) / 255.0 352 | pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3))) 353 | 354 | storePly(ply_path, xyz, SH2RGB(shs) * 255) 355 | try: 356 | pcd = fetchPly(ply_path) 357 | except: 358 | pcd = None 359 | 360 | scene_info = SceneInfo(point_cloud=pcd, 361 | train_cameras=train_cam_infos, 362 | test_cameras=test_cam_infos, 363 | 
nerf_normalization=nerf_normalization, 364 | ply_path=ply_path) 365 | return scene_info 366 | 367 | sceneLoadTypeCallbacks = { 368 | "Colmap": readColmapSceneInfo, 369 | "Blender" : readNerfSyntheticInfo 370 | } -------------------------------------------------------------------------------- /scene/gaussian_model.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | import torch 13 | import numpy as np 14 | from utils.general_utils import inverse_sigmoid, get_expon_lr_func, build_rotation 15 | from torch import nn 16 | import os 17 | from utils.system_utils import mkdir_p 18 | from plyfile import PlyData, PlyElement 19 | from utils.sh_utils import RGB2SH 20 | # from simple_knn._C import distCUDA2 # no need 21 | from scipy.spatial import KDTree # modify 22 | from utils.graphics_utils import BasicPointCloud 23 | from utils.general_utils import strip_symmetric, build_scaling_rotation 24 | 25 | def sigmoid(x): 26 | return 1 / (1 + np.exp(-x)) 27 | 28 | def distCUDA2(points): 29 | ''' 30 | https://github.com/graphdeco-inria/gaussian-splatting/issues/292 31 | ''' 32 | points_np = points.detach().cpu().float().numpy() 33 | dists, inds = KDTree(points_np).query(points_np, k=4) 34 | meanDists = (dists[:, 1:] ** 2).mean(1) 35 | 36 | return torch.tensor(meanDists, dtype=points.dtype, device=points.device) 37 | 38 | class GaussianModel: 39 | 40 | def setup_functions(self): 41 | def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation): 42 | L = build_scaling_rotation(scaling_modifier * scaling, rotation) 43 | actual_covariance = L @ L.transpose(1, 2) 44 | symm = strip_symmetric(actual_covariance) 45 | return symm 46 | 47 | self.scaling_activation = torch.exp 48 | self.scaling_inverse_activation = torch.log 49 | 50 | self.covariance_activation = build_covariance_from_scaling_rotation 51 | 52 | self.opacity_activation = torch.sigmoid 53 | self.inverse_opacity_activation = inverse_sigmoid 54 | 55 | self.rotation_activation = torch.nn.functional.normalize 56 | 57 | 58 | def __init__(self, sh_degree : int): 59 | self.active_sh_degree = 0 60 | self.max_sh_degree = sh_degree 61 | self._xyz = torch.empty(0) 62 | self._features_dc = torch.empty(0) 63 | self._features_rest = torch.empty(0) 64 | self._scaling = torch.empty(0) 65 | self._rotation = torch.empty(0) 66 | self._opacity = torch.empty(0) 67 | self._ins_feat = torch.empty(0) # Continuous instance features before quantization 68 | self._ins_feat_q = torch.empty(0) # Discrete instance features after quantization 69 | self.iClusterSubNum = torch.empty(0) 70 | self.max_radii2D = torch.empty(0) 71 | self.xyz_gradient_accum = torch.empty(0) 72 | self.denom = torch.empty(0) 73 | self.optimizer = None 74 | self.percent_dense = 0 75 | self.spatial_lr_scale = 0 76 | self.setup_functions() 77 | 78 | def capture(self): 79 | return ( 80 | self.active_sh_degree, 81 | self._xyz, 82 | self._features_dc, 83 | self._features_rest, 84 | self._scaling, 85 | self._rotation, 86 | self._opacity, 87 | self._ins_feat, # Continuous instance features before quantization 88 | self._ins_feat_q, # Discrete instance features after quantization 89 | self.max_radii2D, 90 | self.xyz_gradient_accum, 91 | 
self.denom, 92 | self.optimizer.state_dict(), 93 | self.spatial_lr_scale, 94 | ) 95 | 96 | def restore(self, model_args, training_args): 97 | (self.active_sh_degree, 98 | self._xyz, 99 | self._features_dc, 100 | self._features_rest, 101 | self._scaling, 102 | self._rotation, 103 | self._opacity, 104 | self._ins_feat, # Continuous instance features before quantization 105 | self._ins_feat_q, # Discrete instance features after quantization 106 | self.max_radii2D, 107 | xyz_gradient_accum, 108 | denom, 109 | opt_dict, 110 | self.spatial_lr_scale) = model_args 111 | self.training_setup(training_args) 112 | self.xyz_gradient_accum = xyz_gradient_accum 113 | self.denom = denom 114 | self.optimizer.load_state_dict(opt_dict) 115 | 116 | @property 117 | def get_scaling(self): 118 | return self.scaling_activation(self._scaling) 119 | 120 | @property 121 | def get_scaling_origin(self): 122 | return self.scaling_activation(self._scaling) 123 | 124 | @property 125 | def get_rotation(self): 126 | return self.rotation_activation(self._rotation) 127 | 128 | @property 129 | def get_rotation_matrix(self): 130 | return build_rotation(self._rotation) 131 | 132 | @property 133 | def get_eigenvector(self): 134 | scales = self.get_scaling_origin 135 | N = scales.shape[0] 136 | idx = torch.min(scales, dim=1)[1] 137 | normals = self.get_rotation_matrix[np.arange(N), :, idx] 138 | normals = torch.nn.functional.normalize(normals, dim=1) 139 | return normals 140 | 141 | @property 142 | def get_xyz(self): 143 | return self._xyz 144 | 145 | @property 146 | def get_features(self): 147 | features_dc = self._features_dc 148 | features_rest = self._features_rest 149 | return torch.cat((features_dc, features_rest), dim=1) 150 | 151 | @property 152 | def get_opacity(self): 153 | return self.opacity_activation(self._opacity) 154 | 155 | # NOTE: get instance feature 156 | # @property 157 | def get_ins_feat(self, origin=False): 158 | if len(self._ins_feat_q) == 0 or origin: 159 | ins_feat = self._ins_feat 160 | else: 161 | ins_feat = self._ins_feat_q 162 | ins_feat = torch.nn.functional.normalize(ins_feat, dim=1) 163 | return ins_feat 164 | 165 | def get_covariance(self, scaling_modifier = 1): 166 | return self.covariance_activation(self.get_scaling, scaling_modifier, self._rotation) 167 | 168 | def oneupSHdegree(self): 169 | if self.active_sh_degree < self.max_sh_degree: 170 | self.active_sh_degree += 1 171 | 172 | def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float): 173 | self.spatial_lr_scale = spatial_lr_scale 174 | fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda() 175 | fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda()) 176 | features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda() # [N, 3, 16] 177 | features[:, :3, 0 ] = fused_color 178 | features[:, 3:, 1:] = 0.0 179 | 180 | print("Number of points at initialisation : ", fused_point_cloud.shape[0]) 181 | 182 | dist2 = torch.clamp_min(distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()), 0.0000001) 183 | scales = torch.log(torch.sqrt(dist2))[...,None].repeat(1, 3) 184 | rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda") 185 | rots[:, 0] = 1 186 | 187 | opacities = inverse_sigmoid(0.1 * torch.ones((fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda")) 188 | 189 | # modify ----- 190 | ins_feat = torch.rand((fused_point_cloud.shape[0], 6), dtype=torch.float, device="cuda") 191 | 192 | self._xyz = 
nn.Parameter(fused_point_cloud.requires_grad_(True)) 193 | self._features_dc = nn.Parameter(features[:,:,0:1].transpose(1, 2).contiguous().requires_grad_(True)) 194 | self._features_rest = nn.Parameter(features[:,:,1:].transpose(1, 2).contiguous().requires_grad_(True)) 195 | self._scaling = nn.Parameter(scales.requires_grad_(True)) 196 | self._rotation = nn.Parameter(rots.requires_grad_(True)) 197 | self._opacity = nn.Parameter(opacities.requires_grad_(True)) 198 | # modify ----- 199 | self._ins_feat = nn.Parameter(ins_feat.requires_grad_(True)) 200 | self.max_radii2D = torch.zeros((self.get_xyz.shape[0]), device="cuda") 201 | 202 | def training_setup(self, training_args): 203 | self.percent_dense = training_args.percent_dense 204 | self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") 205 | self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") 206 | 207 | l = [ 208 | {'params': [self._xyz], 'lr': training_args.position_lr_init * self.spatial_lr_scale, "name": "xyz"}, 209 | {'params': [self._features_dc], 'lr': training_args.feature_lr, "name": "f_dc"}, 210 | {'params': [self._features_rest], 'lr': training_args.feature_lr / 20.0, "name": "f_rest"}, 211 | {'params': [self._opacity], 'lr': training_args.opacity_lr, "name": "opacity"}, 212 | {'params': [self._scaling], 'lr': training_args.scaling_lr, "name": "scaling"}, 213 | {'params': [self._rotation], 'lr': training_args.rotation_lr, "name": "rotation"}, 214 | {'params': [self._ins_feat], 'lr': training_args.ins_feat_lr, "name": "ins_feat"} # modify ----- 215 | ] 216 | 217 | # note: Freeze the position of the initial point, do not densify. for ScanNet 3DGS pre-train stage 218 | if training_args.frozen_init_pts: 219 | self._xyz = self._xyz.detach() 220 | 221 | self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15) 222 | self.xyz_scheduler_args = get_expon_lr_func(lr_init=training_args.position_lr_init*self.spatial_lr_scale, 223 | lr_final=training_args.position_lr_final*self.spatial_lr_scale, 224 | lr_delay_mult=training_args.position_lr_delay_mult, 225 | max_steps=training_args.position_lr_max_steps) 226 | 227 | def update_learning_rate(self, iteration, root_start, leaf_start): 228 | ''' Learning rate scheduling per step ''' 229 | for param_group in self.optimizer.param_groups: 230 | if param_group["name"] == "xyz": 231 | lr = self.xyz_scheduler_args(iteration) 232 | param_group['lr'] = lr 233 | # return lr 234 | if param_group["name"] == "ins_feat": 235 | if iteration > root_start and iteration <= leaf_start: # TODO: update lr 236 | param_group['lr'] = param_group['lr'] * 0 + 0.0001 237 | else: 238 | param_group['lr'] = param_group['lr'] * 0 + 0.001 239 | 240 | def construct_list_of_attributes(self): 241 | l = ['x', 'y', 'z', 'nx', 'ny', 'nz', 'ins_feat_r', 'ins_feat_g', 'ins_feat_b', \ 242 | 'ins_feat_r2', 'ins_feat_g2', 'ins_feat_b2'] 243 | # All channels except the 3 DC 244 | for i in range(self._features_dc.shape[1]*self._features_dc.shape[2]): 245 | l.append('f_dc_{}'.format(i)) 246 | for i in range(self._features_rest.shape[1]*self._features_rest.shape[2]): 247 | l.append('f_rest_{}'.format(i)) 248 | l.append('opacity') 249 | for i in range(self._scaling.shape[1]): 250 | l.append('scale_{}'.format(i)) 251 | for i in range(self._rotation.shape[1]): 252 | l.append('rot_{}'.format(i)) 253 | return l 254 | 255 | def save_ply(self, path, save_q=[]): 256 | mkdir_p(os.path.dirname(path)) 257 | 258 | xyz = self._xyz.detach().cpu().numpy() 259 | normals = np.zeros_like(xyz) 260 | f_dc = 
self._features_dc.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy() 261 | f_rest = self._features_rest.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy() 262 | opacities = self._opacity.detach().cpu().numpy() 263 | scale = self._scaling.detach().cpu().numpy() 264 | rotation = self._rotation.detach().cpu().numpy() 265 | if "ins_feat" in save_q: 266 | ins_feat = self._ins_feat_q.detach().cpu().numpy() 267 | else: 268 | ins_feat = self._ins_feat.detach().cpu().numpy() 269 | 270 | # NOTE: pts feat visualization 271 | vis_color = (ins_feat + 1) / 2 * 255 272 | r, g, b = vis_color[:, 0].reshape(-1, 1), vis_color[:, 1].reshape(-1, 1), vis_color[:, 2].reshape(-1, 1) 273 | 274 | # todo: points not fully optimized due to sampled training images. 275 | ignored_ind = sigmoid(opacities) < 0.1 276 | r[ignored_ind], g[ignored_ind], b[ignored_ind] = 128, 128, 128 277 | 278 | dtype_full = [(attribute, 'f4') for attribute in self.construct_list_of_attributes()] 279 | dtype_full = dtype_full + [('red', 'u1'), ('green', 'u1'), ('blue', 'u1')] # modify 280 | 281 | elements = np.empty(xyz.shape[0], dtype=dtype_full) 282 | attributes = np.concatenate((xyz, normals, ins_feat,\ 283 | f_dc, f_rest, opacities, scale, rotation,\ 284 | r, g, b), axis=1) 285 | elements[:] = list(map(tuple, attributes)) 286 | el = PlyElement.describe(elements, 'vertex') 287 | PlyData([el]).write(path) 288 | 289 | def reset_opacity(self): 290 | opacities_new = inverse_sigmoid(torch.min(self.get_opacity, torch.ones_like(self.get_opacity)*0.01)) 291 | optimizable_tensors = self.replace_tensor_to_optimizer(opacities_new, "opacity") 292 | self._opacity = optimizable_tensors["opacity"] 293 | 294 | def load_ply(self, path): 295 | plydata = PlyData.read(path) 296 | 297 | xyz = np.stack((np.asarray(plydata.elements[0]["x"]), 298 | np.asarray(plydata.elements[0]["y"]), 299 | np.asarray(plydata.elements[0]["z"])), axis=1) 300 | ins_feat = np.stack((np.asarray(plydata.elements[0]["ins_feat_r"]), 301 | np.asarray(plydata.elements[0]["ins_feat_g"]), 302 | np.asarray(plydata.elements[0]["ins_feat_b"]), 303 | np.asarray(plydata.elements[0]["ins_feat_r2"]), 304 | np.asarray(plydata.elements[0]["ins_feat_g2"]), 305 | np.asarray(plydata.elements[0]["ins_feat_b2"])), axis=1) 306 | opacities = np.asarray(plydata.elements[0]["opacity"])[..., np.newaxis] 307 | if not opacities.flags['C_CONTIGUOUS']: 308 | opacities = np.ascontiguousarray(opacities) 309 | 310 | features_dc = np.zeros((xyz.shape[0], 3, 1)) 311 | features_dc[:, 0, 0] = np.asarray(plydata.elements[0]["f_dc_0"]) 312 | features_dc[:, 1, 0] = np.asarray(plydata.elements[0]["f_dc_1"]) 313 | features_dc[:, 2, 0] = np.asarray(plydata.elements[0]["f_dc_2"]) 314 | 315 | extra_f_names = [p.name for p in plydata.elements[0].properties if p.name.startswith("f_rest_")] 316 | extra_f_names = sorted(extra_f_names, key = lambda x: int(x.split('_')[-1])) 317 | assert len(extra_f_names)==3*(self.max_sh_degree + 1) ** 2 - 3 318 | features_extra = np.zeros((xyz.shape[0], len(extra_f_names))) 319 | for idx, attr_name in enumerate(extra_f_names): 320 | features_extra[:, idx] = np.asarray(plydata.elements[0][attr_name]) 321 | # Reshape (P,F*SH_coeffs) to (P, F, SH_coeffs except DC) 322 | features_extra = features_extra.reshape((features_extra.shape[0], 3, (self.max_sh_degree + 1) ** 2 - 1)) 323 | 324 | scale_names = [p.name for p in plydata.elements[0].properties if p.name.startswith("scale_")] 325 | scale_names = sorted(scale_names, key = lambda x: int(x.split('_')[-1])) 
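# Note: the PLY stores pre-activation parameters: 'scale_*' values are log-scales and 'rot_*' values are
# unnormalized quaternions (see scaling_inverse_activation / rotation_activation in setup_functions);
# get_scaling and get_rotation re-apply exp and normalization after loading, so no inverse transform is applied here.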
326 | scales = np.zeros((xyz.shape[0], len(scale_names))) 327 | for idx, attr_name in enumerate(scale_names): 328 | scales[:, idx] = np.asarray(plydata.elements[0][attr_name]) 329 | 330 | rot_names = [p.name for p in plydata.elements[0].properties if p.name.startswith("rot")] 331 | rot_names = sorted(rot_names, key = lambda x: int(x.split('_')[-1])) 332 | rots = np.zeros((xyz.shape[0], len(rot_names))) 333 | for idx, attr_name in enumerate(rot_names): 334 | rots[:, idx] = np.asarray(plydata.elements[0][attr_name]) 335 | 336 | self._xyz = nn.Parameter(torch.tensor(xyz, dtype=torch.float, device="cuda").requires_grad_(True)) 337 | self._features_dc = nn.Parameter(torch.tensor(features_dc, dtype=torch.float, device="cuda").transpose(1, 2).contiguous().requires_grad_(True)) 338 | self._features_rest = nn.Parameter(torch.tensor(features_extra, dtype=torch.float, device="cuda").transpose(1, 2).contiguous().requires_grad_(True)) 339 | self._opacity = nn.Parameter(torch.tensor(opacities, dtype=torch.float, device="cuda").requires_grad_(True)) 340 | self._scaling = nn.Parameter(torch.tensor(scales, dtype=torch.float, device="cuda").requires_grad_(True)) 341 | self._rotation = nn.Parameter(torch.tensor(rots, dtype=torch.float, device="cuda").requires_grad_(True)) 342 | self._ins_feat = nn.Parameter(torch.tensor(ins_feat, dtype=torch.float, device="cuda").requires_grad_(True)) 343 | 344 | self.active_sh_degree = self.max_sh_degree 345 | 346 | def replace_tensor_to_optimizer(self, tensor, name): 347 | optimizable_tensors = {} 348 | for group in self.optimizer.param_groups: 349 | if group["name"] == name: 350 | stored_state = self.optimizer.state.get(group['params'][0], None) 351 | stored_state["exp_avg"] = torch.zeros_like(tensor) 352 | stored_state["exp_avg_sq"] = torch.zeros_like(tensor) 353 | 354 | del self.optimizer.state[group['params'][0]] 355 | group["params"][0] = nn.Parameter(tensor.requires_grad_(True)) 356 | self.optimizer.state[group['params'][0]] = stored_state 357 | 358 | optimizable_tensors[group["name"]] = group["params"][0] 359 | return optimizable_tensors 360 | 361 | def _prune_optimizer(self, mask): 362 | optimizable_tensors = {} 363 | for group in self.optimizer.param_groups: 364 | stored_state = self.optimizer.state.get(group['params'][0], None) 365 | if stored_state is not None: 366 | stored_state["exp_avg"] = stored_state["exp_avg"][mask] 367 | stored_state["exp_avg_sq"] = stored_state["exp_avg_sq"][mask] 368 | 369 | del self.optimizer.state[group['params'][0]] 370 | group["params"][0] = nn.Parameter((group["params"][0][mask].requires_grad_(True))) 371 | self.optimizer.state[group['params'][0]] = stored_state 372 | 373 | optimizable_tensors[group["name"]] = group["params"][0] 374 | else: 375 | group["params"][0] = nn.Parameter(group["params"][0][mask].requires_grad_(True)) 376 | optimizable_tensors[group["name"]] = group["params"][0] 377 | return optimizable_tensors 378 | 379 | def prune_points(self, mask): 380 | valid_points_mask = ~mask 381 | optimizable_tensors = self._prune_optimizer(valid_points_mask) 382 | 383 | self._xyz = optimizable_tensors["xyz"] 384 | self._features_dc = optimizable_tensors["f_dc"] 385 | self._features_rest = optimizable_tensors["f_rest"] 386 | self._opacity = optimizable_tensors["opacity"] 387 | self._scaling = optimizable_tensors["scaling"] 388 | self._rotation = optimizable_tensors["rotation"] 389 | self._ins_feat = optimizable_tensors["ins_feat"] 390 | 391 | self.xyz_gradient_accum = self.xyz_gradient_accum[valid_points_mask] 392 | 393 | 
self.denom = self.denom[valid_points_mask] 394 | self.max_radii2D = self.max_radii2D[valid_points_mask] 395 | 396 | def cat_tensors_to_optimizer(self, tensors_dict): 397 | optimizable_tensors = {} 398 | for group in self.optimizer.param_groups: 399 | assert len(group["params"]) == 1 400 | extension_tensor = tensors_dict[group["name"]] 401 | stored_state = self.optimizer.state.get(group['params'][0], None) 402 | if stored_state is not None: 403 | 404 | stored_state["exp_avg"] = torch.cat((stored_state["exp_avg"], torch.zeros_like(extension_tensor)), dim=0) 405 | stored_state["exp_avg_sq"] = torch.cat((stored_state["exp_avg_sq"], torch.zeros_like(extension_tensor)), dim=0) 406 | 407 | del self.optimizer.state[group['params'][0]] 408 | group["params"][0] = nn.Parameter(torch.cat((group["params"][0], extension_tensor), dim=0).requires_grad_(True)) 409 | self.optimizer.state[group['params'][0]] = stored_state 410 | 411 | optimizable_tensors[group["name"]] = group["params"][0] 412 | else: 413 | group["params"][0] = nn.Parameter(torch.cat((group["params"][0], extension_tensor), dim=0).requires_grad_(True)) 414 | optimizable_tensors[group["name"]] = group["params"][0] 415 | 416 | return optimizable_tensors 417 | 418 | def densification_postfix(self, new_xyz, new_features_dc, new_features_rest, new_opacities, \ 419 | new_scaling, new_rotation, new_ins_feat): 420 | d = {"xyz": new_xyz, 421 | "f_dc": new_features_dc, 422 | "f_rest": new_features_rest, 423 | "opacity": new_opacities, 424 | "scaling" : new_scaling, 425 | "rotation" : new_rotation, 426 | "ins_feat": new_ins_feat} 427 | 428 | optimizable_tensors = self.cat_tensors_to_optimizer(d) 429 | self._xyz = optimizable_tensors["xyz"] 430 | self._features_dc = optimizable_tensors["f_dc"] 431 | self._features_rest = optimizable_tensors["f_rest"] 432 | self._opacity = optimizable_tensors["opacity"] 433 | self._scaling = optimizable_tensors["scaling"] 434 | self._rotation = optimizable_tensors["rotation"] 435 | self._ins_feat = optimizable_tensors["ins_feat"] 436 | 437 | self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") 438 | self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") 439 | self.max_radii2D = torch.zeros((self.get_xyz.shape[0]), device="cuda") 440 | 441 | def densify_and_split(self, grads, grad_threshold, scene_extent, N=2): 442 | n_init_points = self.get_xyz.shape[0] 443 | # Extract points that satisfy the gradient condition 444 | padded_grad = torch.zeros((n_init_points), device="cuda") 445 | padded_grad[:grads.shape[0]] = grads.squeeze() 446 | selected_pts_mask = torch.where(padded_grad >= grad_threshold, True, False) 447 | selected_pts_mask = torch.logical_and(selected_pts_mask, 448 | torch.max(self.get_scaling, dim=1).values > self.percent_dense*scene_extent) 449 | 450 | stds = self.get_scaling[selected_pts_mask].repeat(N,1) 451 | means =torch.zeros((stds.size(0), 3),device="cuda") 452 | samples = torch.normal(mean=means, std=stds) 453 | rots = build_rotation(self._rotation[selected_pts_mask]).repeat(N,1,1) 454 | new_xyz = torch.bmm(rots, samples.unsqueeze(-1)).squeeze(-1) + self.get_xyz[selected_pts_mask].repeat(N, 1) 455 | new_scaling = self.scaling_inverse_activation(self.get_scaling[selected_pts_mask].repeat(N,1) / (0.8*N)) 456 | new_rotation = self._rotation[selected_pts_mask].repeat(N,1) 457 | new_features_dc = self._features_dc[selected_pts_mask].repeat(N,1,1) 458 | new_features_rest = self._features_rest[selected_pts_mask].repeat(N,1,1) 459 | new_opacity = 
self._opacity[selected_pts_mask].repeat(N,1) 460 | new_ins_feat = self._ins_feat[selected_pts_mask].repeat(N,1) 461 | 462 | self.densification_postfix(new_xyz, new_features_dc, new_features_rest, \ 463 | new_opacity, new_scaling, new_rotation, new_ins_feat) 464 | 465 | prune_filter = torch.cat((selected_pts_mask, torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool))) 466 | self.prune_points(prune_filter) 467 | 468 | def densify_and_clone(self, grads, grad_threshold, scene_extent): 469 | # Extract points that satisfy the gradient condition 470 | selected_pts_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False) 471 | selected_pts_mask = torch.logical_and(selected_pts_mask, 472 | torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent) 473 | 474 | new_xyz = self._xyz[selected_pts_mask] 475 | new_features_dc = self._features_dc[selected_pts_mask] 476 | new_features_rest = self._features_rest[selected_pts_mask] 477 | new_opacities = self._opacity[selected_pts_mask] 478 | new_scaling = self._scaling[selected_pts_mask] 479 | new_rotation = self._rotation[selected_pts_mask] 480 | new_ins_feat = self._ins_feat[selected_pts_mask] 481 | 482 | self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, \ 483 | new_scaling, new_rotation, new_ins_feat) 484 | 485 | def densify_and_prune(self, max_grad, min_opacity, extent, max_screen_size): 486 | grads = self.xyz_gradient_accum / self.denom 487 | grads[grads.isnan()] = 0.0 488 | 489 | self.densify_and_clone(grads, max_grad, extent) 490 | self.densify_and_split(grads, max_grad, extent) 491 | 492 | prune_mask = (self.get_opacity < min_opacity).squeeze() 493 | if max_screen_size: 494 | big_points_vs = self.max_radii2D > max_screen_size 495 | big_points_ws = self.get_scaling.max(dim=1).values > 0.1 * extent 496 | prune_mask = torch.logical_or(torch.logical_or(prune_mask, big_points_vs), big_points_ws) 497 | self.prune_points(prune_mask) 498 | 499 | torch.cuda.empty_cache() 500 | 501 | def add_densification_stats(self, viewspace_point_tensor, update_filter): 502 | self.xyz_gradient_accum[update_filter] += torch.norm(viewspace_point_tensor.grad[update_filter,:2], dim=-1, keepdim=True) 503 | self.denom[update_filter] += 1 -------------------------------------------------------------------------------- /static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item 
.video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 
0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid 
#b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=range].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 
1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black 
.has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark .has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary .has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success 
.has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | padding: 20px; 121 | font-size: 0; 122 | } 123 | 124 | .results-carousel video { 125 | margin: 0; 126 | } 127 | 128 
| .slider-pagination .slider-page { 129 | background: #000000; 130 | } 131 | 132 | .eql-cntrb { 133 | font-size: smaller; 134 | } 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /static/images/Downstream tasks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/Downstream tasks.jpg -------------------------------------------------------------------------------- /static/images/Method_Overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/Method_Overview.jpg -------------------------------------------------------------------------------- /static/images/Object grounding on LERF.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/Object grounding on LERF.jpg -------------------------------------------------------------------------------- /static/images/carousel1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/carousel1.jpg -------------------------------------------------------------------------------- /static/images/carousel2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/carousel2.jpg -------------------------------------------------------------------------------- /static/images/carousel3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/carousel3.jpg -------------------------------------------------------------------------------- /static/images/carousel4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/carousel4.jpg -------------------------------------------------------------------------------- /static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/images/favicon.ico -------------------------------------------------------------------------------- /static/js/bulma-slider.js: -------------------------------------------------------------------------------- 1 | (function webpackUniversalModuleDefinition(root, factory) { 2 | if(typeof exports === 'object' && typeof module === 'object') 3 | module.exports = factory(); 4 | else if(typeof define === 'function' && define.amd) 5 | define([], factory); 6 | else if(typeof exports === 'object') 7 | exports["bulmaSlider"] = factory(); 8 | else 9 | root["bulmaSlider"] = factory(); 10 | })(typeof self !== 'undefined' ? 
self : this, function() { 11 | return /******/ (function(modules) { // webpackBootstrap 12 | /******/ // The module cache 13 | /******/ var installedModules = {}; 14 | /******/ 15 | /******/ // The require function 16 | /******/ function __webpack_require__(moduleId) { 17 | /******/ 18 | /******/ // Check if module is in cache 19 | /******/ if(installedModules[moduleId]) { 20 | /******/ return installedModules[moduleId].exports; 21 | /******/ } 22 | /******/ // Create a new module (and put it into the cache) 23 | /******/ var module = installedModules[moduleId] = { 24 | /******/ i: moduleId, 25 | /******/ l: false, 26 | /******/ exports: {} 27 | /******/ }; 28 | /******/ 29 | /******/ // Execute the module function 30 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 31 | /******/ 32 | /******/ // Flag the module as loaded 33 | /******/ module.l = true; 34 | /******/ 35 | /******/ // Return the exports of the module 36 | /******/ return module.exports; 37 | /******/ } 38 | /******/ 39 | /******/ 40 | /******/ // expose the modules object (__webpack_modules__) 41 | /******/ __webpack_require__.m = modules; 42 | /******/ 43 | /******/ // expose the module cache 44 | /******/ __webpack_require__.c = installedModules; 45 | /******/ 46 | /******/ // define getter function for harmony exports 47 | /******/ __webpack_require__.d = function(exports, name, getter) { 48 | /******/ if(!__webpack_require__.o(exports, name)) { 49 | /******/ Object.defineProperty(exports, name, { 50 | /******/ configurable: false, 51 | /******/ enumerable: true, 52 | /******/ get: getter 53 | /******/ }); 54 | /******/ } 55 | /******/ }; 56 | /******/ 57 | /******/ // getDefaultExport function for compatibility with non-harmony modules 58 | /******/ __webpack_require__.n = function(module) { 59 | /******/ var getter = module && module.__esModule ? 
60 | /******/ function getDefault() { return module['default']; } : 61 | /******/ function getModuleExports() { return module; }; 62 | /******/ __webpack_require__.d(getter, 'a', getter); 63 | /******/ return getter; 64 | /******/ }; 65 | /******/ 66 | /******/ // Object.prototype.hasOwnProperty.call 67 | /******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; 68 | /******/ 69 | /******/ // __webpack_public_path__ 70 | /******/ __webpack_require__.p = ""; 71 | /******/ 72 | /******/ // Load entry module and return exports 73 | /******/ return __webpack_require__(__webpack_require__.s = 0); 74 | /******/ }) 75 | /************************************************************************/ 76 | /******/ ([ 77 | /* 0 */ 78 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 79 | 80 | "use strict"; 81 | Object.defineProperty(__webpack_exports__, "__esModule", { value: true }); 82 | /* harmony export (binding) */ __webpack_require__.d(__webpack_exports__, "isString", function() { return isString; }); 83 | /* harmony import */ var __WEBPACK_IMPORTED_MODULE_0__events__ = __webpack_require__(1); 84 | var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; 85 | 86 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); 87 | 88 | var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; 89 | 90 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 91 | 92 | function _possibleConstructorReturn(self, call) { if (!self) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return call && (typeof call === "object" || typeof call === "function") ? call : self; } 93 | 94 | function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function, not " + typeof superClass); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, enumerable: false, writable: true, configurable: true } }); if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass; } 95 | 96 | 97 | 98 | var isString = function isString(unknown) { 99 | return typeof unknown === 'string' || !!unknown && (typeof unknown === 'undefined' ? 
'undefined' : _typeof(unknown)) === 'object' && Object.prototype.toString.call(unknown) === '[object String]'; 100 | }; 101 | 102 | var bulmaSlider = function (_EventEmitter) { 103 | _inherits(bulmaSlider, _EventEmitter); 104 | 105 | function bulmaSlider(selector) { 106 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 107 | 108 | _classCallCheck(this, bulmaSlider); 109 | 110 | var _this = _possibleConstructorReturn(this, (bulmaSlider.__proto__ || Object.getPrototypeOf(bulmaSlider)).call(this)); 111 | 112 | _this.element = typeof selector === 'string' ? document.querySelector(selector) : selector; 113 | // An invalid selector or non-DOM node has been provided. 114 | if (!_this.element) { 115 | throw new Error('An invalid selector or non-DOM node has been provided.'); 116 | } 117 | 118 | _this._clickEvents = ['click']; 119 | /// Set default options and merge with instance defined 120 | _this.options = _extends({}, options); 121 | 122 | _this.onSliderInput = _this.onSliderInput.bind(_this); 123 | 124 | _this.init(); 125 | return _this; 126 | } 127 | 128 | /** 129 | * Initiate all DOM element containing selector 130 | * @method 131 | * @return {Array} Array of all slider instances 132 | */ 133 | 134 | 135 | _createClass(bulmaSlider, [{ 136 | key: 'init', 137 | 138 | 139 | /** 140 | * Initiate plugin 141 | * @method init 142 | * @return {void} 143 | */ 144 | value: function init() { 145 | this._id = 'bulmaSlider' + new Date().getTime() + Math.floor(Math.random() * Math.floor(9999)); 146 | this.output = this._findOutputForSlider(); 147 | 148 | this._bindEvents(); 149 | 150 | if (this.output) { 151 | if (this.element.classList.contains('has-output-tooltip')) { 152 | // Get new output position 153 | var newPosition = this._getSliderOutputPosition(); 154 | 155 | // Set output position 156 | this.output.style['left'] = newPosition.position; 157 | } 158 | } 159 | 160 | this.emit('bulmaslider:ready', this.element.value); 161 | } 162 | }, { 163 | key: '_findOutputForSlider', 164 | value: function _findOutputForSlider() { 165 | var _this2 = this; 166 | 167 | var result = null; 168 | var outputs = document.getElementsByTagName('output') || []; 169 | 170 | Array.from(outputs).forEach(function (output) { 171 | if (output.htmlFor == _this2.element.getAttribute('id')) { 172 | result = output; 173 | return true; 174 | } 175 | }); 176 | return result; 177 | } 178 | }, { 179 | key: '_getSliderOutputPosition', 180 | value: function _getSliderOutputPosition() { 181 | // Update output position 182 | var newPlace, minValue; 183 | 184 | var style = window.getComputedStyle(this.element, null); 185 | // Measure width of range input 186 | var sliderWidth = parseInt(style.getPropertyValue('width'), 10); 187 | 188 | // Figure out placement percentage between left and right of input 189 | if (!this.element.getAttribute('min')) { 190 | minValue = 0; 191 | } else { 192 | minValue = this.element.getAttribute('min'); 193 | } 194 | var newPoint = (this.element.value - minValue) / (this.element.getAttribute('max') - minValue); 195 | 196 | // Prevent bubble from going beyond left or right (unsupported browsers) 197 | if (newPoint < 0) { 198 | newPlace = 0; 199 | } else if (newPoint > 1) { 200 | newPlace = sliderWidth; 201 | } else { 202 | newPlace = sliderWidth * newPoint; 203 | } 204 | 205 | return { 206 | 'position': newPlace + 'px' 207 | }; 208 | } 209 | 210 | /** 211 | * Bind all events 212 | * @method _bindEvents 213 | * @return {void} 214 | */ 215 | 216 | }, { 217 | key: 
'_bindEvents', 218 | value: function _bindEvents() { 219 | if (this.output) { 220 | // Add event listener to update output when slider value change 221 | this.element.addEventListener('input', this.onSliderInput, false); 222 | } 223 | } 224 | }, { 225 | key: 'onSliderInput', 226 | value: function onSliderInput(e) { 227 | e.preventDefault(); 228 | 229 | if (this.element.classList.contains('has-output-tooltip')) { 230 | // Get new output position 231 | var newPosition = this._getSliderOutputPosition(); 232 | 233 | // Set output position 234 | this.output.style['left'] = newPosition.position; 235 | } 236 | 237 | // Check for prefix and postfix 238 | var prefix = this.output.hasAttribute('data-prefix') ? this.output.getAttribute('data-prefix') : ''; 239 | var postfix = this.output.hasAttribute('data-postfix') ? this.output.getAttribute('data-postfix') : ''; 240 | 241 | // Update output with slider value 242 | this.output.value = prefix + this.element.value + postfix; 243 | 244 | this.emit('bulmaslider:ready', this.element.value); 245 | } 246 | }], [{ 247 | key: 'attach', 248 | value: function attach() { 249 | var _this3 = this; 250 | 251 | var selector = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'input[type="range"].slider'; 252 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 253 | 254 | var instances = new Array(); 255 | 256 | var elements = isString(selector) ? document.querySelectorAll(selector) : Array.isArray(selector) ? selector : [selector]; 257 | elements.forEach(function (element) { 258 | if (typeof element[_this3.constructor.name] === 'undefined') { 259 | var instance = new bulmaSlider(element, options); 260 | element[_this3.constructor.name] = instance; 261 | instances.push(instance); 262 | } else { 263 | instances.push(element[_this3.constructor.name]); 264 | } 265 | }); 266 | 267 | return instances; 268 | } 269 | }]); 270 | 271 | return bulmaSlider; 272 | }(__WEBPACK_IMPORTED_MODULE_0__events__["a" /* default */]); 273 | 274 | /* harmony default export */ __webpack_exports__["default"] = (bulmaSlider); 275 | 276 | /***/ }), 277 | /* 1 */ 278 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 279 | 280 | "use strict"; 281 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); 282 | 283 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 284 | 285 | var EventEmitter = function () { 286 | function EventEmitter() { 287 | var listeners = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : []; 288 | 289 | _classCallCheck(this, EventEmitter); 290 | 291 | this._listeners = new Map(listeners); 292 | this._middlewares = new Map(); 293 | } 294 | 295 | _createClass(EventEmitter, [{ 296 | key: "listenerCount", 297 | value: function listenerCount(eventName) { 298 | if (!this._listeners.has(eventName)) { 299 | return 0; 300 | } 301 | 302 | var eventListeners = this._listeners.get(eventName); 303 | return eventListeners.length; 304 | } 305 | }, { 306 | key: "removeListeners", 307 | value: function removeListeners() { 308 | var _this = this; 309 | 310 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 311 | var middleware = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; 312 | 313 | if (eventName !== null) { 314 | if (Array.isArray(eventName)) { 315 | eventName.forEach(function (e) { 316 | return _this.removeListeners(e, middleware); 317 | }); 318 | } else { 319 | this._listeners.delete(eventName); 320 | 321 | if (middleware) { 322 | this.removeMiddleware(eventName); 323 | } 324 | } 325 | } else { 326 | this._listeners = new Map(); 327 | } 328 | } 329 | }, { 330 | key: "middleware", 331 | value: function middleware(eventName, fn) { 332 | var _this2 = this; 333 | 334 | if (Array.isArray(eventName)) { 335 | eventName.forEach(function (e) { 336 | return _this2.middleware(e, fn); 337 | }); 338 | } else { 339 | if (!Array.isArray(this._middlewares.get(eventName))) { 340 | this._middlewares.set(eventName, []); 341 | } 342 | 343 | this._middlewares.get(eventName).push(fn); 344 | } 345 | } 346 | }, { 347 | key: "removeMiddleware", 348 | value: function removeMiddleware() { 349 | var _this3 = this; 350 | 351 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 352 | 353 | if (eventName !== null) { 354 | if (Array.isArray(eventName)) { 355 | eventName.forEach(function (e) { 356 | return _this3.removeMiddleware(e); 357 | }); 358 | } else { 359 | this._middlewares.delete(eventName); 360 | } 361 | } else { 362 | this._middlewares = new Map(); 363 | } 364 | } 365 | }, { 366 | key: "on", 367 | value: function on(name, callback) { 368 | var _this4 = this; 369 | 370 | var once = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; 371 | 372 | if (Array.isArray(name)) { 373 | name.forEach(function (e) { 374 | return _this4.on(e, callback); 375 | }); 376 | } else { 377 | name = name.toString(); 378 | var split = name.split(/,|, | /); 379 | 380 | if (split.length > 1) { 381 | split.forEach(function (e) { 382 | return _this4.on(e, callback); 383 | }); 384 | } else { 385 | if (!Array.isArray(this._listeners.get(name))) { 386 | this._listeners.set(name, []); 387 | } 388 | 389 | this._listeners.get(name).push({ once: once, callback: callback }); 390 | } 391 | } 392 | } 393 | }, { 394 | key: "once", 395 | value: function once(name, callback) { 396 | this.on(name, callback, true); 397 | } 398 | }, { 399 | key: "emit", 400 | value: function emit(name, data) { 401 | var _this5 = this; 402 | 403 | var silent = arguments.length > 2 && arguments[2] !== undefined ?
arguments[2] : false; 404 | 405 | name = name.toString(); 406 | var listeners = this._listeners.get(name); 407 | var middlewares = null; 408 | var doneCount = 0; 409 | var execute = silent; 410 | 411 | if (Array.isArray(listeners)) { 412 | listeners.forEach(function (listener, index) { 413 | // Start Middleware checks unless we're doing a silent emit 414 | if (!silent) { 415 | middlewares = _this5._middlewares.get(name); 416 | // Check and execute Middleware 417 | if (Array.isArray(middlewares)) { 418 | middlewares.forEach(function (middleware) { 419 | middleware(data, function () { 420 | var newData = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 421 | 422 | if (newData !== null) { 423 | data = newData; 424 | } 425 | doneCount++; 426 | }, name); 427 | }); 428 | 429 | if (doneCount >= middlewares.length) { 430 | execute = true; 431 | } 432 | } else { 433 | execute = true; 434 | } 435 | } 436 | 437 | // If Middleware checks have been passed, execute 438 | if (execute) { 439 | if (listener.once) { 440 | listeners[index] = null; 441 | } 442 | listener.callback(data); 443 | } 444 | }); 445 | 446 | // Dirty way of removing used Events 447 | while (listeners.indexOf(null) !== -1) { 448 | listeners.splice(listeners.indexOf(null), 1); 449 | } 450 | } 451 | } 452 | }]); 453 | 454 | return EventEmitter; 455 | }(); 456 | 457 | /* harmony default export */ __webpack_exports__["a"] = (EventEmitter); 458 | 459 | /***/ }) 460 | /******/ ])["default"]; 461 | }); -------------------------------------------------------------------------------- /static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | 4 | $(document).ready(function() { 5 | // Check for click events on the navbar burger icon 6 | 7 | var options = { 8 | slidesToScroll: 1, 9 | slidesToShow: 1, 10 | loop: true, 11 | infinite: true, 12 | autoplay: true, 13 | autoplaySpeed: 5000, 14 | } 15 | 16 | // Initialize all div with carousel class 17 | var carousels = bulmaCarousel.attach('.carousel', options); 18 | 19 | bulmaSlider.attach(); 20 | 21 | }) 22 | -------------------------------------------------------------------------------- /static/pdfs/sample.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/pdfs/sample.pdf -------------------------------------------------------------------------------- /static/videos/banner_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/videos/banner_video.mp4 -------------------------------------------------------------------------------- /static/videos/carousel1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/videos/carousel1.mp4 -------------------------------------------------------------------------------- /static/videos/carousel2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/videos/carousel2.mp4 -------------------------------------------------------------------------------- /static/videos/carousel3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/videos/carousel3.mp4 -------------------------------------------------------------------------------- /static/videos/overview_video_raw.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/videos/overview_video_raw.mp4 -------------------------------------------------------------------------------- /static/videos/scannet0000_compress.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WangXihan-bit/GaussianGraph/56aa8ffecb50f2d2333dfc62d2fc02163d3d04f4/static/videos/scannet0000_compress.mp4 --------------------------------------------------------------------------------