├── Uni3D
│   ├── data
│   │   ├── __init__.py
│   │   ├── ModelNet40_openshape.yaml
│   │   ├── ScanObjNN_openshape.yaml
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── datasets.cpython-38.pyc
│   │   ├── utils
│   │   │   ├── __pycache__
│   │   │   │   ├── data.cpython-38.pyc
│   │   │   │   ├── io.cpython-38.pyc
│   │   │   │   ├── build.cpython-38.pyc
│   │   │   │   ├── config.cpython-38.pyc
│   │   │   │   ├── logger.cpython-38.pyc
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   └── registry.cpython-38.pyc
│   │   │   ├── __init__.py
│   │   │   ├── build.py
│   │   │   ├── io.py
│   │   │   ├── config.py
│   │   │   ├── data.py
│   │   │   └── logger.py
│   │   ├── Objaverse_lvis_openshape.yaml
│   │   ├── dataset_catalog.json
│   │   ├── DATASETS.md
│   │   └── templates.json
│   ├── utils
│   │   ├── __init__.py
│   │   ├── bpe_simple_vocab_16e6.txt.gz
│   │   ├── __pycache__
│   │   │   ├── logger.cpython-38.pyc
│   │   │   ├── optim.cpython-38.pyc
│   │   │   ├── params.cpython-38.pyc
│   │   │   ├── utils.cpython-38.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── scheduler.cpython-38.pyc
│   │   │   ├── tokenizer.cpython-38.pyc
│   │   │   └── distributed.cpython-38.pyc
│   │   ├── logger.py
│   │   ├── scheduler.py
│   │   ├── misc.py
│   │   ├── tokenizer.py
│   │   └── dist.py
│   ├── assets
│   │   ├── editing.jpg
│   │   ├── overview.jpg
│   │   ├── retrival.jpg
│   │   ├── vis_part.jpg
│   │   ├── retrival_text.jpg
│   │   └── scene_understanding.jpg
│   ├── __pycache__
│   │   └── main.cpython-38.pyc
│   └── model
│       ├── __pycache__
│       │   ├── losses.cpython-38.pyc
│       │   ├── uni3d.cpython-38.pyc
│       │   └── point_encoder.cpython-38.pyc
│       ├── uni3d.py
│       └── losses.py
├── scannet
│   ├── meta_data
│   │   ├── 1.txt
│   │   ├── scannet_means.npz
│   │   ├── scannetv2_test.txt
│   │   ├── scannetv2_val.txt
│   │   └── scannetv2_val_copy.txt
│   ├── __pycache__
│   │   ├── scannet_utils.cpython-38.pyc
│   │   └── load_scannet_data.cpython-38.pyc
│   ├── wget-log
│   ├── wget-log.1
│   ├── data_viz.py
│   ├── scannet_utils.py
│   ├── batch_load_scannet_data.py
│   ├── model_util_scannet.py
│   └── load_scannet_data.py
├── models
│   ├── modules
│   │   ├── __init__.py
│   │   ├── resnet_block.py
│   │   └── common.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   ├── helpers.cpython-38.pyc
│   │   ├── mink_resnet.cpython-38.pyc
│   │   ├── model_vdetr.cpython-38.pyc
│   │   ├── position_embedding.cpython-38.pyc
│   │   └── vdetr_transformer.cpython-38.pyc
│   ├── __init__.py
│   ├── mink_resnet.py
│   ├── helpers.py
│   └── position_embedding.py
├── pointnet2
│   ├── pointnet2.egg-info
│   │   ├── dependency_links.txt
│   │   ├── top_level.txt
│   │   ├── PKG-INFO
│   │   └── SOURCES.txt
│   ├── __pycache__
│   │   ├── pytorch_utils.cpython-38.pyc
│   │   ├── pointnet2_utils.cpython-38.pyc
│   │   └── pointnet2_modules.cpython-38.pyc
│   ├── dist
│   │   └── pointnet2-0.0.0-py3.8-linux-x86_64.egg
│   ├── build
│   │   ├── lib.linux-x86_64-3.8
│   │   │   └── pointnet2
│   │   │       └── _ext.cpython-38-x86_64-linux-gnu.so
│   │   └── temp.linux-x86_64-3.8
│   │       └── build.ninja
│   ├── _ext_src
│   │   ├── include
│   │   │   ├── ball_query.h
│   │   │   ├── group_points.h
│   │   │   ├── sampling.h
│   │   │   ├── interpolate.h
│   │   │   ├── utils.h
│   │   │   └── cuda_utils.h
│   │   └── src
│   │       ├── bindings.cpp
│   │       ├── ball_query.cpp
│   │       ├── ball_query_gpu.cu
│   │       ├── group_points.cpp
│   │       ├── sampling.cpp
│   │       ├── group_points_gpu.cu
│   │       ├── interpolate.cpp
│   │       ├── interpolate_gpu.cu
│   │       └── sampling_gpu.cu
│   ├── pointnet2_test.py
│   └── setup.py
├── util
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── io.cpython-38.pyc
│   │   ├── nms.cpython-38.pyc
│   │   ├── dist.cpython-38.pyc
│   │   ├── misc.cpython-38.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── box_util.cpython-38.pyc
│   │   ├── eval_det.cpython-38.pyc
│   │   ├── pc_util.cpython-38.pyc
│   │   ├── pc_util.cpython-39.pyc
│   │   ├── o3d_helper.cpython-38.pyc
│   │   ├── ap_calculator.cpython-38.pyc
│   │   └── random_cuboid.cpython-38.pyc
│   ├── box_intersection.cpython-38-x86_64-linux-gnu.so
│   ├── build
│   │   ├── temp.linux-x86_64-3.8
│   │   │   └── box_intersection.o
│   │   └── lib.linux-x86_64-3.8
│   │       └── box_intersection.cpython-38-x86_64-linux-gnu.so
│   ├── cython_compile.py
│   ├── logger.py
│   ├── io.py
│   ├── misc.py
│   ├── random_cuboid.py
│   ├── nms.py
│   ├── dist.py
│   ├── box_intersection.pyx
│   └── ply_helper.py
├── datasets
│   ├── __pycache__
│   │   ├── scannet.cpython-38.pyc
│   │   ├── scannet.cpython-39.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   └── __init__.cpython-39.pyc
│   └── __init__.py
├── optimizer.py
├── run.sh
├── README.md
└── point2graph.yaml

--------------------------------------------------------------------------------
/Uni3D/data/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Uni3D/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/scannet/meta_data/1.txt:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pointnet2
2 | 
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 | from .pc_util import *
2 | from .box_util import *
--------------------------------------------------------------------------------
/Uni3D/assets/editing.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/editing.jpg
--------------------------------------------------------------------------------
/Uni3D/assets/overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/overview.jpg
--------------------------------------------------------------------------------
/Uni3D/assets/retrival.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/retrival.jpg
--------------------------------------------------------------------------------
/Uni3D/assets/vis_part.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/vis_part.jpg
--------------------------------------------------------------------------------
/Uni3D/data/ModelNet40_openshape.yaml:
--------------------------------------------------------------------------------
1 | NAME: ModelNet40_openshape
2 | DATA_PATH: ./data/test_datasets/modelnet40
--------------------------------------------------------------------------------
/Uni3D/data/ScanObjNN_openshape.yaml:
--------------------------------------------------------------------------------
1 | NAME: ScanObjNN_openshape
2 | DATA_PATH: ./data/test_datasets/scanobjectnn
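
The two YAML files above are minimal dataset configs: a registry `NAME` plus a data path. A sketch of how such a file is read, following the `yaml` + `EasyDict` pattern used by `Uni3D/data/utils/config.py` further down (the path is illustrative):

```python
import yaml
from easydict import EasyDict

# Load an evaluation config the same way cfg_from_yaml_file does below;
# attribute access mirrors how the dataset code reads cfg.NAME / cfg.DATA_PATH.
with open("Uni3D/data/ScanObjNN_openshape.yaml") as f:
    cfg = EasyDict(yaml.load(f, Loader=yaml.FullLoader))

print(cfg.NAME)       # ScanObjNN_openshape
print(cfg.DATA_PATH)  # ./data/test_datasets/scanobjectnn
```
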
--------------------------------------------------------------------------------
/Uni3D/assets/retrival_text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/retrival_text.jpg
--------------------------------------------------------------------------------
/scannet/meta_data/scannet_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/scannet/meta_data/scannet_means.npz
--------------------------------------------------------------------------------
/util/__pycache__/io.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/io.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/nms.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/nms.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/__pycache__/main.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/__pycache__/main.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/assets/scene_understanding.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/scene_understanding.jpg
--------------------------------------------------------------------------------
/util/__pycache__/dist.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/dist.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/misc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/misc.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/bpe_simple_vocab_16e6.txt.gz
--------------------------------------------------------------------------------
/util/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/util/__pycache__/box_util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/box_util.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/eval_det.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/eval_det.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/pc_util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/pc_util.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/pc_util.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/pc_util.cpython-39.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/scannet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/scannet.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/scannet.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/scannet.cpython-39.pyc
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/helpers.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/helpers.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/o3d_helper.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/o3d_helper.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/model/__pycache__/losses.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/model/__pycache__/losses.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/model/__pycache__/uni3d.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/model/__pycache__/uni3d.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/logger.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/logger.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/optim.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/optim.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/params.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/params.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/models/__pycache__/mink_resnet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/mink_resnet.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/model_vdetr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/model_vdetr.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/ap_calculator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/ap_calculator.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/random_cuboid.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/random_cuboid.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/__pycache__/datasets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/__pycache__/datasets.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/data.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/data.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/io.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/io.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/scheduler.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/scheduler.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/tokenizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/tokenizer.cpython-38.pyc
--------------------------------------------------------------------------------
/scannet/__pycache__/scannet_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/scannet/__pycache__/scannet_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/build.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/build.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/config.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/logger.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/logger.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/distributed.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/distributed.cpython-38.pyc
--------------------------------------------------------------------------------
/pointnet2/__pycache__/pytorch_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/__pycache__/pytorch_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/registry.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/registry.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/model/__pycache__/point_encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/model/__pycache__/point_encoder.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/position_embedding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/position_embedding.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/vdetr_transformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/vdetr_transformer.cpython-38.pyc
--------------------------------------------------------------------------------
/pointnet2/__pycache__/pointnet2_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/__pycache__/pointnet2_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/pointnet2/dist/pointnet2-0.0.0-py3.8-linux-x86_64.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/dist/pointnet2-0.0.0-py3.8-linux-x86_64.egg
--------------------------------------------------------------------------------
/scannet/__pycache__/load_scannet_data.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/scannet/__pycache__/load_scannet_data.cpython-38.pyc
--------------------------------------------------------------------------------
/util/box_intersection.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/box_intersection.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/util/build/temp.linux-x86_64-3.8/box_intersection.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/build/temp.linux-x86_64-3.8/box_intersection.o
--------------------------------------------------------------------------------
/pointnet2/__pycache__/pointnet2_modules.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/__pycache__/pointnet2_modules.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/Objaverse_lvis_openshape.yaml:
--------------------------------------------------------------------------------
1 | NAME: Objaverse_lvis_openshape
2 | PC_PATH: ./data/test_datasets/objaverse_lvis/lvis_testset.txt
3 | PC_PATH_ROOT: ./data/test_datasets/objaverse_lvis
--------------------------------------------------------------------------------
/util/build/lib.linux-x86_64-3.8/box_intersection.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/build/lib.linux-x86_64-3.8/box_intersection.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/pointnet2/build/lib.linux-x86_64-3.8/pointnet2/_ext.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/build/lib.linux-x86_64-3.8/pointnet2/_ext.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: pointnet2
3 | Version: 0.0.0
4 | Summary: UNKNOWN
5 | Home-page: UNKNOWN
6 | License: UNKNOWN
7 | Platform: UNKNOWN
8 | 
9 | UNKNOWN
10 | 
11 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/ball_query.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #pragma once
4 | #include <torch/extension.h>
5 | 
6 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
7 |                       const int nsample);
8 | 
--------------------------------------------------------------------------------
/Uni3D/data/utils/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 |  * Copyright (c) 2023, salesforce.com, inc.
3 |  * All rights reserved.
4 |  * SPDX-License-Identifier: BSD-3-Clause
5 |  * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 |  * By Le Xue
7 | '''
8 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/group_points.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <torch/extension.h>
6 | 
7 | at::Tensor group_points(at::Tensor points, at::Tensor idx);
8 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
9 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | #Copyright (c) V-DETR authors. All Rights Reserved.
2 | from .model_vdetr import build_vdetr
3 | 
4 | MODEL_FUNCS = {
5 |     'vdetr': build_vdetr,
6 | }
7 | 
8 | def build_model(args, dataset_config):
9 |     model = MODEL_FUNCS[args.model_name](args, dataset_config)
10 |     return model
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/sampling.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <torch/extension.h>
6 | 
7 | at::Tensor gather_points(at::Tensor points, at::Tensor idx);
8 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
9 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);
10 | 
--------------------------------------------------------------------------------
/Uni3D/data/utils/build.py:
--------------------------------------------------------------------------------
1 | from . import registry
2 | 
3 | DATASETS = registry.Registry('dataset')
4 | 
5 | 
6 | def build_dataset_from_cfg(cfg, default_args = None):
7 |     """
8 |     Build a dataset, defined by `dataset_name`.
9 |     Args:
10 |         cfg (eDICT):
11 |     Returns:
12 |         Dataset: a constructed dataset specified by dataset_name.
13 |     """
14 |     return DATASETS.build(cfg, default_args = default_args)
15 | 
16 | 
17 | 
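
`build.py` is a thin entry point into a registry pattern: dataset classes register themselves under a name, and `build_dataset_from_cfg` instantiates whichever class the config selects. `registry.py` itself only appears here as a `.pyc`, so the decorator below assumes the common mmcv-style interface; the class and config values are hypothetical:

```python
from easydict import EasyDict
from Uni3D.data.utils.build import DATASETS, build_dataset_from_cfg

@DATASETS.register_module()  # assumed mmcv-style registration decorator
class ToyPointCloudDataset:
    def __init__(self, config):
        self.data_path = config.DATA_PATH

cfg = EasyDict(NAME="ToyPointCloudDataset",
               DATA_PATH="./data/test_datasets/toy")
dataset = build_dataset_from_cfg(cfg)  # looks up cfg.NAME in the registry
```
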
--------------------------------------------------------------------------------
/scannet/wget-log:
--------------------------------------------------------------------------------
1 | --2024-08-16 22:18:33--  https://drive.usercontent.google.com/open?id=1ag_SO8kVpNdZNGOUEAwgJTYEmf4TJErv
2 | Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.250.191.129, 2607:f8b0:4009:818::2001
3 | Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.250.191.129|:443... connected.
4 | HTTP request sent, awaiting response... 404 Not Found
5 | 2024-08-16 22:18:33 ERROR 404: Not Found.
6 | 
7 | 
--------------------------------------------------------------------------------
/scannet/wget-log.1:
--------------------------------------------------------------------------------
1 | --2024-08-16 22:19:18--  https://drive.usercontent.google.com/open?id=1ag_SO8kVpNdZNGOUEAwgJTYEmf4TJErv
2 | Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.250.191.129, 2607:f8b0:4009:818::2001
3 | Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.250.191.129|:443... connected.
4 | HTTP request sent, awaiting response... 404 Not Found
5 | 2024-08-16 22:19:18 ERROR 404: Not Found.
6 | 
7 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | setup.py
2 | _ext_src/src/ball_query.cpp
3 | _ext_src/src/ball_query_gpu.cu
4 | _ext_src/src/bindings.cpp
5 | _ext_src/src/group_points.cpp
6 | _ext_src/src/group_points_gpu.cu
7 | _ext_src/src/interpolate.cpp
8 | _ext_src/src/interpolate_gpu.cu
9 | _ext_src/src/sampling.cpp
10 | _ext_src/src/sampling_gpu.cu
11 | pointnet2.egg-info/PKG-INFO
12 | pointnet2.egg-info/SOURCES.txt
13 | pointnet2.egg-info/dependency_links.txt
14 | pointnet2.egg-info/top_level.txt
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/interpolate.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #pragma once
4 | 
5 | #include <torch/extension.h>
6 | #include <vector>
7 | 
8 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
9 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
10 |                              at::Tensor weight);
11 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
12 |                                   at::Tensor weight, const int m);
13 | 
--------------------------------------------------------------------------------
/util/cython_compile.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | from setuptools import setup, Extension
4 | from Cython.Build import cythonize
5 | import numpy as np
6 | 
7 | 
8 | # hacky way to find numpy include path
9 | # replace with actual path if this does not work
10 | np_include_path = np.__file__.replace("__init__.py", "core/include/")
11 | INCLUDE_PATH = [
12 |     np_include_path
13 | ]
14 | 
15 | setup(
16 |     ext_modules = cythonize(
17 |         Extension(
18 |             "box_intersection",
19 |             sources=["box_intersection.pyx"],
20 |             include_dirs=INCLUDE_PATH
21 |         )),
22 | )
23 | 
24 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/bindings.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #include "ball_query.h"
5 | #include "group_points.h"
6 | #include "interpolate.h"
7 | #include "sampling.h"
8 | 
9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
10 |   m.def("gather_points", &gather_points);
11 |   m.def("gather_points_grad", &gather_points_grad);
12 |   m.def("furthest_point_sampling", &furthest_point_sampling);
13 | 
14 |   m.def("three_nn", &three_nn);
15 |   m.def("three_interpolate", &three_interpolate);
16 |   m.def("three_interpolate_grad", &three_interpolate_grad);
17 | 
18 |   m.def("ball_query", &ball_query);
19 | 
20 |   m.def("group_points", &group_points);
21 |   m.def("group_points_grad", &group_points_grad);
22 | }
23 | 
--------------------------------------------------------------------------------
/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def build_optimizer(args, model):
5 | 
6 |     params_with_decay = []
7 |     params_without_decay = []
8 |     for name, param in model.named_parameters():
9 |         if param.requires_grad is False:
10 |             continue
11 |         if args.filter_biases_wd and (len(param.shape) == 1 or name.endswith("bias")):
12 |             params_without_decay.append(param)
13 |         else:
14 |             params_with_decay.append(param)
15 | 
16 |     if args.filter_biases_wd:
17 |         param_groups = [
18 |             {"params": params_without_decay, "weight_decay": 0.0},
19 |             {"params": params_with_decay, "weight_decay": args.weight_decay},
20 |         ]
21 |     else:
22 |         param_groups = [
23 |             {"params": params_with_decay, "weight_decay": args.weight_decay},
24 |         ]
25 |     optimizer = torch.optim.AdamW(param_groups, lr=args.base_lr)
26 |     return optimizer
27 | 
--------------------------------------------------------------------------------
/Uni3D/data/dataset_catalog.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ensembled_embedding": {
3 |         "config": "./data/Ensembled_embedding.yaml",
4 |         "train": "train",
5 |         "test": "train",
6 |         "usage": "train"
7 |     },
8 |     "ensembled": {
9 |         "config": "./data/Ensembled.yaml",
10 |         "train": "train",
11 |         "test": "train",
12 |         "usage": "train"
13 |     },
14 |     "objaverse_lvis_openshape": {
15 |         "config": "./data/Objaverse_lvis_openshape.yaml",
16 |         "train": "train",
17 |         "test": "test",
18 |         "usage": "test"
19 |     },
20 |     "modelnet40_openshape": {
21 |         "config": "./data/ModelNet40_openshape.yaml",
22 |         "train": "train",
23 |         "test": "test",
24 |         "usage": "test"
25 |     },
26 |     "scanobjnn_openshape": {
27 |         "config": "./data/ScanObjNN_openshape.yaml",
28 |         "train": "train",
29 |         "test": "test",
30 |         "usage": "test"
31 |     }
32 | }
--------------------------------------------------------------------------------
/Uni3D/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | 
3 | 
4 | def setup_logging(log_file, level, include_host=False):
5 |     if include_host:
6 |         import socket
7 |         hostname = socket.gethostname()
8 |         formatter = logging.Formatter(
9 |             f'%(asctime)s |  {hostname} | %(levelname)s | %(message)s', datefmt='%Y-%m-%d,%H:%M:%S')
10 |     else:
11 |         formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s', datefmt='%Y-%m-%d,%H:%M:%S')
12 | 
13 |     logging.root.setLevel(level)
14 |     loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
15 |     for logger in loggers:
16 |         logger.setLevel(level)
17 | 
18 |     stream_handler = logging.StreamHandler()
19 |     stream_handler.setFormatter(formatter)
20 |     logging.root.addHandler(stream_handler)
21 | 
22 |     if log_file:
23 |         file_handler = logging.FileHandler(filename=log_file)
24 |         file_handler.setFormatter(formatter)
25 |         logging.root.addHandler(file_handler)
26 | 
27 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | ''' Testing customized ops. '''
4 | 
5 | import torch
6 | from torch.autograd import gradcheck
7 | import numpy as np
8 | 
9 | import os
10 | import sys
11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
12 | sys.path.append(BASE_DIR)
13 | import pointnet2_utils
14 | 
15 | def test_interpolation_grad():
16 |     batch_size = 1
17 |     feat_dim = 2
18 |     m = 4
19 |     feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda()
20 | 
21 |     def interpolate_func(inputs):
22 |         idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda()
23 |         weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda()
24 |         interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight)
25 |         return interpolated_feats
26 | 
27 |     assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1))
28 | 
29 | if __name__=='__main__':
30 |     test_interpolation_grad()
31 | 
--------------------------------------------------------------------------------
/util/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | import torch
4 | 
5 | try:
6 |     from tensorboardX import SummaryWriter
7 | except ImportError:
8 |     print("Cannot import tensorboard. Will log to txt files only.")
9 |     SummaryWriter = None
10 | 
11 | from utils.dist import is_primary
12 | 
13 | 
14 | class Logger(object):
15 |     def __init__(self, log_dir=None) -> None:
16 |         self.log_dir = log_dir
17 |         if SummaryWriter is not None and is_primary():
18 |             self.writer = SummaryWriter(self.log_dir)
19 |         else:
20 |             self.writer = None
21 | 
22 |     def log_scalars(self, scalar_dict, step, prefix=None):
23 |         if self.writer is None:
24 |             return
25 |         for k in scalar_dict:
26 |             v = scalar_dict[k]
27 |             if isinstance(v, torch.Tensor):
28 |                 v = v.detach().cpu().item()
29 |             if prefix is not None:
30 |                 k = prefix + k
31 |             self.writer.add_scalar(k, v, step)
32 | 
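
`Logger` wraps tensorboardX behind a primary-process guard, so non-primary ranks (or installs without tensorboardX) silently no-op. A short sketch of how `log_scalars` is driven (directory and values are illustrative; assumes the repo root is on `PYTHONPATH`):

```python
import torch
from util.logger import Logger

logger = Logger(log_dir="./outputs/tb")  # writer created only on the primary rank
logger.log_scalars({"loss": torch.tensor(0.73), "lr": 5e-4},
                   step=100, prefix="train/")  # logs train/loss and train/lr
```
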
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/utils.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <ATen/cuda/CUDAContext.h>
6 | #include <torch/extension.h>
7 | 
8 | #define CHECK_CUDA(x)                                     \
9 |   do {                                                    \
10 |     AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor");  \
11 |   } while (0)
12 | 
13 | #define CHECK_CONTIGUOUS(x)                                      \
14 |   do {                                                           \
15 |     AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \
16 |   } while (0)
17 | 
18 | #define CHECK_IS_INT(x)                                \
19 |   do {                                                 \
20 |     AT_ASSERT(x.scalar_type() == at::ScalarType::Int,  \
21 |               #x " must be an int tensor");            \
22 |   } while (0)
23 | 
24 | #define CHECK_IS_FLOAT(x)                                \
25 |   do {                                                   \
26 |     AT_ASSERT(x.scalar_type() == at::ScalarType::Float,  \
27 |               #x " must be a float tensor");             \
28 |   } while (0)
29 | 
--------------------------------------------------------------------------------
/pointnet2/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from setuptools import setup
7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
8 | import glob
9 | import os.path as osp
10 | 
11 | this_dir = osp.dirname(osp.abspath(__file__))
12 | 
13 | _ext_src_root = "_ext_src"
14 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob(
15 |     "{}/src/*.cu".format(_ext_src_root)
16 | )
17 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root))
18 | 
19 | setup(
20 |     name='pointnet2',
21 |     ext_modules=[
22 |         CUDAExtension(
23 |             name='pointnet2._ext',
24 |             sources=_ext_sources,
25 |             extra_compile_args={
26 |                 "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))],
27 |                 "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))],
28 |             },
29 |             include_dirs=[osp.join(this_dir, _ext_src_root, "include")],
30 |         )
31 |     ],
32 |     cmdclass={
33 |         'build_ext': BuildExtension
34 |     }
35 | )
36 | 
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .scannet import ScannetDetectionDataset, ScannetDatasetConfig
2 | # from .sunrgbd import SunrgbdDetectionDataset, SunrgbdDatasetConfig #will release very soon
3 | 
4 | 
5 | DATASET_FUNCTIONS = {
6 |     "scannet": [ScannetDetectionDataset, ScannetDatasetConfig],}
7 | 
8 | 
9 | def build_dataset(args):
10 |     dataset_builder = DATASET_FUNCTIONS[args.dataset_name][0]
11 |     dataset_config = DATASET_FUNCTIONS[args.dataset_name][1]()
12 | 
13 |     if args.test_only:
14 |         dataset_dict = {
15 |             "test": dataset_builder(
16 |                 dataset_config,
17 |                 split_set="val",
18 |                 augment=False,
19 |                 args=args
20 |             ),
21 |         }
22 |     else:
23 |         dataset_dict = {
24 |             "train": dataset_builder(
25 |                 dataset_config,
26 |                 split_set="train",
27 |                 augment=True,
28 |                 args=args
29 |             ),
30 |             "test": dataset_builder(
31 |                 dataset_config,
32 |                 split_set="val",
33 |                 augment=False,
34 |                 args=args
35 |             ),
36 |         }
37 |     return dataset_dict, dataset_config
38 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/ball_query.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #include "ball_query.h"
5 | #include "utils.h"
6 | 
7 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
8 |                                      int nsample, const float *new_xyz,
9 |                                      const float *xyz, int *idx);
10 | 
11 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
12 |                       const int nsample) {
13 |   CHECK_CONTIGUOUS(new_xyz);
14 |   CHECK_CONTIGUOUS(xyz);
15 |   CHECK_IS_FLOAT(new_xyz);
16 |   CHECK_IS_FLOAT(xyz);
17 | 
18 |   if (new_xyz.is_cuda()) {
19 |     CHECK_CUDA(xyz);
20 |   }
21 | 
22 |   at::Tensor idx =
23 |       torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
24 |                    at::device(new_xyz.device()).dtype(at::ScalarType::Int));
25 | 
26 |   if (new_xyz.is_cuda()) {
27 |     query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
28 |                                     radius, nsample, new_xyz.data<float>(),
29 |                                     xyz.data<float>(), idx.data<int>());
30 |   } else {
31 |     AT_ASSERT(false, "CPU not supported");
32 |   }
33 | 
34 |   return idx;
35 | }
36 | 
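
Once `setup.py` has built the extension (e.g. `pip install .` inside `pointnet2/`), the functions bound in `bindings.cpp` are called on contiguous CUDA float tensors shaped `(B, N, 3)`. A sketch of the two ops declared in `sampling.h` and `ball_query.h` (shapes and values are illustrative; requires a GPU build):

```python
import torch
from pointnet2 import _ext  # compiled by pointnet2/setup.py

B, N, M = 2, 1024, 128
xyz = torch.rand(B, N, 3, device="cuda").contiguous()

idx = _ext.furthest_point_sampling(xyz, M)  # (B, M) int indices of spread points
centers = xyz.gather(1, idx.long().unsqueeze(-1).expand(-1, -1, 3))

# For each of the M centers, up to 64 neighbor indices within radius 0.2.
neighbors = _ext.ball_query(centers.contiguous(), xyz, 0.2, 64)  # (B, M, 64)
```
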
--------------------------------------------------------------------------------
/Uni3D/data/DATASETS.md:
--------------------------------------------------------------------------------
1 | ## Evaluation datasets
2 | 
3 | 1. Please download the data from this [repository](https://huggingface.co/BAAI/Uni3D/blob/main/data/test_datasets.zip), which contains datasets for Objaverse-LVIS, ModelNet40, and ScanObjectNN.
4 | 
5 | 2. Place the `test_datasets` folder in the `/data` directory on your machine. The core `data` directory structure should look like this:
6 | 
7 | ```
8 | ./data
9 | -- test_datasets/
10 | -- modelnet40
11 | -- scanobjectnn
12 | -- objaverse_lvis
13 | -- utils/
14 | -- datasets.py
15 | -- ModelNet40_openshape.yaml
16 | -- Objaverse_lvis_openshape.yaml
17 | -- ScanObjNN_openshape.yaml
18 | -- dataset_catalog.json
19 | -- labels.json
20 | -- templates.json
21 | ```
22 | 3. **Important**: If you choose to place the data in a location other than the default one mentioned above, please remember to update the corresponding dataset's YAML file with your path.
23 | 
24 | Now you are ready to use the datasets for zero-shot evaluation. If you have any questions or encounter any issues, please refer to the documentation or feel free to reach out for assistance.
25 | 
26 | ## Pre-training datasets
27 | 
28 | We're in the process of organizing and uploading. Hang tight, and stay tuned! ☕️
29 | 
30 | Thanks for your patience and support!
31 | 
--------------------------------------------------------------------------------
/scannet/data_viz.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | import sys
7 | import os
8 | 
9 | BASE_DIR = os.path.dirname(__file__)
10 | sys.path.append(BASE_DIR)
11 | 
12 | import numpy as np
13 | import pc_util
14 | 
15 | scene_name = 'scannet_train_detection_data/scene0002_00'
16 | output_folder = 'data_viz_dump'
17 | 
18 | data = np.load(scene_name+'_vert.npy')
19 | scene_points = data[:,0:3]
20 | colors = data[:,3:]
21 | instance_labels = np.load(scene_name+'_ins_label.npy')
22 | semantic_labels = np.load(scene_name+'_sem_label.npy')
23 | instance_bboxes = np.load(scene_name+'_bbox.npy')
24 | 
25 | print(np.unique(instance_labels))
26 | print(np.unique(semantic_labels))
27 | input()
28 | if not os.path.exists(output_folder):
29 |     os.mkdir(output_folder)
30 | 
31 | # Write scene as OBJ file for visualization
32 | pc_util.write_ply_rgb(scene_points, colors, os.path.join(output_folder, 'scene.obj'))
33 | pc_util.write_ply_color(scene_points, instance_labels, os.path.join(output_folder, 'scene_instance.obj'))
34 | pc_util.write_ply_color(scene_points, semantic_labels, os.path.join(output_folder, 'scene_semantic.obj'))
35 | 
36 | from model_util_scannet import ScannetDatasetConfig
37 | DC = ScannetDatasetConfig()
38 | print(instance_bboxes.shape)
39 | 
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | model=create_uni3d
3 | 
4 | gpu=1
5 | echo "export CUDA_VISIBLE_DEVICES=$gpu"
6 | export CUDA_VISIBLE_DEVICES=${gpu}
7 | export OMP_NUM_THREADS=12
8 | 
9 | clip_model="EVA02-E-14-plus"
10 | ckpt_path="./Uni3D/downloads/ckpt/model_giant.pt"
11 | pretrained="./Uni3D/downloads/open_clip_pytorch_model.bin" # or "laion2b_s9b_b144k"
12 | size="giant"
13 | 
14 | if [ $size = "giant" ]; then
15 |     pc_model="eva_giant_patch14_560"
16 |     pc_feat_dim=1408
17 | elif [ $size = "large" ]; then
18 |     pc_model="eva02_large_patch14_448"
19 |     pc_feat_dim=1024
20 | elif [ $size = "base" ]; then
21 |     pc_model="eva02_base_patch14_448"
22 |     pc_feat_dim=768
23 | elif [ $size = "small" ]; then
24 |     pc_model="eva02_small_patch14_224"
25 |     pc_feat_dim=384
26 | elif [ $size = "tiny" ]; then
27 |     pc_model="eva02_tiny_patch14_224"
28 |     pc_feat_dim=192
29 | else
30 |     echo "Invalid option"
31 |     exit 1
32 | fi
33 | eps=0.04
34 | min_points=3
35 | python main.py \
36 |     --dataset_name scannet \
37 |     --dataset_root_dir ./scannet/ \
38 |     --meta_data_dir ./scannet/meta_data/ \
39 |     --test_ckpt ./models/scannet_540ep.pth \
40 |     --auto_test \
41 |     --test_only \
42 |     --conf_thresh 0.01 \
43 |     --pc-model $pc_model \
44 |     --pc-feat-dim $pc_feat_dim \
45 |     --pc-encoder-dim 512 \
46 |     --ckpt_path $ckpt_path \
47 |     --embed-dim 1024 \
48 |     --group-size 64 \
49 |     --num-group 512 \
50 |     --inference_only \
51 |     --npoints 10000 \
52 |     --eps $eps \
53 |     --min_points $min_points
54 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/cuda_utils.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #ifndef _CUDA_UTILS_H
4 | #define _CUDA_UTILS_H
5 | 
6 | #include <ATen/ATen.h>
7 | #include <ATen/cuda/CUDAContext.h>
8 | #include <cmath>
9 | 
10 | #include <cuda.h>
11 | #include <cuda_runtime.h>
12 | 
13 | #include <vector>
14 | 
15 | #define TOTAL_THREADS 512
16 | 
17 | inline int opt_n_threads(int work_size) {
18 |   const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
19 | 
20 |   return max(min(1 << pow_2, TOTAL_THREADS), 1);
21 | }
22 | 
23 | inline dim3 opt_block_config(int x, int y) {
24 |   const int x_threads = opt_n_threads(x);
25 |   const int y_threads =
26 |       max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
27 |   dim3 block_config(x_threads, y_threads, 1);
28 | 
29 |   return block_config;
30 | }
31 | 
32 | #define CUDA_CHECK_ERRORS()                                           \
33 |   do {                                                                \
34 |     cudaError_t err = cudaGetLastError();                             \
35 |     if (cudaSuccess != err) {                                         \
36 |       fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",  \
37 |               cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
38 |               __FILE__);                                              \
39 |       exit(-1);                                                       \
40 |     }                                                                 \
41 |   } while (0)
42 | 
43 | #endif
44 | 
--------------------------------------------------------------------------------
/Uni3D/model/uni3d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import timm
3 | import numpy as np
4 | from torch import nn
5 | from . import losses
6 | 
7 | from .point_encoder import PointcloudEncoder
8 | 
9 | class Uni3D(nn.Module):
10 |     def __init__(self, point_encoder):
11 |         super().__init__()
12 |         self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
13 |         self.point_encoder = point_encoder
14 | 
15 |     def encode_pc(self, pc):
16 |         xyz = pc[:,:,:3].contiguous()
17 |         color = pc[:,:,3:].contiguous()
18 |         pc_feat = self.point_encoder(xyz, color)
19 |         return pc_feat
20 | 
21 |     def forward(self, pc, text, image):
22 |         text_embed_all = text
23 |         image_embed = image
24 |         pc_embed = self.encode_pc(pc)
25 |         return {'text_embed': text_embed_all,
26 |                 'pc_embed': pc_embed,
27 |                 'image_embed': image_embed,
28 |                 'logit_scale': self.logit_scale.exp()}
29 | 
30 | def get_filter_loss(args):
31 |     return losses.Uni3d_Text_Image_Loss()
32 | 
33 | def get_metric_names(model):
34 |     return ['loss', 'uni3d_loss', 'pc_image_acc', 'pc_text_acc']
35 | 
36 | def create_uni3d(args):
37 |     # create transformer blocks for point cloud via timm
38 |     point_transformer = timm.create_model(args.pc_model, checkpoint_path=args.pretrained_pc, drop_path_rate=args.drop_path_rate)
39 | 
40 |     # create whole point cloud encoder
41 |     point_encoder = PointcloudEncoder(point_transformer, args)
42 | 
43 |     # uni3d model
44 |     model = Uni3D(point_encoder=point_encoder,)
45 |     return model
46 | 
47 | 
48 | 
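
`encode_pc` fixes the input contract for Uni3D: a `(B, N, 6)` tensor with xyz in the first three channels and color in the last three. A sketch of zero-shot-style scoring with an already constructed model (the `create_uni3d(args)` call and checkpoint loading from `run.sh` are omitted, since `point_encoder.py` is not shown here; `text_embed` stands in for normalized CLIP text features):

```python
import torch

# Assume `model` is a Uni3D instance with weights loaded, and `text_embed`
# is a (num_classes, embed_dim) tensor of normalized CLIP text features.
pc = torch.rand(2, 10000, 6)  # xyz = pc[:, :, :3], rgb = pc[:, :, 3:]
with torch.no_grad():
    pc_embed = model.encode_pc(pc)                          # (2, embed_dim)
    pc_embed = pc_embed / pc_embed.norm(dim=-1, keepdim=True)
    # Cosine similarity scaled by the learned temperature, as in forward():
    logits = model.logit_scale.exp() * pc_embed @ text_embed.t()
pred = logits.argmax(dim=-1)  # best-matching text prompt per cloud
```
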
--------------------------------------------------------------------------------
/Uni3D/data/utils/io.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import open3d
4 | import os
5 | 
6 | class IO:
7 |     @classmethod
8 |     def get(cls, file_path):
9 |         _, file_extension = os.path.splitext(file_path)
10 | 
11 |         if file_extension in ['.npy']:
12 |             return cls._read_npy(file_path)
13 |         elif file_extension in ['.pcd']:
14 |             return cls._read_pcd(file_path)
15 |         elif file_extension in ['.h5']:
16 |             return cls._read_h5(file_path)
17 |         elif file_extension in ['.txt', '.xyz']:
18 |             return cls._read_txt(file_path)
19 |         elif file_extension in [".bin"]:
20 |             return cls._read_bin(file_path)
21 |         else:
22 |             raise Exception('Unsupported file extension: %s' % file_extension)
23 | 
24 |     # References: https://github.com/numpy/numpy/blob/master/numpy/lib/format.py
25 |     @classmethod
26 |     def _read_npy(cls, file_path):
27 |         return np.load(file_path)
28 | 
29 |     # References: https://github.com/dimatura/pypcd/blob/master/pypcd/pypcd.py#L275
30 |     # Support PCD files without compression ONLY!
31 |     @classmethod
32 |     def _read_pcd(cls, file_path):
33 |         pc = open3d.io.read_point_cloud(file_path)
34 |         ptcloud = np.array(pc.points)
35 |         return ptcloud
36 | 
37 |     @classmethod
38 |     def _read_txt(cls, file_path):
39 |         return np.loadtxt(file_path)
40 | 
41 |     @classmethod
42 |     def _read_h5(cls, file_path):
43 |         f = h5py.File(file_path, 'r')
44 |         return f['data'][()]
45 | 
46 |     @classmethod
47 |     def _read_bin(cls, file_path):
48 |         return np.fromfile(file_path) # , dtype=np.float32, count=-1
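
`IO.get` dispatches purely on file extension, so dataset code never chooses a reader explicitly. A one-call sketch (the path is illustrative); note that the `.bin` branch calls `np.fromfile` with its `dtype` argument commented out, so raw binary files come back as float64 unless that is restored:

```python
from Uni3D.data.utils.io import IO

points = IO.get("./data/test_datasets/modelnet40/sample_0001.npy")  # np.ndarray
```
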
--------------------------------------------------------------------------------
/util/io.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | import torch
4 | import os
5 | from util.dist import is_primary
6 | 
7 | 
8 | def save_checkpoint(
9 |     checkpoint_dir,
10 |     model_no_ddp,
11 |     optimizer,
12 |     epoch,
13 |     args,
14 |     best_val_metrics,
15 |     filename=None,
16 | ):
17 |     if not is_primary():
18 |         return
19 |     if filename is None:
20 |         filename = f"checkpoint_{epoch:04d}.pth"
21 |     checkpoint_name = os.path.join(checkpoint_dir, filename)
22 | 
23 |     sd = {
24 |         "model": model_no_ddp.state_dict(),
25 |         "optimizer": optimizer.state_dict(),
26 |         "epoch": epoch,
27 |         "args": args,
28 |         "best_val_metrics": best_val_metrics,
29 |     }
30 |     torch.save(sd, checkpoint_name)
31 | 
32 | 
33 | def resume_if_possible(checkpoint_dir, model_no_ddp, optimizer):
34 |     """
35 |     Resume if checkpoint is available.
36 |     Return
37 |     - epoch of loaded checkpoint.
38 |     """
39 |     epoch = -1
40 |     best_val_metrics = {}
41 |     if not os.path.isdir(checkpoint_dir):
42 |         return epoch, best_val_metrics
43 | 
44 |     last_checkpoint = os.path.join(checkpoint_dir, "checkpoint.pth")
45 |     if not os.path.isfile(last_checkpoint):
46 |         return epoch, best_val_metrics
47 | 
48 |     sd = torch.load(last_checkpoint, map_location=torch.device("cpu"))
49 |     epoch = sd["epoch"]
50 |     best_val_metrics = sd["best_val_metrics"]
51 |     print(f"Found checkpoint at {epoch}. Resuming.")
52 | 
53 |     model_no_ddp.load_state_dict(sd["model"])
54 |     optimizer.load_state_dict(sd["optimizer"])
55 |     print(
56 |         f"Loaded model and optimizer state at {epoch}. Loaded best val metrics so far."
57 |     )
58 |     return epoch, best_val_metrics
59 | 
--------------------------------------------------------------------------------
/scannet/meta_data/scannetv2_test.txt:
--------------------------------------------------------------------------------
1 | scene0000_00
2 | scene0707_00
3 | scene0708_00
4 | scene0709_00
5 | scene0710_00
6 | scene0711_00
7 | scene0712_00
8 | scene0713_00
9 | scene0714_00
10 | scene0715_00
11 | scene0716_00
12 | scene0717_00
13 | scene0718_00
14 | scene0719_00
15 | scene0720_00
16 | scene0721_00
17 | scene0722_00
18 | scene0723_00
19 | scene0724_00
20 | scene0725_00
21 | scene0726_00
22 | scene0727_00
23 | scene0728_00
24 | scene0729_00
25 | scene0730_00
26 | scene0731_00
27 | scene0732_00
28 | scene0733_00
29 | scene0734_00
30 | scene0735_00
31 | scene0736_00
32 | scene0737_00
33 | scene0738_00
34 | scene0739_00
35 | scene0740_00
36 | scene0741_00
37 | scene0742_00
38 | scene0743_00
39 | scene0744_00
40 | scene0745_00
41 | scene0746_00
42 | scene0747_00
43 | scene0748_00
44 | scene0749_00
45 | scene0750_00
46 | scene0751_00
47 | scene0752_00
48 | scene0753_00
49 | scene0754_00
50 | scene0755_00
51 | scene0756_00
52 | scene0757_00
53 | scene0758_00
54 | scene0759_00
55 | scene0760_00
56 | scene0761_00
57 | scene0762_00
58 | scene0763_00
59 | scene0764_00
60 | scene0765_00
61 | scene0766_00
62 | scene0767_00
63 | scene0768_00
64 | scene0769_00
65 | scene0770_00
66 | scene0771_00
67 | scene0772_00
68 | scene0773_00
69 | scene0774_00
70 | scene0775_00
71 | scene0776_00
72 | scene0777_00
73 | scene0778_00
74 | scene0779_00
75 | scene0780_00
76 | scene0781_00
77 | scene0782_00
78 | scene0783_00
79 | scene0784_00
80 | scene0785_00
81 | scene0786_00
82 | scene0787_00
83 | scene0788_00
84 | scene0789_00
85 | scene0790_00
86 | scene0791_00
87 | scene0792_00
88 | scene0793_00
89 | scene0794_00
90 | scene0795_00
91 | scene0796_00
92 | scene0797_00
93 | scene0798_00
94 | scene0799_00
95 | scene0800_00
96 | scene0801_00
97 | scene0802_00
98 | scene0803_00
99 | scene0804_00
100 | scene0805_00
101 | scene0806_00
102 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/ball_query_gpu.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #include <math.h>
5 | #include <stdio.h>
6 | #include <stdlib.h>
7 | 
8 | #include "cuda_utils.h"
9 | 
10 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
11 | // output: idx(b, m, nsample)
12 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius,
13 |                                         int nsample,
14 |                                         const float *__restrict__ new_xyz,
15 |                                         const float *__restrict__ xyz,
16 |                                         int *__restrict__ idx) {
17 |   int batch_index = blockIdx.x;
18 |   xyz += batch_index * n * 3;
19 |   new_xyz += batch_index * m * 3;
20 |   idx += m * nsample * batch_index;
21 | 
22 |   int index = threadIdx.x;
23 |   int stride = blockDim.x;
24 | 
25 |   float radius2 = radius * radius;
26 |   for (int j = index; j < m; j += stride) {
27 |     float new_x = new_xyz[j * 3 + 0];
28 |     float new_y = new_xyz[j * 3 + 1];
29 |     float new_z = new_xyz[j * 3 + 2];
30 |     for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
31 |       float x = xyz[k * 3 + 0];
32 |       float y = xyz[k * 3 + 1];
33 |       float z = xyz[k * 3 + 2];
34 |       float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
35 |                  (new_z - z) * (new_z - z);
36 |       if (d2 < radius2) {
37 |         if (cnt == 0) {
38 |           for (int l = 0; l < nsample; ++l) {
39 |             idx[j * nsample + l] = k;
40 |           }
41 |         }
42 |         idx[j * nsample + cnt] = k;
43 |         ++cnt;
44 |       }
45 |     }
46 |   }
47 | }
48 | 
49 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
50 |                                      int nsample, const float *new_xyz,
51 |                                      const float *xyz, int *idx) {
52 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
53 |   query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
54 |       b, n, m, radius, nsample, new_xyz, xyz, idx);
55 | 
56 |   CUDA_CHECK_ERRORS();
57 | }
58 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/group_points.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | 4 | #include "group_points.h" 5 | #include "utils.h" 6 | 7 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 8 | const float *points, const int *idx, 9 | float *out); 10 | 11 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 12 | int nsample, const float *grad_out, 13 | const int *idx, float *grad_points); 14 | 15 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 16 | CHECK_CONTIGUOUS(points); 17 | CHECK_CONTIGUOUS(idx); 18 | CHECK_IS_FLOAT(points); 19 | CHECK_IS_INT(idx); 20 | 21 | if (points.is_cuda()) { 22 | CHECK_CUDA(idx); 23 | } 24 | 25 | at::Tensor output = 26 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 27 | at::device(points.device()).dtype(at::ScalarType::Float)); 28 | 29 | if (points.is_cuda()) { 30 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 31 | idx.size(1), idx.size(2), points.data(), 32 | idx.data(), output.data()); 33 | } else { 34 | AT_ASSERT(false, "CPU not supported"); 35 | } 36 | 37 | return output; 38 | } 39 | 40 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 41 | CHECK_CONTIGUOUS(grad_out); 42 | CHECK_CONTIGUOUS(idx); 43 | CHECK_IS_FLOAT(grad_out); 44 | CHECK_IS_INT(idx); 45 | 46 | if (grad_out.is_cuda()) { 47 | CHECK_CUDA(idx); 48 | } 49 | 50 | at::Tensor output = 51 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 52 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 53 | 54 | if (grad_out.is_cuda()) { 55 | group_points_grad_kernel_wrapper( 56 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 57 | grad_out.data(), idx.data(), output.data()); 58 | } else { 59 | AT_ASSERT(false, "CPU not supported"); 60 | } 61 | 62 | return output; 63 | } 64 | -------------------------------------------------------------------------------- /Uni3D/utils/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def assign_learning_rate(optimizer, new_lr): 5 | for param_group in optimizer.param_groups: 6 | param_group["lr"] = new_lr 7 | 8 | 9 | def _warmup_lr(base_lr, warmup_length, step): 10 | return base_lr * (step + 1) / warmup_length 11 | 12 | 13 | def cosine_lr(optimizer, base_lr, warmup_length, steps): 14 | def _lr_adjuster(step): 15 | if step < warmup_length: 16 | lr = _warmup_lr(base_lr, warmup_length, step) 17 | else: 18 | e = step - warmup_length 19 | es = steps - warmup_length 20 | lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr 21 | assign_learning_rate(optimizer, lr) 22 | return lr 23 | return _lr_adjuster 24 | 25 | def warmup_cosine_lr(optimizer, args, steps): 26 | def _lr_adjuster(step): 27 | for param_group in optimizer.param_groups: 28 | 29 | if param_group['group'] == 'text': 30 | base_lr = args.text_lr if args.text_lr is not None else args.lr 31 | elif param_group['group'] == 'visual': 32 | base_lr = args.visual_lr if args.visual_lr is not None else args.lr 33 | else: 34 | base_lr = args.lr 35 | 36 | if step < args.warmup: 37 | lr = _warmup_lr(base_lr, args.warmup, step) 38 | else: 39 | e = step - args.warmup 40 | es = steps - args.warmup 41 | lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr 42 | scale = param_group.get("lr_scale", 1.0) 43 | param_group["lr"] = scale * lr 44 | return lr 45 | return _lr_adjuster 46 | 47 | 48 | def warmup_step_lr(optimizer, args, decay_t=500, decay_rate=0.8): 49 | def _lr_adjuster(step): 50 | for param_group in 
optimizer.param_groups: 51 | if param_group['group'] == 'text': 52 | base_lr = args.text_lr 53 | elif param_group['group'] == 'visual': 54 | base_lr = args.visual_lr 55 | else: 56 | base_lr = args.lr 57 | 58 | if step < args.warmup: 59 | lr = _warmup_lr(base_lr, args.warmup, step) 60 | else: 61 | e = step - args.warmup 62 | lr = base_lr * (decay_rate ** (e // decay_t)) 63 | scale = param_group.get("lr_scale", 1.0) 64 | param_group["lr"] = scale * lr 65 | return lr 66 | return _lr_adjuster -------------------------------------------------------------------------------- /Uni3D/data/utils/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict 3 | import os 4 | from .logger import print_log 5 | 6 | def log_args_to_file(args, pre='args', logger=None): 7 | for key, val in args.__dict__.items(): 8 | print_log(f'{pre}.{key} : {val}', logger = logger) 9 | 10 | def log_config_to_file(cfg, pre='cfg', logger=None): 11 | for key, val in cfg.items(): 12 | if isinstance(cfg[key], EasyDict): 13 | print_log(f'{pre}.{key} = edict()', logger = logger) 14 | log_config_to_file(cfg[key], pre=pre + '.' + key, logger=logger) 15 | continue 16 | print_log(f'{pre}.{key} : {val}', logger = logger) 17 | 18 | def merge_new_config(config, new_config): 19 | for key, val in new_config.items(): 20 | if not isinstance(val, dict): 21 | if key == '_base_': 22 | with open(new_config['_base_'], 'r') as f: 23 | try: 24 | val = yaml.load(f, Loader=yaml.FullLoader) 25 | except: 26 | val = yaml.load(f) 27 | config[key] = EasyDict() 28 | merge_new_config(config[key], val) 29 | else: 30 | config[key] = val 31 | continue 32 | if key not in config: 33 | config[key] = EasyDict() 34 | merge_new_config(config[key], val) 35 | return config 36 | 37 | def cfg_from_yaml_file(cfg_file): 38 | config = EasyDict() 39 | with open(cfg_file, 'r') as f: 40 | try: 41 | new_config = yaml.load(f, Loader=yaml.FullLoader) 42 | except: 43 | new_config = yaml.load(f) 44 | merge_new_config(config=config, new_config=new_config) 45 | return config 46 | 47 | def get_config(args, logger=None): 48 | if args.resume: 49 | cfg_path = os.path.join(args.experiment_path, 'config.yaml') 50 | if not os.path.exists(cfg_path): 51 | print_log("Failed to resume", logger = logger) 52 | raise FileNotFoundError() 53 | print_log(f'Resume yaml from {cfg_path}', logger = logger) 54 | args.config = cfg_path 55 | config = cfg_from_yaml_file(args.config) 56 | if not args.resume and args.local_rank == 0: 57 | save_experiment_config(args, config, logger) 58 | return config 59 | 60 | def save_experiment_config(args, config, logger = None): 61 | config_path = os.path.join(args.experiment_path, 'config.yaml') 62 | os.system('cp %s %s' % (args.config, config_path)) 63 | print_log(f'Copy the Config file from {args.config} to {config_path}',logger = logger ) -------------------------------------------------------------------------------- /scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
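# Usage sketch for the readers below (the scene path is illustrative, not shipped with the repo):
#   verts = read_mesh_vertices_rgb('scans/scene0001_01/scene0001_01_vh_clean_2.ply')
#   # -> (N, 6) float32 array: xyz followed by rgb in [0, 255]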
5 | 6 | ''' Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts ''' 7 | import os 8 | import sys 9 | import json 10 | import csv 11 | 12 | try: 13 | import numpy as np 14 | except: 15 | print("Failed to import numpy package.") 16 | sys.exit(-1) 17 | 18 | try: 19 | from plyfile import PlyData, PlyElement 20 | except: 21 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 22 | print("pip install plyfile") 23 | sys.exit(-1) 24 | 25 | def represents_int(s): 26 | ''' if string s represents an int. ''' 27 | try: 28 | int(s) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 35 | assert os.path.isfile(filename) 36 | mapping = dict() 37 | with open(filename) as csvfile: 38 | reader = csv.DictReader(csvfile, delimiter='\t') 39 | for row in reader: 40 | mapping[row[label_from]] = int(row[label_to]) 41 | if represents_int(list(mapping.keys())[0]): 42 | mapping = {int(k):v for k,v in mapping.items()} 43 | return mapping 44 | 45 | def read_mesh_vertices(filename): 46 | """ read XYZ for each vertex. 47 | """ 48 | assert os.path.isfile(filename) 49 | with open(filename, 'rb') as f: 50 | plydata = PlyData.read(f) 51 | num_verts = plydata['vertex'].count 52 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 53 | vertices[:,0] = plydata['vertex'].data['x'] 54 | vertices[:,1] = plydata['vertex'].data['y'] 55 | vertices[:,2] = plydata['vertex'].data['z'] 56 | return vertices 57 | 58 | def read_mesh_vertices_rgb(filename): 59 | """ read XYZ RGB for each vertex. 60 | Note: RGB values are in 0-255 61 | """ 62 | assert os.path.isfile(filename) 63 | with open(filename, 'rb') as f: 64 | plydata = PlyData.read(f) 65 | num_verts = plydata['vertex'].count 66 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 67 | vertices[:,0] = plydata['vertex'].data['x'] 68 | vertices[:,1] = plydata['vertex'].data['y'] 69 | vertices[:,2] = plydata['vertex'].data['z'] 70 | vertices[:,3] = plydata['vertex'].data['red'] 71 | vertices[:,4] = plydata['vertex'].data['green'] 72 | vertices[:,5] = plydata['vertex'].data['blue'] 73 | return vertices 74 | 75 | 76 | -------------------------------------------------------------------------------- /Uni3D/model/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from Uni3D.utils import utils 6 | 7 | class Uni3d_Text_Image_Loss(nn.Module): 8 | def __init__(self): 9 | super().__init__() 10 | self.labels = None 11 | self.last_local_batch_size = None 12 | 13 | def forward(self, outputs, masks): 14 | pc_embed = outputs['pc_embed'] 15 | text_embed = outputs['text_embed'] 16 | image_embed = outputs['image_embed'] 17 | logit_scale = outputs['logit_scale'] 18 | local_batch_size = pc_embed.size(0) 19 | 20 | if local_batch_size != self.last_local_batch_size: 21 | self.labels = local_batch_size * utils.get_rank() + torch.arange( 22 | local_batch_size, device=pc_embed.device 23 | ) 24 | self.last_local_batch_size = local_batch_size 25 | 26 | masks = masks.to(pc_embed.device) 27 | 28 | # normalized features 29 | pc_embed = F.normalize(pc_embed, dim=-1, p=2) 30 | text_embed = F.normalize(text_embed, dim=-1, p=2) 31 | image_embed = F.normalize(image_embed, dim=-1, p=2) 32 | 33 | # gather features from all GPUs 34 | pc_embed_all, text_embed_all, image_embed_all, masks_all = \ 35 | utils.all_gather_batch([pc_embed, text_embed, 
image_embed, masks]) 36 | 37 | # cosine similarity as logits 38 | logits_per_pc_text = logit_scale * pc_embed @ text_embed_all.t() 39 | logits_per_text_pc = logit_scale * text_embed @ pc_embed_all.t() 40 | logits_per_pc_image = logit_scale * pc_embed @ image_embed_all.t() 41 | logits_per_image_pc = logit_scale * image_embed @ pc_embed_all.t() 42 | 43 | loss_text = (F.cross_entropy(logits_per_pc_text, self.labels) + \ 44 | F.cross_entropy(logits_per_text_pc, self.labels)) / 2 45 | 46 | masks = masks.bool() 47 | masks = ~masks 48 | 49 | self.labels_c = self.labels.clone() 50 | self.labels_c[masks] = -100 51 | 52 | loss_image = (F.cross_entropy(logits_per_pc_image, self.labels_c, ignore_index=-100) +\ 53 | F.cross_entropy(logits_per_image_pc, self.labels_c, ignore_index=-100)) / 2 54 | 55 | loss = loss_text + loss_image 56 | 57 | 58 | 59 | # compute accuracy 60 | with torch.no_grad(): 61 | pred = torch.argmax(logits_per_pc_text, dim=-1) 62 | correct = pred.eq(self.labels).sum() 63 | pc_text_acc = 100 * correct / local_batch_size 64 | 65 | pred = torch.argmax(logits_per_pc_image, dim=-1) 66 | correct = pred.eq(self.labels).sum() 67 | pc_image_acc = 100 * correct / local_batch_size 68 | 69 | return {'loss': loss, 'uni3d_loss': loss, 'pc_image_acc': pc_image_acc, 'pc_text_acc': pc_text_acc} 70 | 71 | -------------------------------------------------------------------------------- /util/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | import numpy as np 4 | from collections import deque 5 | from typing import List 6 | from util.dist import is_distributed, barrier, all_reduce_sum 7 | 8 | 9 | def my_worker_init_fn(worker_id): 10 | np.random.seed(np.random.get_state()[1][0] + worker_id) 11 | 12 | 13 | @torch.jit.ignore 14 | def to_list_1d(arr) -> List[float]: 15 | arr = arr.detach().cpu().numpy().tolist() 16 | return arr 17 | 18 | 19 | @torch.jit.ignore 20 | def to_list_3d(arr) -> List[List[List[float]]]: 21 | arr = arr.detach().cpu().numpy().tolist() 22 | return arr 23 | 24 | 25 | def huber_loss(error, delta=1.0): 26 | """ 27 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 28 | x = error = pred - gt or dist(pred,gt) 29 | 0.5 * |x|^2 if |x|<=d 30 | 0.5 * d^2 + d * (|x|-d) if |x|>d 31 | """ 32 | abs_error = torch.abs(error) 33 | quadratic = torch.clamp(abs_error, max=delta) 34 | linear = abs_error - quadratic 35 | loss = 0.5 * quadratic ** 2 + delta * linear 36 | return loss 37 | 38 | 39 | # From https://github.com/facebookresearch/detr/blob/master/util/misc.py 40 | class SmoothedValue(object): 41 | """Track a series of values and provide access to smoothed values over a 42 | window or the global series average. 43 | """ 44 | 45 | def __init__(self, window_size=20, fmt=None): 46 | if fmt is None: 47 | fmt = "{median:.4f} ({global_avg:.4f})" 48 | self.deque = deque(maxlen=window_size) 49 | self.total = 0.0 50 | self.count = 0 51 | self.fmt = fmt 52 | 53 | def update(self, value, n=1): 54 | self.deque.append(value) 55 | self.count += n 56 | self.total += value * n 57 | 58 | def synchronize_between_processes(self): 59 | """ 60 | Warning: does not synchronize the deque! 
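Only count/total are all-reduced across processes; the windowed statistics (median, avg, max) remain local to each rank.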
61 | """ 62 | if not is_distributed(): 63 | return 64 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") 65 | barrier() 66 | all_reduce_sum(t) 67 | t = t.tolist() 68 | self.count = int(t[0]) 69 | self.total = t[1] 70 | 71 | @property 72 | def median(self): 73 | d = torch.tensor(list(self.deque)) 74 | return d.median().item() 75 | 76 | @property 77 | def avg(self): 78 | d = torch.tensor(list(self.deque), dtype=torch.float32) 79 | return d.mean().item() 80 | 81 | @property 82 | def global_avg(self): 83 | return self.total / self.count 84 | 85 | @property 86 | def max(self): 87 | return max(self.deque) 88 | 89 | @property 90 | def value(self): 91 | return self.deque[-1] 92 | 93 | def __str__(self): 94 | return self.fmt.format( 95 | median=self.median, 96 | avg=self.avg, 97 | global_avg=self.global_avg, 98 | max=self.max, 99 | value=self.value, 100 | ) 101 | -------------------------------------------------------------------------------- /Uni3D/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | import numpy as np 4 | from collections import deque 5 | from typing import List 6 | from utils.dist import is_distributed, barrier, all_reduce_sum 7 | 8 | 9 | def my_worker_init_fn(worker_id): 10 | np.random.seed(np.random.get_state()[1][0] + worker_id) 11 | 12 | 13 | @torch.jit.ignore 14 | def to_list_1d(arr) -> List[float]: 15 | arr = arr.detach().cpu().numpy().tolist() 16 | return arr 17 | 18 | 19 | @torch.jit.ignore 20 | def to_list_3d(arr) -> List[List[List[float]]]: 21 | arr = arr.detach().cpu().numpy().tolist() 22 | return arr 23 | 24 | 25 | def huber_loss(error, delta=1.0): 26 | """ 27 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 28 | x = error = pred - gt or dist(pred,gt) 29 | 0.5 * |x|^2 if |x|<=d 30 | 0.5 * d^2 + d * (|x|-d) if |x|>d 31 | """ 32 | abs_error = torch.abs(error) 33 | quadratic = torch.clamp(abs_error, max=delta) 34 | linear = abs_error - quadratic 35 | loss = 0.5 * quadratic ** 2 + delta * linear 36 | return loss 37 | 38 | 39 | # From https://github.com/facebookresearch/detr/blob/master/util/misc.py 40 | class SmoothedValue(object): 41 | """Track a series of values and provide access to smoothed values over a 42 | window or the global series average. 43 | """ 44 | 45 | def __init__(self, window_size=20, fmt=None): 46 | if fmt is None: 47 | fmt = "{median:.4f} ({global_avg:.4f})" 48 | self.deque = deque(maxlen=window_size) 49 | self.total = 0.0 50 | self.count = 0 51 | self.fmt = fmt 52 | 53 | def update(self, value, n=1): 54 | self.deque.append(value) 55 | self.count += n 56 | self.total += value * n 57 | 58 | def synchronize_between_processes(self): 59 | """ 60 | Warning: does not synchronize the deque! 
61 | """ 62 | if not is_distributed(): 63 | return 64 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") 65 | barrier() 66 | all_reduce_sum(t) 67 | t = t.tolist() 68 | self.count = int(t[0]) 69 | self.total = t[1] 70 | 71 | @property 72 | def median(self): 73 | d = torch.tensor(list(self.deque)) 74 | return d.median().item() 75 | 76 | @property 77 | def avg(self): 78 | d = torch.tensor(list(self.deque), dtype=torch.float32) 79 | return d.mean().item() 80 | 81 | @property 82 | def global_avg(self): 83 | return self.total / self.count 84 | 85 | @property 86 | def max(self): 87 | return max(self.deque) 88 | 89 | @property 90 | def value(self): 91 | return self.deque[-1] 92 | 93 | def __str__(self): 94 | return self.fmt.format( 95 | median=self.median, 96 | avg=self.avg, 97 | global_avg=self.global_avg, 98 | max=self.max, 99 | value=self.value, 100 | ) 101 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include "sampling.h" 4 | #include "utils.h" 5 | 6 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 7 | const float *points, const int *idx, 8 | float *out); 9 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *grad_out, const int *idx, 11 | float *grad_points); 12 | 13 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 14 | const float *dataset, float *temp, 15 | int *idxs); 16 | 17 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.is_cuda()) { 32 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_ASSERT(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 43 | const int n) { 44 | CHECK_CONTIGUOUS(grad_out); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_IS_FLOAT(grad_out); 47 | CHECK_IS_INT(idx); 48 | 49 | if (grad_out.is_cuda()) { 50 | CHECK_CUDA(idx); 51 | } 52 | 53 | at::Tensor output = 54 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 55 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 56 | 57 | if (grad_out.is_cuda()) { 58 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 59 | idx.size(1), grad_out.data(), 60 | idx.data(), output.data()); 61 | } else { 62 | AT_ASSERT(false, "CPU not supported"); 63 | } 64 | 65 | return output; 66 | } 67 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 68 | CHECK_CONTIGUOUS(points); 69 | CHECK_IS_FLOAT(points); 70 | 71 | at::Tensor output = 72 | torch::zeros({points.size(0), nsamples}, 73 | at::device(points.device()).dtype(at::ScalarType::Int)); 74 | 75 | at::Tensor tmp = 76 | torch::full({points.size(0), points.size(1)}, 1e10, 77 | at::device(points.device()).dtype(at::ScalarType::Float)); 78 | 79 | if (points.is_cuda()) { 80 | furthest_point_sampling_kernel_wrapper( 81 | points.size(0), 
points.size(1), nsamples, points.data(), 82 | tmp.data(), output.data()); 83 | } else { 84 | AT_ASSERT(false, "CPU not supported"); 85 | } 86 | 87 | return output; 88 | } 89 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include <stdio.h> 5 | #include <stdlib.h> 6 | 7 | #include "cuda_utils.h" 8 | 9 | // input: points(b, c, n) idx(b, npoints, nsample) 10 | // output: out(b, c, npoints, nsample) 11 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 12 | int nsample, 13 | const float *__restrict__ points, 14 | const int *__restrict__ idx, 15 | float *__restrict__ out) { 16 | int batch_index = blockIdx.x; 17 | points += batch_index * n * c; 18 | idx += batch_index * npoints * nsample; 19 | out += batch_index * npoints * nsample * c; 20 | 21 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 22 | const int stride = blockDim.y * blockDim.x; 23 | for (int i = index; i < c * npoints; i += stride) { 24 | const int l = i / npoints; 25 | const int j = i % npoints; 26 | for (int k = 0; k < nsample; ++k) { 27 | int ii = idx[j * nsample + k]; 28 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 29 | } 30 | } 31 | } 32 | 33 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 34 | const float *points, const int *idx, 35 | float *out) { 36 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 37 | 38 | group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 39 | b, c, n, npoints, nsample, points, idx, out); 40 | 41 | CUDA_CHECK_ERRORS(); 42 | } 43 | 44 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 45 | // output: grad_points(b, c, n) 46 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 47 | int nsample, 48 | const float *__restrict__ grad_out, 49 | const int *__restrict__ idx, 50 | float *__restrict__ grad_points) { 51 | int batch_index = blockIdx.x; 52 | grad_out += batch_index * npoints * nsample * c; 53 | idx += batch_index * npoints * nsample; 54 | grad_points += batch_index * n * c; 55 | 56 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 57 | const int stride = blockDim.y * blockDim.x; 58 | for (int i = index; i < c * npoints; i += stride) { 59 | const int l = i / npoints; 60 | const int j = i % npoints; 61 | for (int k = 0; k < nsample; ++k) { 62 | int ii = idx[j * nsample + k]; 63 | atomicAdd(grad_points + l * n + ii, 64 | grad_out[(l * npoints + j) * nsample + k]); 65 | } 66 | } 67 | } 68 | 69 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 70 | int nsample, const float *grad_out, 71 | const int *idx, float *grad_points) { 72 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 73 | 74 | group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 75 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 76 | 77 | CUDA_CHECK_ERRORS(); 78 | } 79 | -------------------------------------------------------------------------------- /scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
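# Output format note (paths are illustrative): export_one_scan below writes four arrays per scan, e.g.
#   vert = np.load('scannet_train_detection_data/scene0001_01_vert.npy')   # per-vertex data (xyz plus rgb here)
#   bbox = np.load('scannet_train_detection_data/scene0001_01_bbox.npy')   # per-instance boxes; last column is the nyu40 class id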
5 | 6 | """ Batch mode in loading ScanNet scenes with vertices and ground truth labels 7 | for semantic and instance segmentation 8 | 9 | Usage example: python ./batch_load_scannet_data.py 10 | """ 11 | import os 12 | import sys 13 | import datetime 14 | import numpy as np 15 | from load_scannet_data import export 16 | 17 | 18 | SCANNET_DIR = 'scans' 19 | TRAIN_SCAN_NAMES = [line.rstrip() for line in open('./meta_data/scannet_train.txt')] 20 | LABEL_MAP_FILE = './meta_data/scannetv2-labels.combined.tsv' 21 | DONOTCARE_CLASS_IDS = np.array([]) 22 | OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 23 | OUTPUT_FOLDER = './scannet_train_detection_data' 24 | 25 | def export_one_scan(scan_name, output_filename_prefix): 26 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply') 27 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json') 28 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') 29 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans. 30 | mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = \ 31 | export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None) 32 | 33 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 34 | mesh_vertices = mesh_vertices[mask,:] 35 | semantic_labels = semantic_labels[mask] 36 | instance_labels = instance_labels[mask] 37 | 38 | num_instances = len(np.unique(instance_labels)) 39 | print('Num of instances: ', num_instances) 40 | 41 | bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS) 42 | instance_bboxes = instance_bboxes[bbox_mask,:] 43 | print('Num of care instances: ', instance_bboxes.shape[0]) 44 | 45 | N = mesh_vertices.shape[0] 46 | 47 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices) 48 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels) 49 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels) 50 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes) 51 | 52 | def batch_export(): 53 | if not os.path.exists(OUTPUT_FOLDER): 54 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER)) 55 | os.mkdir(OUTPUT_FOLDER) 56 | 57 | for scan_name in TRAIN_SCAN_NAMES: 58 | print('-'*20+'begin') 59 | print(datetime.datetime.now()) 60 | print(scan_name) 61 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name) 62 | if os.path.isfile(output_filename_prefix+'_vert.npy'): 63 | print('File already exists. 
skipping.') 64 | print('-'*20+'done') 65 | continue 66 | try: 67 | export_one_scan(scan_name, output_filename_prefix) 68 | except Exception: 69 | print('Failed to export scan: %s'%(scan_name)) 70 | print('-'*20+'done') 71 | 72 | if __name__=='__main__': 73 | batch_export() 74 | -------------------------------------------------------------------------------- /Uni3D/data/utils/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def random_rotate_z(pc): 4 | # randomly rotate around the z axis 5 | theta = np.random.uniform(0, 2*np.pi) 6 | R = np.array([[np.cos(theta), -np.sin(theta), 0], 7 | [np.sin(theta), np.cos(theta), 0], 8 | [0, 0, 1]]) 9 | return np.matmul(pc, R) 10 | 11 | def normalize_pc(pc): 12 | # normalize pc to [-1, 1] 13 | pc = pc - np.mean(pc, axis=0) 14 | if np.max(np.linalg.norm(pc, axis=1)) < 1e-6: 15 | pc = np.zeros_like(pc) 16 | else: 17 | pc = pc / np.max(np.linalg.norm(pc, axis=1)) 18 | return pc 19 | 20 | def random_point_dropout(batch_pc, max_dropout_ratio=0.875): 21 | ''' batch_pc: BxNx3 ''' 22 | for b in range(batch_pc.shape[0]): 23 | dropout_ratio = np.random.random()*max_dropout_ratio # 0~0.875 24 | drop_idx = np.where(np.random.random((batch_pc.shape[1]))<=dropout_ratio)[0] 25 | if len(drop_idx)>0: 26 | batch_pc[b,drop_idx,:] = batch_pc[b,0,:] # set to the first point 27 | return batch_pc 28 | 29 | def random_scale_point_cloud(batch_data, scale_low=0.8, scale_high=1.25): 30 | """ Randomly scale the point cloud. Scale is per point cloud. 31 | Input: 32 | BxNx3 array, original batch of point clouds 33 | Return: 34 | BxNx3 array, scaled batch of point clouds 35 | """ 36 | B, N, C = batch_data.shape 37 | scales = np.random.uniform(scale_low, scale_high, B) 38 | for batch_index in range(B): 39 | batch_data[batch_index,:,:] *= scales[batch_index] 40 | return batch_data 41 | 42 | def shift_point_cloud(batch_data, shift_range=0.1): 43 | """ Randomly shift point cloud. Shift is per point cloud. 44 | Input: 45 | BxNx3 array, original batch of point clouds 46 | Return: 47 | BxNx3 array, shifted batch of point clouds 48 | """ 49 | B, N, C = batch_data.shape 50 | shifts = np.random.uniform(-shift_range, shift_range, (B,3)) 51 | for batch_index in range(B): 52 | batch_data[batch_index,:,:] += shifts[batch_index,:] 53 | return batch_data 54 | 55 | def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18): 56 | """ Randomly perturb the point clouds by small rotations 57 | Input: 58 | BxNx3 array, original batch of point clouds 59 | Return: 60 | BxNx3 array, rotated batch of point clouds 61 | """ 62 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 63 | for k in range(batch_data.shape[0]): 64 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 65 | Rx = np.array([[1,0,0], 66 | [0,np.cos(angles[0]),-np.sin(angles[0])], 67 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 68 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 69 | [0,1,0], 70 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 71 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 72 | [np.sin(angles[2]),np.cos(angles[2]),0], 73 | [0,0,1]]) 74 | R = np.dot(Rz, np.dot(Ry,Rx)) 75 | shape_pc = batch_data[k, ...] 76 | rotated_data[k, ...] 
= np.dot(shape_pc.reshape((-1, 3)), R) 77 | return rotated_data 78 | 79 | def augment_pc(data): 80 | data = random_point_dropout(data[None, ...]) 81 | data = random_scale_point_cloud(data) 82 | data = shift_point_cloud(data) 83 | data = rotate_perturbation_point_cloud(data) 84 | data = data.squeeze() 85 | return data -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include "interpolate.h" 4 | #include "utils.h" 5 | 6 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 7 | const float *known, float *dist2, int *idx); 8 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 9 | const float *points, const int *idx, 10 | const float *weight, float *out); 11 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 12 | const float *grad_out, 13 | const int *idx, const float *weight, 14 | float *grad_points); 15 | 16 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) { 17 | CHECK_CONTIGUOUS(unknowns); 18 | CHECK_CONTIGUOUS(knows); 19 | CHECK_IS_FLOAT(unknowns); 20 | CHECK_IS_FLOAT(knows); 21 | 22 | if (unknowns.is_cuda()) { 23 | CHECK_CUDA(knows); 24 | } 25 | 26 | at::Tensor idx = 27 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 28 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 29 | at::Tensor dist2 = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 32 | 33 | if (unknowns.is_cuda()) { 34 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 35 | unknowns.data(), knows.data(), 36 | dist2.data(), idx.data()); 37 | } else { 38 | AT_ASSERT(false, "CPU not supported"); 39 | } 40 | 41 | return {dist2, idx}; 42 | } 43 | 44 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 45 | at::Tensor weight) { 46 | CHECK_CONTIGUOUS(points); 47 | CHECK_CONTIGUOUS(idx); 48 | CHECK_CONTIGUOUS(weight); 49 | CHECK_IS_FLOAT(points); 50 | CHECK_IS_INT(idx); 51 | CHECK_IS_FLOAT(weight); 52 | 53 | if (points.is_cuda()) { 54 | CHECK_CUDA(idx); 55 | CHECK_CUDA(weight); 56 | } 57 | 58 | at::Tensor output = 59 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 60 | at::device(points.device()).dtype(at::ScalarType::Float)); 61 | 62 | if (points.is_cuda()) { 63 | three_interpolate_kernel_wrapper( 64 | points.size(0), points.size(1), points.size(2), idx.size(1), 65 | points.data(), idx.data(), weight.data(), 66 | output.data()); 67 | } else { 68 | AT_ASSERT(false, "CPU not supported"); 69 | } 70 | 71 | return output; 72 | } 73 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 74 | at::Tensor weight, const int m) { 75 | CHECK_CONTIGUOUS(grad_out); 76 | CHECK_CONTIGUOUS(idx); 77 | CHECK_CONTIGUOUS(weight); 78 | CHECK_IS_FLOAT(grad_out); 79 | CHECK_IS_INT(idx); 80 | CHECK_IS_FLOAT(weight); 81 | 82 | if (grad_out.is_cuda()) { 83 | CHECK_CUDA(idx); 84 | CHECK_CUDA(weight); 85 | } 86 | 87 | at::Tensor output = 88 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 89 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 90 | 91 | if (grad_out.is_cuda()) { 92 | three_interpolate_grad_kernel_wrapper( 93 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 94 | grad_out.data(), idx.data(), weight.data(), 95 | output.data()); 96 | } else { 97 | AT_ASSERT(false, "CPU not 
supported"); 98 | } 99 | 100 | return output; 101 | } 102 | -------------------------------------------------------------------------------- /scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | 15 | class ScannetDatasetConfig(object): 16 | def __init__(self): 17 | self.num_class = 18 18 | self.num_heading_bin = 1 19 | self.num_size_cluster = 18 20 | 21 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 22 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 23 | 'refrigerator':12, 'showercurtrain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'garbagebin':17} 24 | self.class2type = {self.type2class[t]:t for t in self.type2class} 25 | self.nyu40ids = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 26 | self.nyu40id2class = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 27 | self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means.npz'))['arr_0'] 28 | self.type_mean_size = {} 29 | for i in range(self.num_size_cluster): 30 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 31 | 32 | def angle2class(self, angle): 33 | ''' Convert continuous angle to discrete class 34 | [optional] also returns a small regression number from 35 | the class center angle to the current angle. 36 | 37 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 38 | return is class of int32 of 0,1,...,N-1 and a number such that 39 | class*(2pi/N) + number = angle 40 | 41 | NOT USED. 42 | ''' 43 | assert(False) 44 | 45 | def class2angle(self, pred_cls, residual, to_label_format=True): 46 | ''' Inverse function to angle2class. 47 | 48 | As ScanNet only has axis-aligned boxes, angles are always 0. 
''' 49 | return 0 50 | 51 | def size2class(self, size, type_name): 52 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 53 | size_class = self.type2class[type_name] 54 | size_residual = size - self.type_mean_size[type_name] 55 | return size_class, size_residual 56 | 57 | def class2size(self, pred_cls, residual): 58 | ''' Inverse function to size2class ''' 59 | return self.mean_size_arr[pred_cls, :] + residual 60 | 61 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 62 | heading_angle = self.class2angle(heading_class, heading_residual) 63 | box_size = self.class2size(int(size_class), size_residual) 64 | obb = np.zeros((7,)) 65 | obb[0:3] = center 66 | obb[3:6] = box_size 67 | obb[6] = heading_angle*-1 68 | return obb 69 | 70 | def rotate_aligned_boxes(input_boxes, rot_mat): 71 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 72 | new_centers = np.dot(centers, np.transpose(rot_mat)) 73 | 74 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 75 | new_x = np.zeros((dx.shape[0], 4)) 76 | new_y = np.zeros((dx.shape[0], 4)) 77 | 78 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 79 | crnrs = np.zeros((dx.shape[0], 3)) 80 | crnrs[:,0] = crnr[0]*dx 81 | crnrs[:,1] = crnr[1]*dy 82 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 83 | new_x[:,i] = crnrs[:,0] 84 | new_y[:,i] = crnrs[:,1] 85 | 86 | 87 | new_dx = 2.0*np.max(new_x, 1) 88 | new_dy = 2.0*np.max(new_y, 1) 89 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 90 | 91 | return np.concatenate([new_centers, new_lengths], axis=1) 92 | -------------------------------------------------------------------------------- /util/random_cuboid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | 4 | 5 | def check_aspect(crop_range, aspect_min): 6 | xy_aspect = np.min(crop_range[:2]) / np.max(crop_range[:2]) 7 | xz_aspect = np.min(crop_range[[0, 2]]) / np.max(crop_range[[0, 2]]) 8 | yz_aspect = np.min(crop_range[1:]) / np.max(crop_range[1:]) 9 | return ( 10 | (xy_aspect >= aspect_min) 11 | or (xz_aspect >= aspect_min) 12 | or (yz_aspect >= aspect_min) 13 | ) 14 | 15 | 16 | class RandomCuboid(object): 17 | """ 18 | RandomCuboid augmentation from DepthContrast [https://arxiv.org/abs/2101.02691] 19 | We slightly modify this operation to account for object detection. 
20 | This augmentation randomly crops a cuboid from the input and 21 | ensures that the cropped cuboid contains at least one bounding box 22 | """ 23 | 24 | def __init__( 25 | self, 26 | min_points, 27 | aspect=0.8, 28 | min_crop=0.5, 29 | max_crop=1.0, 30 | box_filter_policy="center", 31 | ): 32 | self.aspect = aspect 33 | self.min_crop = min_crop 34 | self.max_crop = max_crop 35 | self.min_points = min_points 36 | self.box_filter_policy = box_filter_policy 37 | 38 | def __call__(self, point_cloud, target_boxes, per_point_labels=None): 39 | range_xyz = np.max(point_cloud[:, 0:3], axis=0) - np.min( 40 | point_cloud[:, 0:3], axis=0 41 | ) 42 | 43 | for _ in range(100): 44 | crop_range = self.min_crop + np.random.rand(3) * ( 45 | self.max_crop - self.min_crop 46 | ) 47 | if not check_aspect(crop_range, self.aspect): 48 | continue 49 | 50 | sample_center = point_cloud[np.random.choice(len(point_cloud)), 0:3] 51 | 52 | new_range = range_xyz * crop_range / 2.0 53 | 54 | max_xyz = sample_center + new_range 55 | min_xyz = sample_center - new_range 56 | 57 | upper_idx = ( 58 | np.sum((point_cloud[:, 0:3] <= max_xyz).astype(np.int32), 1) == 3 59 | ) 60 | lower_idx = ( 61 | np.sum((point_cloud[:, 0:3] >= min_xyz).astype(np.int32), 1) == 3 62 | ) 63 | 64 | new_pointidx = (upper_idx) & (lower_idx) 65 | 66 | if np.sum(new_pointidx) < self.min_points: 67 | continue 68 | 69 | new_point_cloud = point_cloud[new_pointidx, :] 70 | 71 | # filtering policy is the only modification from DepthContrast 72 | if self.box_filter_policy == "center": 73 | # remove boxes whose center does not lie within the new_point_cloud 74 | new_boxes = target_boxes 75 | if ( 76 | target_boxes.sum() > 0 77 | ): # only filter when ground truth actually contains boxes; empty ground truth is common in SUNRGBD. 78 | box_centers = target_boxes[:, 0:3] 79 | new_pc_min_max = np.min(new_point_cloud[:, 0:3], axis=0), np.max( 80 | new_point_cloud[:, 0:3], axis=0 81 | ) 82 | keep_boxes = np.logical_and( 83 | np.all(box_centers >= new_pc_min_max[0], axis=1), 84 | np.all(box_centers <= new_pc_min_max[1], axis=1), 85 | ) 86 | if keep_boxes.sum() == 0: 87 | # current data augmentation removes all boxes in the pointcloud. fail! 88 | continue 89 | new_boxes = target_boxes[keep_boxes] 90 | if per_point_labels is not None: 91 | new_per_point_labels = [x[new_pointidx] for x in per_point_labels] 92 | else: 93 | new_per_point_labels = None 94 | # if we are here, all conditions are met. return boxes 95 | return new_point_cloud, new_boxes, new_per_point_labels 96 | 97 | # fallback 98 | return point_cloud, target_boxes, per_point_labels 99 | -------------------------------------------------------------------------------- /models/mink_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | import MinkowskiEngine as ME 5 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 6 | 7 | 8 | class MinkResNet(nn.Module): 9 | r"""Minkowski ResNet backbone. See `4D Spatio-Temporal ConvNets 10 | <https://arxiv.org/abs/1904.08755>`_ for more details. 11 | 12 | Args: 13 | depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. 14 | in_channels (int): Number of input channels, 3 for RGB. 15 | num_stages (int, optional): Resnet stages. Default: 4. 16 | inplanes (int, optional): Number of stem output channels. Default: 64. 17 | stem_bn (bool, optional): Use BatchNorm instead of InstanceNorm in the stem. Default: False. 
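Example (a sketch, assuming a MinkowskiEngine sparse tensor has already been built; `feats`/`coords` are hypothetical batched inputs):
    >>> backbone = MinkResNet(depth=34, in_channels=3)
    >>> x = ME.SparseTensor(features=feats, coordinates=coords)
    >>> outs = backbone(x)  # list with one sparse tensor per stage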
18 | """ 19 | arch_settings = { 20 | 18: (BasicBlock, (2, 2, 2, 2)), 21 | 34: (BasicBlock, (3, 4, 6, 3)), 22 | 50: (Bottleneck, (3, 4, 6, 3)), 23 | 101: (Bottleneck, (3, 4, 23, 3)), 24 | 152: (Bottleneck, (3, 8, 36, 3)) 25 | } 26 | 27 | def __init__(self, depth, in_channels, inplanes=64, num_stages=4, stem_bn=False): 28 | super(MinkResNet, self).__init__() 29 | if depth not in self.arch_settings: 30 | raise KeyError(f'invalid depth {depth} for resnet') 31 | assert 4 >= num_stages >= 1 32 | block, stage_blocks = self.arch_settings[depth] 33 | stage_blocks = stage_blocks[:num_stages] 34 | self.num_stages = num_stages 35 | 36 | self.inplanes = inplanes 37 | 38 | self.conv1 = ME.MinkowskiConvolution( 39 | in_channels, self.inplanes, kernel_size=3, stride=2, dimension=3) 40 | # Maybe BatchNorm is better, but we follow the original implementation. 41 | self.norm1 = ME.MinkowskiBatchNorm(self.inplanes) if stem_bn else ME.MinkowskiInstanceNorm(self.inplanes) 42 | self.relu = ME.MinkowskiReLU(inplace=False) 43 | 44 | for i, num_blocks in enumerate(stage_blocks): 45 | setattr( 46 | self, f'layer{i + 1}', 47 | self._make_layer(block, inplanes * 2**i, stage_blocks[i], stride=2)) 48 | 49 | self.init_weights() 50 | 51 | def init_weights(self): 52 | print('random init backbone') 53 | for m in self.modules(): 54 | if isinstance(m, ME.MinkowskiConvolution): 55 | ME.utils.kaiming_normal_( 56 | m.kernel, mode='fan_out', nonlinearity='relu') 57 | 58 | if isinstance(m, ME.MinkowskiBatchNorm): 59 | nn.init.constant_(m.bn.weight, 1) 60 | nn.init.constant_(m.bn.bias, 0) 61 | 62 | def _make_layer(self, block, planes, blocks, stride): 63 | downsample = None 64 | if stride != 1 or self.inplanes != planes * block.expansion: 65 | downsample = nn.Sequential( 66 | ME.MinkowskiConvolution( 67 | self.inplanes, 68 | planes * block.expansion, 69 | kernel_size=1, 70 | stride=stride, 71 | dimension=3), 72 | ME.MinkowskiBatchNorm(planes * block.expansion)) 73 | layers = [] 74 | layers.append( 75 | block( 76 | self.inplanes, 77 | planes, 78 | stride=stride, 79 | downsample=downsample, 80 | dimension=3)) 81 | self.inplanes = planes * block.expansion 82 | for i in range(1, blocks): 83 | layers.append(block(self.inplanes, planes, stride=1, dimension=3)) 84 | return nn.Sequential(*layers) 85 | 86 | def forward(self, x): 87 | """Forward pass of ResNet. 88 | 89 | Args: 90 | x (ME.SparseTensor): Input sparse tensor. 91 | 92 | Returns: 93 | list[ME.SparseTensor]: Output sparse tensors. 94 | """ 95 | x = self.conv1(x) 96 | x = self.norm1(x) 97 | x = self.relu(x) 98 | outs = [] 99 | for i in range(self.num_stages): 100 | x = getattr(self, f'layer{i + 1}')(x) 101 | outs.append(x) 102 | return outs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## [Point2Graph: An End-to-end Point Cloud-based 3D Open-Vocabulary Scene Graph for Robot Navigation](https://arxiv.org/abs/2409.10350) 2 | 3 | This is the implementation of the **Object Detection and Classification** module of the paper "Point2Graph: An End-to-end Point Cloud-based 3D Open-Vocabulary Scene Graph for Robot Navigation". 
4 | 5 | Authors: [Yifan Xu](https://www.linkedin.com/in/yifan-xu-43876120b/), [Ziming Luo](https://zimingluo.github.io/), [Qianwei Wang](https://www.linkedin.com/in/qianwei-wang-945bb9292/), [Vineet Kamat](https://live.engin.umich.edu/), [Carol Menassa](https://cee.engin.umich.edu/people/menassa-carol-c/) 6 | 7 | ## News: 8 | 9 | [2025/02] Our paper has been accepted to **ICRA 2025** 🎉🎉🎉 10 | 11 | ## Object Detection and Classification Pipeline 12 | 13 | This module consists of two stages: (1) detection and localization using class-agnostic bounding boxes and DBSCAN filtering for object refinement, and (2) classification via cross-modal retrieval, connecting 3D point cloud data with textual descriptions, without requiring annotations or RGB-D alignment. A minimal sketch of the DBSCAN refinement idea is given in the appendix at the end of this README. 14 | 15 | ![Pipeline Image](https://point2graph.github.io/static/figure/object_pipeline.png) 16 | 17 | 18 | 19 | ## Getting Started 20 | 21 | ### Installation 22 | 23 | **Step 1.** Create a conda environment and activate it. 24 | 25 | ```shell 26 | conda env create -f point2graph.yaml 27 | conda activate point2graph 28 | ``` 29 | 30 | **Step 2.** Install **Minkowski Engine**. 31 | 32 | ```bash 33 | git clone https://github.com/NVIDIA/MinkowskiEngine.git 34 | cd MinkowskiEngine 35 | python setup.py install --blas_include_dirs=${CONDA_PREFIX}/include --blas=openblas 36 | ``` 37 | 38 | **Step 3.** Install **mmcv**. 39 | 40 | ```bash 41 | pip install openmim 42 | mim install mmcv-full==1.6.1 43 | ``` 44 | 45 | **Step 4.** Install the third-party extensions. 46 | 47 | ```bash 48 | cd pointnet2/ && python setup.py install --user 49 | cd .. 50 | cd utils && python cython_compile.py build_ext --inplace 51 | cd .. 52 | ``` 53 | 54 | ### Dataset preparation 55 | 56 | **ScanNet Data** 57 | 58 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Move/link the `scans` folder such that under `scans` there should be folders with names such as `scene0001_01`. 59 | 2. Enter the `scannet` folder and extract point clouds and annotations (semantic segmentation, instance segmentation, etc.) by running `python batch_load_scannet_data.py`, which creates a folder named `scannet_train_detection_data` there. 60 | 61 | ### Model preparation 62 | 63 | You should 64 | 65 | * download the 3D Object Detection pre-trained model [V-DETR](https://huggingface.co/byshen/vdetr/blob/main/scannet_540ep.pth), and put it in the `./models/` folder. 66 | * download the 3D Object Classification pre-trained model [Uni-3D](https://github.com/baaivision/Uni3D#model-zoo) and the [clip model](https://huggingface.co/timm/eva02_enormous_patch14_plus_clip_224.laion2b_s9b_b144k/blob/main/open_clip_pytorch_model.bin), and put them in the `./Uni3D/downloads/` folder. 67 | 68 | 69 | ## Testing 70 | 71 | The test script is in the `run.sh` file. Once you have the datasets and models prepared, you can run the test as 72 | 73 | ```shell 74 | bash run.sh 75 | ``` 76 | 77 | The script performs two functions: 78 | 79 | 1. Extracts a set of object point clouds with unknown class and stores them at `./results/objects/` 80 | 2. Retrieves and visualizes the 3D object point cloud most relevant to the user's query 81 | 82 | ## Acknowledgement 83 | 84 | Point2Graph is built on [V-DETR](https://github.com/V-DETR/V-DETR) and [Uni3D](https://github.com/baaivision/Uni3D). 
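## Appendix: DBSCAN Refinement Sketch

The pipeline section above mentions DBSCAN filtering for object refinement. The snippet below is a minimal, illustrative sketch of that idea (keep the dominant cluster among the points inside a detected box); the function name, the `eps`/`min_samples` values, and the scikit-learn dependency are our assumptions rather than the repository's exact implementation.

```python
import numpy as np
from sklearn.cluster import DBSCAN

def refine_box_points(points_in_box, eps=0.05, min_samples=10):
    """Keep the largest DBSCAN cluster inside a detected box (illustrative)."""
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(points_in_box[:, :3])
    valid = labels[labels >= 0]          # DBSCAN marks noise points with -1
    if valid.size == 0:                  # everything is noise: keep the raw points
        return points_in_box
    keep = labels == np.bincount(valid).argmax()
    return points_in_box[keep]
```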
85 | 86 | 87 | ## Citation 88 | 89 | If you find this code useful in your research, please consider citing: 90 | 91 | ``` 92 | @misc{xu2024point2graphendtoendpointcloudbased, 93 | title={Point2Graph: An End-to-end Point Cloud-based 3D Open-Vocabulary Scene Graph for Robot Navigation}, 94 | author={Yifan Xu and Ziming Luo and Qianwei Wang and Vineet Kamat and Carol Menassa}, 95 | year={2024}, 96 | eprint={2409.10350}, 97 | archivePrefix={arXiv}, 98 | primaryClass={cs.RO}, 99 | url={https://arxiv.org/abs/2409.10350}, 100 | } 101 | ``` 102 | -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.8/build.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | cxx = c++ 3 | nvcc = /usr/local/cuda/bin/nvcc 4 | 5 | cflags = -pthread -B /opt/conda/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/include -I/opt/conda/lib/python3.8/site-packages/torch/include -I/opt/conda/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/lib/python3.8/site-packages/torch/include/TH -I/opt/conda/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/include/python3.8 -c 6 | post_cflags = -O2 -I_ext_src/include -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_ext -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 7 | cuda_cflags = -I/home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/include -I/opt/conda/lib/python3.8/site-packages/torch/include -I/opt/conda/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/lib/python3.8/site-packages/torch/include/TH -I/opt/conda/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/include/python3.8 -c 8 | cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -O2 -I_ext_src/include -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_ext -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -std=c++14 9 | ldflags = 10 | 11 | rule compile 12 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags 13 | depfile = $out.d 14 | deps = gcc 15 | 16 | rule cuda_compile 17 | depfile = $out.d 18 | deps = gcc 19 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags 20 | 21 | 22 | 23 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/sampling.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/sampling.cpp 24 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/interpolate.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/interpolate.cpp 25 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/group_points.o: compile 
/home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/group_points.cpp 26 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/bindings.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/bindings.cpp 27 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/ball_query.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/ball_query.cpp 28 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/sampling_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/sampling_gpu.cu 29 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/interpolate_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/interpolate_gpu.cu 30 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/group_points_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/group_points_gpu.cu 31 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/ball_query_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/ball_query_gpu.cu 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /models/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) V-DETR authors. All Rights Reserved. 
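# Note: the BasicBlock*/Bottleneck* variants defined below differ only in their
# NORM_TYPE class attribute, so a norm choice can be a plain lookup, e.g.
# (norm_name is an illustrative config value, not part of this file):
#   block_cls = {'BN': BasicBlock, 'IN': BasicBlockIN, 'INBN': BasicBlockINBN}[norm_name]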
2 | import torch.nn as nn 3 | from MinkowskiEngine import MinkowskiReLU 4 | 5 | from models.modules.common import ConvType, NormType, conv, get_norm 6 | 7 | 8 | class BasicBlockBase(nn.Module): 9 | expansion = 1 10 | NORM_TYPE = NormType.BATCH_NORM 11 | 12 | def __init__( 13 | self, 14 | inplanes, 15 | planes, 16 | stride=1, 17 | dilation=1, 18 | downsample=None, 19 | conv_type=ConvType.HYPERCUBE, 20 | bn_momentum=0.1, 21 | D=3, 22 | ): 23 | super().__init__() 24 | 25 | self.conv1 = conv( 26 | inplanes, 27 | planes, 28 | kernel_size=3, 29 | stride=stride, 30 | dilation=dilation, 31 | conv_type=conv_type, 32 | D=D, 33 | ) 34 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 35 | self.conv2 = conv( 36 | planes, 37 | planes, 38 | kernel_size=3, 39 | stride=1, 40 | dilation=dilation, 41 | bias=False, 42 | conv_type=conv_type, 43 | D=D, 44 | ) 45 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 46 | self.relu = MinkowskiReLU(inplace=False) 47 | self.downsample = downsample 48 | 49 | def forward(self, x): 50 | residual = x 51 | 52 | out = self.conv1(x) 53 | out = self.norm1(out) 54 | out = self.relu(out) 55 | 56 | out = self.conv2(out) 57 | out = self.norm2(out) 58 | 59 | if self.downsample is not None: 60 | residual = self.downsample(x) 61 | 62 | out += residual 63 | out = self.relu(out) 64 | 65 | return out 66 | 67 | 68 | class BasicBlock(BasicBlockBase): 69 | NORM_TYPE = NormType.BATCH_NORM 70 | 71 | 72 | class BasicBlockIN(BasicBlockBase): 73 | NORM_TYPE = NormType.INSTANCE_NORM 74 | 75 | 76 | class BasicBlockINBN(BasicBlockBase): 77 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 78 | 79 | 80 | class BottleneckBase(nn.Module): 81 | expansion = 4 82 | NORM_TYPE = NormType.BATCH_NORM 83 | 84 | def __init__( 85 | self, 86 | inplanes, 87 | planes, 88 | stride=1, 89 | dilation=1, 90 | downsample=None, 91 | conv_type=ConvType.HYPERCUBE, 92 | bn_momentum=0.1, 93 | D=3, 94 | ): 95 | super().__init__() 96 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 97 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 98 | 99 | self.conv2 = conv( 100 | planes, 101 | planes, 102 | kernel_size=3, 103 | stride=stride, 104 | dilation=dilation, 105 | conv_type=conv_type, 106 | D=D, 107 | ) 108 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 109 | 110 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 111 | self.norm3 = get_norm( 112 | self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum 113 | ) 114 | 115 | self.relu = MinkowskiReLU(inplace=False) 116 | self.downsample = downsample 117 | 118 | def forward(self, x): 119 | residual = x 120 | 121 | out = self.conv1(x) 122 | out = self.norm1(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv2(out) 126 | out = self.norm2(out) 127 | out = self.relu(out) 128 | 129 | out = self.conv3(out) 130 | out = self.norm3(out) 131 | 132 | if self.downsample is not None: 133 | residual = self.downsample(x) 134 | 135 | out += residual 136 | out = self.relu(out) 137 | 138 | return out 139 | 140 | 141 | class Bottleneck(BottleneckBase): 142 | NORM_TYPE = NormType.BATCH_NORM 143 | 144 | 145 | class BottleneckIN(BottleneckBase): 146 | NORM_TYPE = NormType.INSTANCE_NORM 147 | 148 | 149 | class BottleneckINBN(BottleneckBase): 150 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 151 | -------------------------------------------------------------------------------- /models/helpers.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # V-DETR 3 | # Copyright (c) V-DETR authors. All Rights Reserved. 4 | # ------------------------------------------------------------------------ 5 | # Modified from : 6 | # Group-Free-3D 7 | # Copyright (c) Group-Free-3D authors. All Rights Reserved. 8 | # 3DETR 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 10 | # ------------------------------------------------------------------------ 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from functools import partial 14 | import copy 15 | 16 | 17 | class PositionEmbeddingLearned(nn.Module): 18 | """ 19 | Absolute pos embedding, learned. 20 | """ 21 | 22 | def __init__(self, input_channel, num_pos_feats=288): 23 | super().__init__() 24 | self.position_embedding_head = nn.Sequential( 25 | nn.Conv1d(input_channel, num_pos_feats, kernel_size=1), 26 | nn.BatchNorm1d(num_pos_feats), 27 | nn.ReLU(inplace=True), 28 | nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1)) 29 | 30 | def forward(self, xyz): 31 | xyz = xyz.transpose(1, 2).contiguous() 32 | position_embedding = self.position_embedding_head(xyz) 33 | return position_embedding 34 | 35 | 36 | class BatchNormDim1Swap(nn.BatchNorm1d): 37 | """ 38 | Used for nn.Transformer that uses a HW x N x C rep 39 | """ 40 | 41 | def forward(self, x): 42 | """ 43 | x: HW x N x C 44 | permute to N x C x HW 45 | Apply BN on C 46 | permute back 47 | """ 48 | hw, n, c = x.shape 49 | x = x.permute(1, 2, 0) 50 | x = super(BatchNormDim1Swap, self).forward(x) 51 | # x: n x c x hw -> hw x n x c 52 | x = x.permute(2, 0, 1) 53 | return x 54 | 55 | 56 | NORM_DICT = { 57 | "bn": BatchNormDim1Swap, 58 | "bn1d": nn.BatchNorm1d, 59 | "id": nn.Identity, 60 | "ln": nn.LayerNorm, 61 | } 62 | 63 | ACTIVATION_DICT = { 64 | "relu": nn.ReLU, 65 | "gelu": nn.GELU, 66 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 67 | } 68 | 69 | WEIGHT_INIT_DICT = { 70 | "xavier_uniform": nn.init.xavier_uniform_, 71 | } 72 | 73 | 74 | class GenericMLP(nn.Module): 75 | def __init__( 76 | self, 77 | input_dim, 78 | hidden_dims, 79 | output_dim, 80 | norm_fn_name=None, 81 | activation="relu", 82 | use_conv=False, 83 | dropout=None, 84 | hidden_use_bias=False, 85 | output_use_bias=True, 86 | output_use_activation=False, 87 | output_use_norm=False, 88 | weight_init_name=None, 89 | ): 90 | super().__init__() 91 | activation = ACTIVATION_DICT[activation] 92 | norm = None 93 | if norm_fn_name is not None: 94 | norm = NORM_DICT[norm_fn_name] 95 | if norm_fn_name == "ln" and use_conv: 96 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 97 | 98 | if dropout is not None: 99 | if not isinstance(dropout, list): 100 | dropout = [dropout for _ in range(len(hidden_dims))] 101 | 102 | layers = [] 103 | prev_dim = input_dim 104 | for idx, x in enumerate(hidden_dims): 105 | if use_conv: 106 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 107 | else: 108 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 109 | layers.append(layer) 110 | if norm: 111 | layers.append(norm(x)) 112 | layers.append(activation()) 113 | if dropout is not None: 114 | layers.append(nn.Dropout(p=dropout[idx])) 115 | prev_dim = x 116 | if use_conv: 117 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 118 | else: 119 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 120 | layers.append(layer) 121 | 122 | if 
output_use_norm: 123 | layers.append(norm(output_dim)) 124 | 125 | if output_use_activation: 126 | layers.append(activation()) 127 | 128 | self.layers = nn.Sequential(*layers) 129 | 130 | if weight_init_name is not None: 131 | self.do_weight_init(weight_init_name) 132 | 133 | def do_weight_init(self, weight_init_name): 134 | func = WEIGHT_INIT_DICT[weight_init_name] 135 | for (_, param) in self.named_parameters(): 136 | if param.dim() > 1: # skips batchnorm/layernorm 137 | func(param) 138 | 139 | def forward(self, x): 140 | output = self.layers(x) 141 | return output 142 | 143 | 144 | def get_clones(module, N): 145 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 146 | -------------------------------------------------------------------------------- /Uni3D/data/templates.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelnet40_64": [ 3 | "a point cloud model of {}.", 4 | "There is a {} in the scene.", 5 | "There is the {} in the scene.", 6 | "a photo of a {} in the scene.", 7 | "a photo of the {} in the scene.", 8 | "a photo of one {} in the scene.", 9 | "itap of a {}.", 10 | "itap of my {}.", 11 | "itap of the {}.", 12 | "a photo of a {}.", 13 | "a photo of my {}.", 14 | "a photo of the {}.", 15 | "a photo of one {}.", 16 | "a photo of many {}.", 17 | "a good photo of a {}.", 18 | "a good photo of the {}.", 19 | "a bad photo of a {}.", 20 | "a bad photo of the {}.", 21 | "a photo of a nice {}.", 22 | "a photo of the nice {}.", 23 | "a photo of a cool {}.", 24 | "a photo of the cool {}.", 25 | "a photo of a weird {}.", 26 | "a photo of the weird {}.", 27 | "a photo of a small {}.", 28 | "a photo of the small {}.", 29 | "a photo of a large {}.", 30 | "a photo of the large {}.", 31 | "a photo of a clean {}.", 32 | "a photo of the clean {}.", 33 | "a photo of a dirty {}.", 34 | "a photo of the dirty {}.", 35 | "a bright photo of a {}.", 36 | "a bright photo of the {}.", 37 | "a dark photo of a {}.", 38 | "a dark photo of the {}.", 39 | "a photo of a hard to see {}.", 40 | "a photo of the hard to see {}.", 41 | "a low resolution photo of a {}.", 42 | "a low resolution photo of the {}.", 43 | "a cropped photo of a {}.", 44 | "a cropped photo of the {}.", 45 | "a close-up photo of a {}.", 46 | "a close-up photo of the {}.", 47 | "a jpeg corrupted photo of a {}.", 48 | "a jpeg corrupted photo of the {}.", 49 | "a blurry photo of a {}.", 50 | "a blurry photo of the {}.", 51 | "a pixelated photo of a {}.", 52 | "a pixelated photo of the {}.", 53 | "a black and white photo of the {}.", 54 | "a black and white photo of a {}", 55 | "a plastic {}.", 56 | "the plastic {}.", 57 | "a toy {}.", 58 | "the toy {}.", 59 | "a plushie {}.", 60 | "the plushie {}.", 61 | "a cartoon {}.", 62 | "the cartoon {}.", 63 | "an embroidered {}.", 64 | "the embroidered {}.", 65 | "a painting of the {}.", 66 | "a painting of a {}." 
67 | ], 68 | "shapenet_64": [ 69 | "a point cloud model of {}.", 70 | "There is a {} in the scene.", 71 | "There is the {} in the scene.", 72 | "a photo of a {} in the scene.", 73 | "a photo of the {} in the scene.", 74 | "a photo of one {} in the scene.", 75 | "itap of a {}.", 76 | "itap of my {}.", 77 | "itap of the {}.", 78 | "a photo of a {}.", 79 | "a photo of my {}.", 80 | "a photo of the {}.", 81 | "a photo of one {}.", 82 | "a photo of many {}.", 83 | "a good photo of a {}.", 84 | "a good photo of the {}.", 85 | "a bad photo of a {}.", 86 | "a bad photo of the {}.", 87 | "a photo of a nice {}.", 88 | "a photo of the nice {}.", 89 | "a photo of a cool {}.", 90 | "a photo of the cool {}.", 91 | "a photo of a weird {}.", 92 | "a photo of the weird {}.", 93 | "a photo of a small {}.", 94 | "a photo of the small {}.", 95 | "a photo of a large {}.", 96 | "a photo of the large {}.", 97 | "a photo of a clean {}.", 98 | "a photo of the clean {}.", 99 | "a photo of a dirty {}.", 100 | "a photo of the dirty {}.", 101 | "a bright photo of a {}.", 102 | "a bright photo of the {}.", 103 | "a dark photo of a {}.", 104 | "a dark photo of the {}.", 105 | "a photo of a hard to see {}.", 106 | "a photo of the hard to see {}.", 107 | "a low resolution photo of a {}.", 108 | "a low resolution photo of the {}.", 109 | "a cropped photo of a {}.", 110 | "a cropped photo of the {}.", 111 | "a close-up photo of a {}.", 112 | "a close-up photo of the {}.", 113 | "a jpeg corrupted photo of a {}.", 114 | "a jpeg corrupted photo of the {}.", 115 | "a blurry photo of a {}.", 116 | "a blurry photo of the {}.", 117 | "a pixelated photo of a {}.", 118 | "a pixelated photo of the {}.", 119 | "a black and white photo of the {}.", 120 | "a black and white photo of a {}", 121 | "a plastic {}.", 122 | "the plastic {}.", 123 | "a toy {}.", 124 | "the toy {}.", 125 | "a plushie {}.", 126 | "the plushie {}.", 127 | "a cartoon {}.", 128 | "the cartoon {}.", 129 | "an embroidered {}.", 130 | "the embroidered {}.", 131 | "a painting of the {}.", 132 | "a painting of a {}." 133 | ] 134 | 135 | } -------------------------------------------------------------------------------- /util/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
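# Usage sketch (illustrative, not part of the original file): each function
# takes float rows of box corners with a trailing confidence score and returns
# the indices of the boxes that survive suppression. For the 2D case, with a
# hypothetical trio of boxes where the second heavily overlaps the first:
#
#   import numpy as np
#   boxes = np.array([[0.0, 0.0, 1.0, 1.0, 0.9],
#                     [0.1, 0.1, 1.1, 1.1, 0.8],
#                     [5.0, 5.0, 6.0, 6.0, 0.7]])
#   keep = nms_2d_faster(boxes, overlap_threshold=0.5)  # -> [0, 2]
#
# The 3D variants operate the same way on (x1,y1,z1,x2,y2,z2,score) rows;
# nms_3d_faster_samecls additionally zeroes the overlap between boxes of
# different class ids, so suppression only happens within a class.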
2 | 3 | import numpy as np 4 | 5 | # boxes are axis-aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score)
6 | """ Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 7 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 8 | """ 9 | 10 | 11 | def nms_2d(boxes, overlap_threshold): 12 | x1 = boxes[:, 0] 13 | y1 = boxes[:, 1] 14 | x2 = boxes[:, 2] 15 | y2 = boxes[:, 3] 16 | score = boxes[:, 4] 17 | area = (x2 - x1) * (y2 - y1) 18 | 19 | I = np.argsort(score) 20 | pick = [] 21 | while I.size != 0: 22 | last = I.size 23 | i = I[-1] 24 | pick.append(i) 25 | suppress = [last - 1] 26 | for pos in range(last - 1): 27 | j = I[pos] 28 | xx1 = max(x1[i], x1[j]) 29 | yy1 = max(y1[i], y1[j]) 30 | xx2 = min(x2[i], x2[j]) 31 | yy2 = min(y2[i], y2[j]) 32 | w = xx2 - xx1 33 | h = yy2 - yy1 34 | if w > 0 and h > 0: 35 | o = w * h / area[j] 36 | print("Overlap is", o) 37 | if o > overlap_threshold: 38 | suppress.append(pos) 39 | I = np.delete(I, suppress) 40 | return pick 41 | 42 | 43 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] 47 | y2 = boxes[:, 3] 48 | score = boxes[:, 4] 49 | area = (x2 - x1) * (y2 - y1) 50 | 51 | I = np.argsort(score) 52 | pick = [] 53 | while I.size != 0: 54 | last = I.size 55 | i = I[-1] 56 | pick.append(i) 57 | 58 | xx1 = np.maximum(x1[i], x1[I[: last - 1]]) 59 | yy1 = np.maximum(y1[i], y1[I[: last - 1]]) 60 | xx2 = np.minimum(x2[i], x2[I[: last - 1]]) 61 | yy2 = np.minimum(y2[i], y2[I[: last - 1]]) 62 | 63 | w = np.maximum(0, xx2 - xx1) 64 | h = np.maximum(0, yy2 - yy1) 65 | 66 | if old_type: 67 | o = (w * h) / area[I[: last - 1]] 68 | else: 69 | inter = w * h 70 | o = inter / (area[i] + area[I[: last - 1]] - inter) 71 | 72 | I = np.delete( 73 | I, np.concatenate(([last - 1], np.where(o > overlap_threshold)[0])) 74 | ) 75 | 76 | return pick 77 | 78 | 79 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 80 | x1 = boxes[:, 0] 81 | y1 = boxes[:, 1] 82 | z1 = boxes[:, 2] 83 | x2 = boxes[:, 3] 84 | y2 = boxes[:, 4] 85 | z2 = boxes[:, 5] 86 | score = boxes[:, 6] 87 | area = (x2 - x1) * (y2 - y1) * (z2 - z1) 88 | 89 | I = np.argsort(score) 90 | pick = [] 91 | while I.size != 0: 92 | last = I.size 93 | i = I[-1] 94 | pick.append(i) 95 | 96 | xx1 = np.maximum(x1[i], x1[I[: last - 1]]) 97 | yy1 = np.maximum(y1[i], y1[I[: last - 1]]) 98 | zz1 = np.maximum(z1[i], z1[I[: last - 1]]) 99 | xx2 = np.minimum(x2[i], x2[I[: last - 1]]) 100 | yy2 = np.minimum(y2[i], y2[I[: last - 1]]) 101 | zz2 = np.minimum(z2[i], z2[I[: last - 1]]) 102 | 103 | l = np.maximum(0, xx2 - xx1) 104 | w = np.maximum(0, yy2 - yy1) 105 | h = np.maximum(0, zz2 - zz1) 106 | 107 | if old_type: 108 | o = (l * w * h) / area[I[: last - 1]] 109 | else: 110 | inter = l * w * h 111 | o = inter / (area[i] + area[I[: last - 1]] - inter) 112 | 113 | I = np.delete( 114 | I, np.concatenate(([last - 1], np.where(o > overlap_threshold)[0])) 115 | ) 116 | 117 | return pick 118 | 119 | 120 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 121 | x1 = boxes[:, 0] 122 | y1 = boxes[:, 1] 123 | z1 = boxes[:, 2] 124 | x2 = boxes[:, 3] 125 | y2 = boxes[:, 4] 126 | z2 = boxes[:, 5] 127 | score = boxes[:, 6] 128 | cls = boxes[:, 7] 129 | area = (x2 - x1) * (y2 - y1) * (z2 - z1) 130 | 131 | I = np.argsort(score) 132 | pick = [] 133 | while I.size != 0: 134 | last = I.size 135 | i = I[-1] 136 | pick.append(i) 137 | 138 | xx1 = np.maximum(x1[i], x1[I[: last - 1]]) 139 | yy1 = 
np.maximum(y1[i], y1[I[: last - 1]]) 140 | zz1 = np.maximum(z1[i], z1[I[: last - 1]]) 141 | xx2 = np.minimum(x2[i], x2[I[: last - 1]]) 142 | yy2 = np.minimum(y2[i], y2[I[: last - 1]]) 143 | zz2 = np.minimum(z2[i], z2[I[: last - 1]]) 144 | cls1 = cls[i] 145 | cls2 = cls[I[: last - 1]] 146 | 147 | l = np.maximum(0, xx2 - xx1) 148 | w = np.maximum(0, yy2 - yy1) 149 | h = np.maximum(0, zz2 - zz1) 150 | 151 | if old_type: 152 | o = (l * w * h) / area[I[: last - 1]] 153 | else: 154 | inter = l * w * h 155 | o = inter / (area[i] + area[I[: last - 1]] - inter) 156 | o = o * (cls1 == cls2) 157 | 158 | I = np.delete( 159 | I, np.concatenate(([last - 1], np.where(o > overlap_threshold)[0])) 160 | ) 161 | 162 | return pick 163 | -------------------------------------------------------------------------------- /Uni3D/data/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch.distributed as dist 3 | 4 | logger_initialized = {} 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO, name='main'): 7 | """Get root logger and add a keyword filter to it. 8 | The logger will be initialized if it has not been initialized. By default a 9 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 10 | also be added. The name of the root logger is the top-level package name, 11 | e.g., "main". 12 | Args: 13 | log_file (str, optional): File path of log. Defaults to None. 14 | log_level (int, optional): The level of logger. 15 | Defaults to logging.INFO. 16 | name (str, optional): The name of the root logger, also used as a 17 | filter keyword. Defaults to 'main'. 18 | Returns: 19 | :obj:`logging.Logger`: The obtained logger 20 | """ 21 | logger = get_logger(name=name, log_file=log_file, log_level=log_level) 22 | # add a logging filter that only passes records whose logger name contains the keyword 23 | logging_filter = logging.Filter(name) 24 | logging_filter.filter = lambda record: record.name.find(name) != -1 25 | logger.addFilter(logging_filter) 26 | return logger 27 | 28 | 29 | def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): 30 | """Initialize and get a logger by name. 31 | If the logger has not been initialized, this method will initialize the 32 | logger by adding one or two handlers, otherwise the initialized logger will 33 | be directly returned. During initialization, a StreamHandler will always be 34 | added. If `log_file` is specified and the process rank is 0, a FileHandler 35 | will also be added. 36 | Args: 37 | name (str): Logger name. 38 | log_file (str | None): The log filename. If specified, a FileHandler 39 | will be added to the logger. 40 | log_level (int): The logger level. Note that only the process of 41 | rank 0 is affected, and other processes will set the level to 42 | "Error" and thus be silent most of the time. 43 | file_mode (str): The file mode used in opening log file. 44 | Defaults to 'w'. 45 | Returns: 46 | logging.Logger: The expected logger. 47 | """ 48 | logger = logging.getLogger(name) 49 | if name in logger_initialized: 50 | return logger 51 | # handle hierarchical names 52 | # e.g., logger "a" is initialized, then logger "a.b" will skip the 53 | # initialization since it is a child of "a". 54 | for logger_name in logger_initialized: 55 | if name.startswith(logger_name): 56 | return logger 57 | 58 | # handle duplicate logs to the console 59 | # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) 60 | # to the root logger. 
As logger.propagate is True by default, this root 61 | # level handler causes logging messages from rank>0 processes to 62 | # unexpectedly show up on the console, creating much unwanted clutter. 63 | # To fix this issue, we set the root logger's StreamHandler, if any, to log 64 | # at the ERROR level. 65 | for handler in logger.root.handlers: 66 | if type(handler) is logging.StreamHandler: 67 | handler.setLevel(logging.ERROR) 68 | 69 | stream_handler = logging.StreamHandler() 70 | handlers = [stream_handler] 71 | 72 | if dist.is_available() and dist.is_initialized(): 73 | rank = dist.get_rank() 74 | else: 75 | rank = 0 76 | 77 | # only rank 0 will add a FileHandler 78 | if rank == 0 and log_file is not None: 79 | # Here, the default behaviour of the official logger is 'a'. Thus, we 80 | # provide an interface to change the file mode to the default 81 | # behaviour. 82 | file_handler = logging.FileHandler(log_file, file_mode) 83 | handlers.append(file_handler) 84 | 85 | formatter = logging.Formatter( 86 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s') 87 | for handler in handlers: 88 | handler.setFormatter(formatter) 89 | handler.setLevel(log_level) 90 | logger.addHandler(handler) 91 | 92 | if rank == 0: 93 | logger.setLevel(log_level) 94 | else: 95 | logger.setLevel(logging.ERROR) 96 | 97 | logger_initialized[name] = True 98 | 99 | 100 | return logger 101 | 102 | 103 | def print_log(msg, logger=None, level=logging.INFO): 104 | """Print a log message. 105 | Args: 106 | msg (str): The message to be logged. 107 | logger (logging.Logger | str | None): The logger to be used. 108 | Some special loggers are: 109 | - "silent": no message will be printed. 110 | - other str: the logger obtained with `get_root_logger(logger)`. 111 | - None: The `print()` method will be used to print log messages. 112 | level (int): Logging level. Only available when `logger` is a Logger 113 | object or "root". 
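        Example (illustrative)::
            print_log('loading data', logger='main')    # routed via the 'main' logger
            print_log('scratch info', logger='silent')  # dropped entirely
            print_log('plain message')                  # falls back to print()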
114 | """ 115 | if logger is None: 116 | print(msg) 117 | elif isinstance(logger, logging.Logger): 118 | logger.log(level, msg) 119 | elif logger == 'silent': 120 | pass 121 | elif isinstance(logger, str): 122 | _logger = get_logger(logger) 123 | _logger.log(level, msg) 124 | else: 125 | raise TypeError( 126 | 'logger should be either a logging.Logger object, str, ' 127 | f'"silent" or None, but got {type(logger)}') -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 
161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 -------------------------------------------------------------------------------- /models/position_embedding.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # V-DETR 3 | # Copyright (c) V-DETR authors. All Rights Reserved. 4 | # ------------------------------------------------------------------------ 5 | # Modified from : 6 | # 3DETR 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # Group-Free-3D 9 | # Copyright (c) Group-Free-3D authors. All Rights Reserved. 10 | # ------------------------------------------------------------------------ 11 | """ 12 | Various positional encodings for the transformer. 
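PositionEmbeddingCoordsSine below supports two flavours: interleaved sin/cos
("sine") encodings of each xyz channel, and random Fourier features
("fourier") obtained by projecting coordinates through a fixed Gaussian
matrix before taking sin/cos.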
13 | """ 14 | import math 15 | import torch 16 | from torch import nn 17 | import numpy as np 18 | from util.pc_util import shift_scale_points 19 | 20 | 21 | class PositionEmbeddingCoordsSine(nn.Module): 22 | def __init__( 23 | self, 24 | temperature=10000, 25 | normalize=False, 26 | scale=None, 27 | pos_type="fourier", 28 | d_pos=None, 29 | d_in=3, 30 | gauss_scale=1.0, 31 | ): 32 | super().__init__() 33 | self.temperature = temperature 34 | self.normalize = normalize 35 | if scale is not None and normalize is False: 36 | raise ValueError("normalize should be True if scale is passed") 37 | if scale is None: 38 | scale = 2 * math.pi 39 | assert pos_type in ["sine", "fourier"] 40 | self.pos_type = pos_type 41 | self.scale = scale 42 | if pos_type == "fourier": 43 | assert d_pos is not None 44 | assert d_pos % 2 == 0 45 | # define a gaussian matrix input_ch -> output_ch 46 | B = torch.empty((d_in, d_pos // 2)).normal_() 47 | B *= gauss_scale 48 | self.register_buffer("gauss_B", B) 49 | self.d_pos = d_pos 50 | 51 | def get_sine_embeddings(self, xyz, num_channels, input_range): 52 | # clone coords so that shift/scale operations do not affect original tensor 53 | orig_xyz = xyz 54 | xyz = orig_xyz.clone() 55 | 56 | ncoords = xyz.shape[1] 57 | if self.normalize: 58 | xyz = shift_scale_points(xyz, src_range=input_range) 59 | 60 | ndim = num_channels // xyz.shape[2] 61 | if ndim % 2 != 0: 62 | ndim -= 1 63 | # automatically handle remainder by assiging it to the first dim 64 | rems = num_channels - (ndim * xyz.shape[2]) 65 | 66 | assert ( 67 | ndim % 2 == 0 68 | ), f"Cannot handle odd sized ndim={ndim} where num_channels={num_channels} and xyz={xyz.shape}" 69 | 70 | final_embeds = [] 71 | prev_dim = 0 72 | 73 | for d in range(xyz.shape[2]): 74 | cdim = ndim 75 | if rems > 0: 76 | # add remainder in increments of two to maintain even size 77 | cdim += 2 78 | rems -= 2 79 | 80 | if cdim != prev_dim: 81 | dim_t = torch.arange(cdim, dtype=torch.float32, device=xyz.device) 82 | dim_t = self.temperature ** (2 * (dim_t // 2) / cdim) 83 | 84 | # create batch x cdim x nccords embedding 85 | raw_pos = xyz[:, :, d] 86 | if self.scale: 87 | raw_pos *= self.scale 88 | pos = raw_pos[:, :, None] / dim_t 89 | pos = torch.stack( 90 | (pos[:, :, 0::2].sin(), pos[:, :, 1::2].cos()), dim=3 91 | ).flatten(2) 92 | final_embeds.append(pos) 93 | prev_dim = cdim 94 | 95 | final_embeds = torch.cat(final_embeds, dim=2).permute(0, 2, 1) 96 | return final_embeds 97 | 98 | def get_fourier_embeddings(self, xyz, num_channels=None, input_range=None): 99 | # Follows - https://people.eecs.berkeley.edu/~bmild/fourfeat/index.html 100 | 101 | if num_channels is None: 102 | num_channels = self.gauss_B.shape[1] * 2 103 | 104 | bsize, npoints = xyz.shape[0], xyz.shape[1] 105 | assert num_channels > 0 and num_channels % 2 == 0 106 | d_in, max_d_out = self.gauss_B.shape[0], self.gauss_B.shape[1] 107 | d_out = num_channels // 2 108 | assert d_out <= max_d_out 109 | assert d_in == xyz.shape[-1] 110 | 111 | # clone coords so that shift/scale operations do not affect original tensor 112 | orig_xyz = xyz 113 | xyz = orig_xyz.clone() 114 | 115 | ncoords = xyz.shape[1] 116 | if self.normalize: 117 | xyz = shift_scale_points(xyz, src_range=input_range) 118 | 119 | xyz *= 2 * np.pi 120 | xyz_proj = torch.mm(xyz.view(-1, d_in), self.gauss_B[:, :d_out]).view( 121 | bsize, npoints, d_out 122 | ) 123 | final_embeds = [xyz_proj.sin(), xyz_proj.cos()] 124 | 125 | # return batch x d_pos x npoints embedding 126 | final_embeds = torch.cat(final_embeds, 
dim=2).permute(0, 2, 1) 127 | return final_embeds 128 | 129 | def forward(self, xyz, num_channels=None, input_range=None): 130 | assert isinstance(xyz, torch.Tensor) 131 | assert xyz.ndim == 3 132 | # xyz is batch x npoints x 3 133 | if self.pos_type == "sine": 134 | with torch.no_grad(): 135 | return self.get_sine_embeddings(xyz, num_channels, input_range) 136 | elif self.pos_type == "fourier": 137 | with torch.no_grad(): 138 | return self.get_fourier_embeddings(xyz, num_channels, input_range) 139 | else: 140 | raise ValueError(f"Unknown {self.pos_type}") 141 | 142 | def extra_repr(self): 143 | st = f"type={self.pos_type}, scale={self.scale}, normalize={self.normalize}" 144 | if hasattr(self, "gauss_B"): 145 | st += ( 146 | f", gaussB={self.gauss_B.shape}, gaussBsum={self.gauss_B.sum().item()}" 147 | ) 148 | return st 149 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include <math.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | 8 | #include "cuda_utils.h" 9 | 10 | // input: unknown(b, n, 3) known(b, m, 3) 11 | // output: dist2(b, n, 3), idx(b, n, 3) 12 | __global__ void three_nn_kernel(int b, int n, int m, 13 | const float *__restrict__ unknown, 14 | const float *__restrict__ known, 15 | float *__restrict__ dist2, 16 | int *__restrict__ idx) { 17 | int batch_index = blockIdx.x; 18 | unknown += batch_index * n * 3; 19 | known += batch_index * m * 3; 20 | dist2 += batch_index * n * 3; 21 | idx += batch_index * n * 3; 22 | 23 | int index = threadIdx.x; 24 | int stride = blockDim.x; 25 | for (int j = index; j < n; j += stride) { 26 | float ux = unknown[j * 3 + 0]; 27 | float uy = unknown[j * 3 + 1]; 28 | float uz = unknown[j * 3 + 2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; 39 | besti3 = besti2; 40 | best2 = best1; 41 | besti2 = besti1; 42 | best1 = d; 43 | besti1 = k; 44 | } else if (d < best2) { 45 | best3 = best2; 46 | besti3 = besti2; 47 | best2 = d; 48 | besti2 = k; 49 | } else if (d < best3) { 50 | best3 = d; 51 | besti3 = k; 52 | } 53 | } 54 | dist2[j * 3 + 0] = best1; 55 | dist2[j * 3 + 1] = best2; 56 | dist2[j * 3 + 2] = best3; 57 | 58 | idx[j * 3 + 0] = besti1; 59 | idx[j * 3 + 1] = besti2; 60 | idx[j * 3 + 2] = besti3; 61 | } 62 | } 63 | 64 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 65 | const float *known, float *dist2, int *idx) { 66 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 67 | three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known, 68 | dist2, idx); 69 | 70 | CUDA_CHECK_ERRORS(); 71 | } 72 | 73 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 74 | // output: out(b, c, n) 75 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 76 | const float *__restrict__ points, 77 | const int *__restrict__ idx, 78 | const float *__restrict__ weight, 79 | float *__restrict__ out) { 80 | int batch_index = blockIdx.x; 81 | points += batch_index * m * c; 82 | 83 | idx += batch_index * n * 3; 84 | weight += batch_index * n * 3; 85 | 86 | out += batch_index * n * c; 87 | 88 | const int index = threadIdx.y * 
blockDim.x + threadIdx.x; 89 | const int stride = blockDim.y * blockDim.x; 90 | for (int i = index; i < c * n; i += stride) { 91 | const int l = i / n; 92 | const int j = i % n; 93 | float w1 = weight[j * 3 + 0]; 94 | float w2 = weight[j * 3 + 1]; 95 | float w3 = weight[j * 3 + 2]; 96 | 97 | int i1 = idx[j * 3 + 0]; 98 | int i2 = idx[j * 3 + 1]; 99 | int i3 = idx[j * 3 + 2]; 100 | 101 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 102 | points[l * m + i3] * w3; 103 | } 104 | } 105 | 106 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 107 | const float *points, const int *idx, 108 | const float *weight, float *out) { 109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 110 | three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 111 | b, c, m, n, points, idx, weight, out); 112 | 113 | CUDA_CHECK_ERRORS(); 114 | } 115 | 116 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 117 | // output: grad_points(b, c, m) 118 | 119 | __global__ void three_interpolate_grad_kernel( 120 | int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, 122 | float *__restrict__ grad_points) { 123 | int batch_index = blockIdx.x; 124 | grad_out += batch_index * n * c; 125 | idx += batch_index * n * 3; 126 | weight += batch_index * n * 3; 127 | grad_points += batch_index * m * c; 128 | 129 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 130 | const int stride = blockDim.y * blockDim.x; 131 | for (int i = index; i < c * n; i += stride) { 132 | const int l = i / n; 133 | const int j = i % n; 134 | float w1 = weight[j * 3 + 0]; 135 | float w2 = weight[j * 3 + 1]; 136 | float w3 = weight[j * 3 + 2]; 137 | 138 | int i1 = idx[j * 3 + 0]; 139 | int i2 = idx[j * 3 + 1]; 140 | int i3 = idx[j * 3 + 2]; 141 | 142 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 143 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 144 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 145 | } 146 | } 147 | 148 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 149 | const float *grad_out, 150 | const int *idx, const float *weight, 151 | float *grad_points) { 152 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 153 | three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 154 | b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | CUDA_CHECK_ERRORS(); 157 | } 158 | -------------------------------------------------------------------------------- /Uni3D/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Modified from github.com/openai/CLIP 2 | import gzip 3 | import html 4 | import os 5 | from functools import lru_cache 6 | 7 | import ftfy 8 | import regex as re 9 | import torch 10 | 11 | 12 | @lru_cache() 13 | def default_bpe(): 14 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") 15 | 16 | 17 | @lru_cache() 18 | def bytes_to_unicode(): 19 | """ 20 | Returns list of utf-8 byte and a corresponding list of unicode strings. 21 | The reversible bpe codes work on unicode strings. 22 | This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 23 | When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. 24 | This is a significant percentage of your normal, say, 32K bpe vocab. 25 | To avoid that, we want lookup tables between utf-8 bytes and unicode strings. 
26 | And avoids mapping to whitespace/control characters the bpe code barfs on. 27 | """ 28 | bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) 29 | cs = bs[:] 30 | n = 0 31 | for b in range(2**8): 32 | if b not in bs: 33 | bs.append(b) 34 | cs.append(2**8+n) 35 | n += 1 36 | cs = [chr(n) for n in cs] 37 | return dict(zip(bs, cs)) 38 | 39 | 40 | def get_pairs(word): 41 | """Return set of symbol pairs in a word. 42 | Word is represented as tuple of symbols (symbols being variable-length strings). 43 | """ 44 | pairs = set() 45 | prev_char = word[0] 46 | for char in word[1:]: 47 | pairs.add((prev_char, char)) 48 | prev_char = char 49 | return pairs 50 | 51 | 52 | def basic_clean(text): 53 | text = ftfy.fix_text(text) 54 | text = html.unescape(html.unescape(text)) 55 | return text.strip() 56 | 57 | 58 | def whitespace_clean(text): 59 | text = re.sub(r'\s+', ' ', text) 60 | text = text.strip() 61 | return text 62 | 63 | 64 | class SimpleTokenizer(object): 65 | def __init__(self, bpe_path: str = default_bpe()): 66 | self.byte_encoder = bytes_to_unicode() 67 | self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} 68 | merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') 69 | merges = merges[1:49152-256-2+1] 70 | merges = [tuple(merge.split()) for merge in merges] 71 | vocab = list(bytes_to_unicode().values()) 72 | vocab = vocab + [v+'</w>' for v in vocab] 73 | for merge in merges: 74 | vocab.append(''.join(merge)) 75 | vocab.extend(['<|startoftext|>', '<|endoftext|>']) 76 | self.encoder = dict(zip(vocab, range(len(vocab)))) 77 | self.decoder = {v: k for k, v in self.encoder.items()} 78 | self.bpe_ranks = dict(zip(merges, range(len(merges)))) 79 | self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} 80 | self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE) 81 | 82 | def bpe(self, token): 83 | if token in self.cache: 84 | return self.cache[token] 85 | word = tuple(token[:-1]) + ( token[-1] + '</w>',) 86 | pairs = get_pairs(word) 87 | 88 | if not pairs: 89 | return token+'</w>' 90 | 91 | while True: 92 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) 93 | if bigram not in self.bpe_ranks: 94 | break 95 | first, second = bigram 96 | new_word = [] 97 | i = 0 98 | while i < len(word): 99 | try: 100 | j = word.index(first, i) 101 | new_word.extend(word[i:j]) 102 | i = j 103 | except: 104 | new_word.extend(word[i:]) 105 | break 106 | 107 | if word[i] == first and i < len(word)-1 and word[i+1] == second: 108 | new_word.append(first+second) 109 | i += 2 110 | else: 111 | new_word.append(word[i]) 112 | i += 1 113 | new_word = tuple(new_word) 114 | word = new_word 115 | if len(word) == 1: 116 | break 117 | else: 118 | pairs = get_pairs(word) 119 | word = ' '.join(word) 120 | self.cache[token] = word 121 | return word 122 | 123 | def encode(self, text): 124 | bpe_tokens = [] 125 | text = whitespace_clean(basic_clean(text)).lower() 126 | for token in re.findall(self.pat, text): 127 | token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) 128 | bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) 129 | return bpe_tokens 130 | 131 | def decode(self, tokens): 132 | text = ''.join([self.decoder[token] for token in tokens]) 133 | text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ') 134 | return text 135 
| 136 | def __call__(self, texts, context_length=77): 137 | if isinstance(texts, str): 138 | texts = [texts] 139 | 140 | sot_token = self.encoder["<|startoftext|>"] 141 | eot_token = self.encoder["<|endoftext|>"] 142 | all_tokens = [[sot_token] + self.encode(text) + [eot_token] for text in texts] 143 | result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) 144 | 145 | for i, tokens in enumerate(all_tokens): 146 | tokens = tokens[:context_length] 147 | result[i, :len(tokens)] = torch.tensor(tokens) 148 | 149 | if len(result) == 1: 150 | return result[0] 151 | return result -------------------------------------------------------------------------------- /scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Load Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | """ 9 | 10 | # python imports 11 | import math 12 | import os, sys, argparse 13 | import inspect 14 | import json 15 | import pdb 16 | 17 | try: 18 | import numpy as np 19 | except: 20 | print("Failed to import numpy package.") 21 | sys.exit(-1) 22 | 23 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 24 | import scannet_utils 25 | 26 | def read_aggregation(filename): 27 | assert os.path.isfile(filename) 28 | object_id_to_segs = {} 29 | label_to_segs = {} 30 | with open(filename) as f: 31 | data = json.load(f) 32 | num_objects = len(data['segGroups']) 33 | for i in range(num_objects): 34 | object_id = data['segGroups'][i]['objectId'] + 1 # instance ids should be 1-indexed 35 | label = data['segGroups'][i]['label'] 36 | segs = data['segGroups'][i]['segments'] 37 | object_id_to_segs[object_id] = segs 38 | if label in label_to_segs: 39 | label_to_segs[label].extend(segs) 40 | else: 41 | label_to_segs[label] = segs 42 | return object_id_to_segs, label_to_segs 43 | 44 | 45 | def read_segmentation(filename): 46 | assert os.path.isfile(filename) 47 | seg_to_verts = {} 48 | with open(filename) as f: 49 | data = json.load(f) 50 | num_verts = len(data['segIndices']) 51 | for i in range(num_verts): 52 | seg_id = data['segIndices'][i] 53 | if seg_id in seg_to_verts: 54 | seg_to_verts[seg_id].append(i) 55 | else: 56 | seg_to_verts[seg_id] = [i] 57 | return seg_to_verts, num_verts 58 | 59 | 60 | def export(mesh_file, agg_file, seg_file, meta_file, label_map_file, output_file=None): 61 | """ points are XYZ RGB (RGB in 0-255), 62 | semantic label as nyu40 ids, 63 | instance label as 1-#instance, 64 | box as (cx,cy,cz,dx,dy,dz,semantic_label) 65 | """ 66 | label_map = scannet_utils.read_label_mapping(label_map_file, 67 | label_from='raw_category', label_to='nyu40id') 68 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 69 | 70 | # Load scene axis alignment matrix 71 | lines = open(meta_file).readlines() 72 | for line in lines: 73 | if 'axisAlignment' in line: 74 | axis_align_matrix = [float(x) \ 75 | for x in line.rstrip().strip('axisAlignment = ').split(' ')] 76 | break 77 | axis_align_matrix = np.array(axis_align_matrix).reshape((4,4)) 78 | pts = np.ones((mesh_vertices.shape[0], 4)) 79 | pts[:,0:3] = mesh_vertices[:,0:3] 80 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 81 | mesh_vertices[:,0:3] = pts[:,0:3] 82 | 83 | # Load semantic and instance labels 84 | 
object_id_to_segs, label_to_segs = read_aggregation(agg_file) 85 | seg_to_verts, num_verts = read_segmentation(seg_file) 86 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 87 | object_id_to_label_id = {} 88 | for label, segs in label_to_segs.items(): 89 | label_id = label_map[label] 90 | for seg in segs: 91 | verts = seg_to_verts[seg] 92 | label_ids[verts] = label_id 93 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 94 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 95 | for object_id, segs in object_id_to_segs.items(): 96 | for seg in segs: 97 | verts = seg_to_verts[seg] 98 | instance_ids[verts] = object_id 99 | if object_id not in object_id_to_label_id: 100 | object_id_to_label_id[object_id] = label_ids[verts][0] 101 | instance_bboxes = np.zeros((num_instances,7)) 102 | for obj_id in object_id_to_segs: 103 | label_id = object_id_to_label_id[obj_id] 104 | obj_pc = mesh_vertices[instance_ids==obj_id, 0:3] 105 | if len(obj_pc) == 0: continue 106 | # Compute axis aligned box 107 | # An axis aligned bounding box is parameterized by 108 | # (cx,cy,cz) and (dx,dy,dz) and label id 109 | # where (cx,cy,cz) is the center point of the box, 110 | # dx is the x-axis length of the box. 111 | xmin = np.min(obj_pc[:,0]) 112 | ymin = np.min(obj_pc[:,1]) 113 | zmin = np.min(obj_pc[:,2]) 114 | xmax = np.max(obj_pc[:,0]) 115 | ymax = np.max(obj_pc[:,1]) 116 | zmax = np.max(obj_pc[:,2]) 117 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, 118 | xmax-xmin, ymax-ymin, zmax-zmin, label_id]) 119 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 120 | instance_bboxes[obj_id-1,:] = bbox 121 | 122 | if output_file is not None: 123 | np.save(output_file+'_vert.npy', mesh_vertices) 124 | np.save(output_file+'_sem_label.npy', label_ids) 125 | np.save(output_file+'_ins_label.npy', instance_ids) 126 | np.save(output_file+'_bbox.npy', instance_bboxes) 127 | 128 | return mesh_vertices, label_ids, instance_ids,\ 129 | instance_bboxes, object_id_to_label_id 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument('--scan_path', required=True, help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 134 | parser.add_argument('--output_file', required=True, help='output file') 135 | parser.add_argument('--label_map_file', required=True, help='path to scannetv2-labels.combined.tsv') 136 | opt = parser.parse_args() 137 | 138 | scan_name = os.path.split(opt.scan_path)[-1] 139 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 140 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 141 | seg_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.0.010000.segs.json') 142 | meta_file = os.path.join(opt.scan_path, scan_name + '.txt') # includes axisAlignment info for the train set scans. 143 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, opt.output_file) 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /Uni3D/utils/dist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
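# Distributed-training helpers: rank/world-size queries, a rank-0-only print()
# override, process-group setup, and all-reduce/all-gather wrappers. A minimal
# launch sketch (illustrative; assumes one process per GPU and an env:// style
# rendezvous, with rank and world_size provided by the launcher):
#
#   init_distributed(gpu_id=local_rank, global_rank=rank,
#                    world_size=world_size, dist_url="env://",
#                    dist_backend="nccl")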
2 | import pickle 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def is_distributed(): 9 | if not dist.is_available() or not dist.is_initialized(): 10 | return False 11 | return True 12 | 13 | 14 | def get_rank(): 15 | if not is_distributed(): 16 | return 0 17 | return dist.get_rank() 18 | 19 | 20 | def is_primary(): 21 | return get_rank() == 0 22 | 23 | 24 | def get_world_size(): 25 | if not is_distributed(): 26 | return 1 27 | return dist.get_world_size() 28 | 29 | 30 | def barrier(): 31 | if not is_distributed(): 32 | return 33 | torch.distributed.barrier() 34 | 35 | 36 | def setup_print_for_distributed(is_primary): 37 | """ 38 | This function disables printing when not in primary process 39 | """ 40 | import builtins as __builtin__ 41 | builtin_print = __builtin__.print 42 | 43 | def print(*args, **kwargs): 44 | force = kwargs.pop('force', False) 45 | if is_primary or force: 46 | builtin_print(*args, **kwargs) 47 | 48 | __builtin__.print = print 49 | 50 | 51 | def init_distributed(gpu_id, global_rank, world_size, dist_url, dist_backend): 52 | torch.cuda.set_device(gpu_id) 53 | print( 54 | f"| distributed init (rank {global_rank}) (world {world_size}): {dist_url}", 55 | flush=True, 56 | ) 57 | torch.distributed.init_process_group( 58 | backend=dist_backend, 59 | init_method=dist_url, 60 | world_size=world_size, 61 | rank=global_rank, 62 | ) 63 | torch.distributed.barrier() 64 | setup_print_for_distributed(is_primary()) 65 | 66 | 67 | def all_reduce_sum(tensor): 68 | if not is_distributed(): 69 | return tensor 70 | dim_squeeze = False 71 | if tensor.ndim == 0: 72 | tensor = tensor[None, ...] 73 | dim_squeeze = True 74 | torch.distributed.all_reduce(tensor) 75 | if dim_squeeze: 76 | tensor = tensor.squeeze(0) 77 | return tensor 78 | 79 | 80 | def all_reduce_average(tensor): 81 | val = all_reduce_sum(tensor) 82 | return val / get_world_size() 83 | 84 | 85 | # Function from DETR - https://github.com/facebookresearch/detr/blob/master/util/misc.py 86 | def reduce_dict(input_dict, average=True): 87 | """ 88 | Args: 89 | input_dict (dict): all the values will be reduced 90 | average (bool): whether to do average or sum 91 | Reduce the values in the dictionary from all processes so that all processes 92 | have the averaged results. Returns a dict with the same fields as 93 | input_dict, after reduction. 
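    Example (illustrative)::
        loss_dict = {'loss_ce': ce, 'loss_bbox': bb}  # CUDA scalar tensors
        loss_dict_reduced = reduce_dict(loss_dict)    # rank-averaged copies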
94 | """ 95 | world_size = get_world_size() 96 | if world_size < 2: 97 | return input_dict 98 | with torch.no_grad(): 99 | names = [] 100 | values = [] 101 | # sort the keys so that they are consistent across processes 102 | for k in sorted(input_dict.keys()): 103 | names.append(k) 104 | values.append(input_dict[k]) 105 | values = torch.stack(values, dim=0) 106 | torch.distributed.all_reduce(values) 107 | if average: 108 | values /= world_size 109 | reduced_dict = {k: v for k, v in zip(names, values)} 110 | return reduced_dict 111 | 112 | 113 | # Function from https://github.com/facebookresearch/detr/blob/master/util/misc.py 114 | def all_gather_pickle(data, device): 115 | """ 116 | Run all_gather on arbitrary picklable data (not necessarily tensors) 117 | Args: 118 | data: any picklable object 119 | Returns: 120 | list[data]: list of data gathered from each rank 121 | """ 122 | world_size = get_world_size() 123 | if world_size == 1: 124 | return [data] 125 | 126 | # serialized to a Tensor 127 | buffer = pickle.dumps(data) 128 | storage = torch.ByteStorage.from_buffer(buffer) 129 | tensor = torch.ByteTensor(storage).to(device) 130 | 131 | # obtain Tensor size of each rank 132 | local_size = torch.tensor([tensor.numel()], device=device) 133 | size_list = [torch.tensor([0], device=device) for _ in range(world_size)] 134 | dist.all_gather(size_list, local_size) 135 | size_list = [int(size.item()) for size in size_list] 136 | max_size = max(size_list) 137 | 138 | # receiving Tensor from all ranks 139 | # we pad the tensor because torch all_gather does not support 140 | # gathering tensors of different shapes 141 | tensor_list = [] 142 | for _ in size_list: 143 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=device)) 144 | if local_size != max_size: 145 | padding = torch.empty( 146 | size=(max_size - local_size,), dtype=torch.uint8, device=device 147 | ) 148 | tensor = torch.cat((tensor, padding), dim=0) 149 | dist.all_gather(tensor_list, tensor) 150 | 151 | data_list = [] 152 | for size, tensor in zip(size_list, tensor_list): 153 | buffer = tensor.cpu().numpy().tobytes()[:size] 154 | data_list.append(pickle.loads(buffer)) 155 | 156 | return data_list 157 | 158 | 159 | def all_gather_dict(data): 160 | """ 161 | Run all_gather on data which is a dictionary of Tensors 162 | """ 163 | assert isinstance(data, dict) 164 | 165 | gathered_dict = {} 166 | for item_key in data: 167 | if isinstance(data[item_key], torch.Tensor): 168 | if is_distributed(): 169 | data[item_key] = data[item_key].contiguous() 170 | tensor_list = [torch.empty_like(data[item_key]) for _ in range(get_world_size())] 171 | dist.all_gather(tensor_list, data[item_key]) 172 | gathered_tensor = torch.cat(tensor_list, dim=0) 173 | else: 174 | gathered_tensor = data[item_key] 175 | gathered_dict[item_key] = gathered_tensor 176 | return gathered_dict 177 | 178 | def batch_dict_to_cuda(batch_dict,local_rank="cuda:0"): 179 | for key in batch_dict: 180 | if isinstance(batch_dict[key], torch.Tensor): 181 | batch_dict[key] = batch_dict[key].to(local_rank) 182 | elif isinstance(batch_dict[key], list): 183 | if len(batch_dict[key])>0 and isinstance(batch_dict[key][0],torch.Tensor): 184 | batch_dict[key] = [item.to(local_rank) for item in batch_dict[key]] 185 | return batch_dict 186 | 187 | -------------------------------------------------------------------------------- /util/dist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import pickle 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def is_distributed(): 9 | if not dist.is_available() or not dist.is_initialized(): 10 | return False 11 | return True 12 | 13 | 14 | def get_rank(): 15 | if not is_distributed(): 16 | return 0 17 | return dist.get_rank() 18 | 19 | 20 | def is_primary(): 21 | return get_rank() == 0 22 | 23 | 24 | def get_world_size(): 25 | if not is_distributed(): 26 | return 1 27 | return dist.get_world_size() 28 | 29 | 30 | def barrier(): 31 | if not is_distributed(): 32 | return 33 | torch.distributed.barrier() 34 | 35 | 36 | def setup_print_for_distributed(is_primary): 37 | """ 38 | This function disables printing when not in primary process 39 | """ 40 | import builtins as __builtin__ 41 | builtin_print = __builtin__.print 42 | 43 | def print(*args, **kwargs): 44 | force = kwargs.pop('force', False) 45 | if is_primary or force: 46 | builtin_print(*args, **kwargs) 47 | 48 | __builtin__.print = print 49 | 50 | 51 | def init_distributed(gpu_id, global_rank, world_size, dist_url, dist_backend): 52 | torch.cuda.set_device(gpu_id) 53 | print( 54 | f"| distributed init (rank {global_rank}) (world {world_size}): {dist_url}", 55 | flush=True, 56 | ) 57 | torch.distributed.init_process_group( 58 | backend=dist_backend, 59 | init_method=dist_url, 60 | world_size=world_size, 61 | rank=global_rank, 62 | ) 63 | torch.distributed.barrier() 64 | setup_print_for_distributed(is_primary()) 65 | 66 | 67 | def all_reduce_sum(tensor): 68 | if not is_distributed(): 69 | return tensor 70 | dim_squeeze = False 71 | if tensor.ndim == 0: 72 | tensor = tensor[None, ...] 73 | dim_squeeze = True 74 | torch.distributed.all_reduce(tensor) 75 | if dim_squeeze: 76 | tensor = tensor.squeeze(0) 77 | return tensor 78 | 79 | 80 | def all_reduce_average(tensor): 81 | val = all_reduce_sum(tensor) 82 | return val / get_world_size() 83 | 84 | 85 | # Function from DETR - https://github.com/facebookresearch/detr/blob/master/util/misc.py 86 | def reduce_dict(input_dict, average=True): 87 | """ 88 | Args: 89 | input_dict (dict): all the values will be reduced 90 | average (bool): whether to do average or sum 91 | Reduce the values in the dictionary from all processes so that all processes 92 | have the averaged results. Returns a dict with the same fields as 93 | input_dict, after reduction. 
94 | """ 95 | world_size = get_world_size() 96 | if world_size < 2: 97 | return input_dict 98 | with torch.no_grad(): 99 | names = [] 100 | values = [] 101 | # sort the keys so that they are consistent across processes 102 | for k in sorted(input_dict.keys()): 103 | names.append(k) 104 | values.append(input_dict[k]) 105 | values = torch.stack(values, dim=0) 106 | torch.distributed.all_reduce(values) 107 | if average: 108 | values /= world_size 109 | reduced_dict = {k: v for k, v in zip(names, values)} 110 | return reduced_dict 111 | 112 | 113 | # Function from https://github.com/facebookresearch/detr/blob/master/util/misc.py 114 | def all_gather_pickle(data, device): 115 | """ 116 | Run all_gather on arbitrary picklable data (not necessarily tensors) 117 | Args: 118 | data: any picklable object 119 | Returns: 120 | list[data]: list of data gathered from each rank 121 | """ 122 | world_size = get_world_size() 123 | if world_size == 1: 124 | return [data] 125 | 126 | # serialized to a Tensor 127 | buffer = pickle.dumps(data) 128 | storage = torch.ByteStorage.from_buffer(buffer) 129 | tensor = torch.ByteTensor(storage).to(device) 130 | 131 | # obtain Tensor size of each rank 132 | local_size = torch.tensor([tensor.numel()], device=device) 133 | size_list = [torch.tensor([0], device=device) for _ in range(world_size)] 134 | dist.all_gather(size_list, local_size) 135 | size_list = [int(size.item()) for size in size_list] 136 | max_size = max(size_list) 137 | 138 | # receiving Tensor from all ranks 139 | # we pad the tensor because torch all_gather does not support 140 | # gathering tensors of different shapes 141 | tensor_list = [] 142 | for _ in size_list: 143 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=device)) 144 | if local_size != max_size: 145 | padding = torch.empty( 146 | size=(max_size - local_size,), dtype=torch.uint8, device=device 147 | ) 148 | tensor = torch.cat((tensor, padding), dim=0) 149 | dist.all_gather(tensor_list, tensor) 150 | 151 | data_list = [] 152 | for size, tensor in zip(size_list, tensor_list): 153 | buffer = tensor.cpu().numpy().tobytes()[:size] 154 | data_list.append(pickle.loads(buffer)) 155 | 156 | return data_list 157 | 158 | 159 | def all_gather_dict(data): 160 | """ 161 | Run all_gather on data which is a dictionary of Tensors 162 | """ 163 | assert isinstance(data, dict) 164 | 165 | gathered_dict = {} 166 | for item_key in data: 167 | if isinstance(data[item_key], torch.Tensor): 168 | if is_distributed(): 169 | data[item_key] = data[item_key].contiguous() 170 | tensor_list = [torch.empty_like(data[item_key]) for _ in range(get_world_size())] 171 | dist.all_gather(tensor_list, data[item_key]) 172 | gathered_tensor = torch.cat(tensor_list, dim=0) 173 | else: 174 | gathered_tensor = data[item_key] 175 | gathered_dict[item_key] = gathered_tensor 176 | return gathered_dict 177 | 178 | def batch_dict_to_cuda(batch_dict,local_rank="cuda:0"): 179 | # print("batch_dict: ", type(batch_dict)) 180 | if isinstance(batch_dict, dict): 181 | for key in batch_dict: 182 | if isinstance(batch_dict[key], torch.Tensor): 183 | batch_dict[key] = batch_dict[key].to(local_rank) 184 | elif isinstance(batch_dict[key], list): 185 | if len(batch_dict[key])>0 and isinstance(batch_dict[key][0],torch.Tensor): 186 | batch_dict[key] = [item.to(local_rank) for item in batch_dict[key]] 187 | # else: 188 | # for key in batch_dict: 189 | # print("key: ", key) 190 | 191 | return batch_dict 192 | 193 | 
--------------------------------------------------------------------------------
/point2graph.yaml:
--------------------------------------------------------------------------------
name: vdetr
channels:
  - anaconda
  - pytorch
  - nvidia
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - blas=1.0=mkl
  - brotli-python=1.0.9=py38h6a678d5_8
  - bzip2=1.0.8=h5eee18b_6
  - ca-certificates=2024.3.11=h06a4308_0
  - certifi=2024.6.2=py38h06a4308_0
  - charset-normalizer=2.0.4=pyhd3eb1b0_0
  - cuda-cudart=11.8.89=0
  - cuda-cupti=11.8.87=0
  - cuda-libraries=11.8.0=0
  - cuda-nvrtc=11.8.89=0
  - cuda-nvtx=11.8.86=0
  - cuda-runtime=11.8.0=0
  - cudatoolkit=11.3.1=h2bc3f7f_2
  - ffmpeg=4.3=hf484d3e_0
  - freetype=2.12.1=h4a9f257_0
  - gmp=6.2.1=h295c915_3
  - gmpy2=2.1.2=py38heeb90bb_0
  - gnutls=3.6.15=he1e5248_0
  - idna=3.7=py38h06a4308_0
  - intel-openmp=2023.1.0=hdb19cb5_46306
  - jinja2=3.1.4=py38h06a4308_0
  - jpeg=9e=h5eee18b_1
  - lame=3.100=h7b6447c_0
  - lcms2=2.12=h3be6417_0
  - ld_impl_linux-64=2.38=h1181459_1
  - lerc=3.0=h295c915_0
  - libcublas=11.11.3.6=0
  - libcufft=10.9.0.58=0
  - libcufile=1.9.1.3=0
  - libcurand=10.3.5.147=0
  - libcusolver=11.4.1.48=0
  - libcusparse=11.7.5.86=0
  - libdeflate=1.17=h5eee18b_1
  - libffi=3.4.4=h6a678d5_1
  - libgcc-ng=11.2.0=h1234567_1
  - libgfortran-ng=8.4.0=he6ba991_17
  - libgfortran5=8.4.0=he6ba991_17
  - libgomp=11.2.0=h1234567_1
  - libiconv=1.16=h5eee18b_3
  - libidn2=2.3.4=h5eee18b_0
  - libjpeg-turbo=2.0.0=h9bf148f_0
  - libnpp=11.8.0.86=0
  - libnvjpeg=11.9.0.86=0
  - libopenblas=0.3.2=h9ac9557_1
  - libpng=1.6.39=h5eee18b_0
  - libstdcxx-ng=11.2.0=h1234567_1
  - libtasn1=4.19.0=h5eee18b_0
  - libtiff=4.5.1=h6a678d5_0
  - libunistring=0.9.10=h27cfd23_0
  - libwebp-base=1.3.2=h5eee18b_0
  - llvm-openmp=14.0.6=h9e868ea_0
  - lz4-c=1.9.4=h6a678d5_1
  - markupsafe=2.1.3=py38h5eee18b_0
  - mkl=2023.1.0=h213fc3f_46344
  - mkl-service=2.4.0=py38h5eee18b_1
  - mkl_fft=1.3.8=py38h5eee18b_0
  - mkl_random=1.2.4=py38hdb19cb5_0
  - mpc=1.1.0=h10f8cd9_1
  - mpfr=4.0.2=hb69a4c5_1
  - mpmath=1.3.0=py38h06a4308_0
  - ncurses=6.4=h6a678d5_0
  - nettle=3.7.3=hbbd107a_1
  - networkx=3.1=py38h06a4308_0
  - nomkl=2.0=0
  - numpy-base=1.24.3=py38h060ed82_1
  - openblas-devel=0.3.2=0
  - openh264=2.1.1=h4ff587b_0
  - openjpeg=2.4.0=h3ad879b_0
  - openssl=3.0.13=h7f8727e_2
  - pillow=10.3.0=py38h5eee18b_0
  - pip=24.0=py38h06a4308_0
  - pysocks=1.7.1=py38h06a4308_0
  - python=3.8.19=h955ad1f_0
  - pytorch=1.12.1=py3.8_cuda11.3_cudnn8.3.2_0
  - pytorch-cuda=11.8=h7e8668a_5
  - pytorch-mutex=1.0=cuda
  - pyyaml=6.0.1=py38h5eee18b_0
  - readline=8.2=h5eee18b_0
  - sqlite=3.45.3=h5eee18b_0
  - sympy=1.12=py38h06a4308_0
  - tbb=2021.8.0=hdb19cb5_0
  - tk=8.6.14=h39e8969_0
  - torchaudio=0.12.1=py38_cu113
  - torchtriton=2.3.1=py38
  - torchvision=0.13.1=py38_cu113
  - typing_extensions=4.11.0=py38h06a4308_0
  - wheel=0.43.0=py38h06a4308_0
  - xz=5.4.6=h5eee18b_1
  - yaml=0.2.5=h7b6447c_0
  - zlib=1.2.13=h5eee18b_1
  - zstd=1.5.5=hc292b87_2
  - pip:
    - addict==2.4.0
    - aliyun-python-sdk-core==2.15.1
    - aliyun-python-sdk-kms==2.16.3
    - annotated-types==0.7.0
    - asttokens==2.4.1
    - attrs==23.2.0
    - backcall==0.2.0
    - blinker==1.8.2
    - cffi==1.16.0
    - click==8.1.7
    - colorama==0.4.6
    - comm==0.2.2
    - configargparse==1.7
    - contourpy==1.1.1
    - crcmod==1.7
    - cryptography==42.0.8
    - cycler==0.12.1
    - cython==3.0.10
    - dash==2.17.1
    - dash-core-components==2.0.0
    - dash-html-components==2.0.0
    - dash-table==5.0.0
    - decorator==5.1.1
    - deepspeed==0.14.3
    - docker-pycreds==0.4.0
    - easydict==1.13
    - executing==2.0.1
    - fastjsonschema==2.19.1
    - filelock==3.14.0
    - flask==3.0.3
    - fonttools==4.53.0
    - fsspec==2024.6.0
    - ftfy==6.2.0
    - future==1.0.0
    - gitdb==4.0.11
    - gitpython==3.1.43
    - h5py==3.11.0
    - hjson==3.1.0
    - huggingface-hub==0.23.3
    - importlib-metadata==7.1.0
    - importlib-resources==6.4.0
    - ipython==8.12.3
    - ipywidgets==8.1.3
    - itsdangerous==2.2.0
    - jedi==0.19.1
    - jmespath==0.10.0
    - joblib==1.4.2
    - jsonschema==4.22.0
    - jsonschema-specifications==2023.12.1
    - jupyter-core==5.7.2
    - jupyterlab-widgets==3.0.11
    - kiwisolver==1.4.5
    - markdown==3.6
    - markdown-it-py==3.0.0
    - matplotlib==3.7.5
    - matplotlib-inline==0.1.7
    - mdurl==0.1.2
    - minkowskiengine==0.5.4
    - mmcv-full==1.6.1
    - model-index==0.1.11
    - nbformat==5.10.4
    - nest-asyncio==1.6.0
    - ninja==1.11.1.1
    - numpy==1.24.4
    - nvidia-ml-py==12.555.43
    - open-clip-torch==2.24.0
    - open3d==0.18.0
    - opencv-python==4.10.0.82
    - opendatalab==0.0.10
    - openmim==0.3.9
    - openxlab==0.1.0
    - ordered-set==4.1.0
    - oss2==2.17.0
    - packaging==24.1
    - pandas==2.0.3
    - parso==0.8.4
    - pexpect==4.9.0
    - pickleshare==0.7.5
    - pkgutil-resolve-name==1.3.10
    - platformdirs==4.2.2
    - plotly==5.22.0
    - plyfile==1.0.3
    - pointnet2-ops==3.0.0
    - prompt-toolkit==3.0.47
    - protobuf==5.27.1
    - psutil==5.9.8
    - ptyprocess==0.7.0
    - pure-eval==0.2.2
    - py-cpuinfo==9.0.0
    - pycparser==2.22
    - pycryptodome==3.20.0
    - pydantic==2.7.4
    - pydantic-core==2.18.4
    - pygments==2.18.0
    - pyparsing==3.1.2
    - pyquaternion==0.9.9
    - python-dateutil==2.9.0.post0
    - pytz==2023.4
    - pyviz3d==0.3.5
    - referencing==0.35.1
    - regex==2024.5.15
    - requests==2.28.2
    - retrying==1.3.4
    - rich==13.4.2
    - rpds-py==0.18.1
    - safetensors==0.4.3
    - scikit-learn==1.3.2
    - scipy==1.10.1
    - sentencepiece==0.2.0
    - sentry-sdk==2.5.1
    - setproctitle==1.3.3
    - setuptools==60.2.0
    - six==1.16.0
    - smmap==5.0.1
    - stack-data==0.6.3
    - tabulate==0.9.0
    - tenacity==8.3.0
    - threadpoolctl==3.5.0
    - timm==1.0.3
    - tomli==2.0.1
    - torch==1.6.0
    - tqdm==4.65.2
    - traitlets==5.14.3
    - trimesh==4.4.1
    - tzdata==2024.1
    - urllib3==1.26.18
    - wandb==0.17.1
    - wcwidth==0.2.13
    - werkzeug==3.0.3
    - widgetsnbextension==4.0.11
    - yapf==0.40.2
    - zipp==3.19.2
prefix: /root/anaconda3/envs/vdetr
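# --- Editor's note (hedged): this pinned export can be materialized with
# `conda env create -f point2graph.yaml`. The pip section pins torch==1.6.0
# while the conda section pins pytorch=1.12.1 (cuda 11.3) above; after
# install it is worth checking torch.__version__ to confirm which pin
# actually ended up in the environment. ---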
--------------------------------------------------------------------------------
/util/box_intersection.pyx:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
cimport numpy as np
cimport cython
cdef bint boolean_variable = True
np.import_array()


FLOAT = np.float32

@cython.boundscheck(False)
@cython.wraparound(False)
def computeIntersection(cp1, cp2, s, e):
    dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
    dp = [s[0] - e[0], s[1] - e[1]]
    n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    n2 = s[0] * e[1] - s[1] * e[0]
    n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
    return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline bint inside(cp1, cp2, p):
    return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])

@cython.boundscheck(False)
def polygon_clip_unnest(float [:, :] subjectPolygon, float [:, :] clipPolygon):
    """ Clip a polygon with another polygon.

    Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python

    Args:
        subjectPolygon: a list of (x,y) 2d points, any polygon.
        clipPolygon: a list of (x,y) 2d points, has to be *convex*
    Note:
        **points have to be counter-clockwise ordered**

    Return:
        a list of (x,y) vertex points for the intersection polygon.
    """
    outputList = [subjectPolygon[x] for x in range(subjectPolygon.shape[0])]
    cp1 = clipPolygon[-1]
    cdef int lenc = len(clipPolygon)
    cdef int iidx = 0

    # for clipVertex in clipPolygon:
    for cidx in range(lenc):
        clipVertex = clipPolygon[cidx]
        cp2 = clipVertex
        inputList = outputList.copy()
        outputList.clear()
        s = inputList[-1]

        inc = len(inputList)

        # for subjectVertex in inputList:
        for iidx in range(inc):
            subjectVertex = inputList[iidx]
            e = subjectVertex
            if inside(cp1, cp2, e):
                if not inside(cp1, cp2, s):
                    outputList.append(computeIntersection(cp1, cp2, s, e))
                outputList.append(e)
            elif inside(cp1, cp2, s):
                outputList.append(computeIntersection(cp1, cp2, s, e))
            s = e
        cp1 = cp2
        if len(outputList) == 0:
            break
    return outputList
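
# --- Editor's note: hedged sanity check, not part of the original file. ---
# Clipping two overlapping unit squares (counter-clockwise vertices, as the
# docstring requires) should return the corners of their 0.5 x 0.5 overlap:
#
#   subj = np.array([[0, 0], [1, 0], [1, 1], [0, 1]], dtype=np.float32)
#   clip = np.array([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]],
#                   dtype=np.float32)
#   inter = polygon_clip_unnest(subj, clip)  # 4 points spanning [0.5, 1]^2
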

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void copy_points(float[:, :] src, float[:, :] dst, Py_ssize_t num_points):
    cdef Py_ssize_t i
    for i in range(num_points):
        dst[i][0] = src[i][0]
        dst[i][1] = src[i][1]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline Py_ssize_t add_point(float[:, :] arr, float[:] point, Py_ssize_t num_points):
    # assert num_points < arr.shape[0] - 1
    arr[num_points][0] = point[0]
    arr[num_points][1] = point[1]
    num_points = num_points + 1
    return num_points

@cython.boundscheck(False)
@cython.wraparound(False)
cdef Py_ssize_t computeIntersection_and_add(float[:] cp1, float[:] cp2, float[:] s, float[:] e, float[:, :] arr, Py_ssize_t num_points):
    cdef float[2] dc
    dc[0] = cp1[0] - cp2[0]
    dc[1] = cp1[1] - cp2[1]

    cdef float[2] dp
    dp[0] = s[0] - e[0]
    dp[1] = s[1] - e[1]

    cdef float n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef float n2 = s[0] * e[1] - s[1] * e[0]
    cdef float n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])

    arr[num_points][0] = (n1 * dp[0] - n2 * dc[0]) * n3
    arr[num_points][1] = (n1 * dp[1] - n2 * dc[1]) * n3
    num_points = num_points + 1

    return num_points

@cython.boundscheck(False)
@cython.wraparound(False)
def polygon_clip_float(float [:, :] subjectPolygon, float [:, :] clipPolygon):
    """
    Assumes subjectPolygon and clipPolygon have 4 vertices
    """
    cdef Py_ssize_t num_clip_points = clipPolygon.shape[0]
    cp1 = clipPolygon[num_clip_points - 1]

    MAX_INTERSECT_POINTS = 10
    num_intersect_points = 0
    outputList_np = np.zeros((MAX_INTERSECT_POINTS, 2), dtype=np.float32)
    cdef float[:, :] outputList = outputList_np

    inputList_np = np.zeros((MAX_INTERSECT_POINTS, 2), dtype=np.float32)
    cdef float[:, :] inputList = inputList_np

    copy_points(subjectPolygon, outputList, subjectPolygon.shape[0])
    cdef Py_ssize_t noutput_list = subjectPolygon.shape[0]
    cdef Py_ssize_t ninput_list = 0
    cdef Py_ssize_t iidx = 0

    for cidx in range(num_clip_points):
        clipVertex = clipPolygon[cidx]
        cp2 = clipVertex

        copy_points(outputList, inputList, noutput_list)
        ninput_list = noutput_list
        noutput_list = 0

        s = inputList[ninput_list - 1]

        for iidx in range(ninput_list):
            e = inputList[iidx]
            if inside(cp1, cp2, e):
                if not inside(cp1, cp2, s):
                    noutput_list = computeIntersection_and_add(cp1, cp2, s, e, outputList, noutput_list)

                noutput_list = add_point(outputList, e, noutput_list)
            elif inside(cp1, cp2, s):
                noutput_list = computeIntersection_and_add(cp1, cp2, s, e, outputList, noutput_list)
            s = e
        cp1 = cp2
        if noutput_list == 0:
            break
    return outputList_np, noutput_list


@cython.boundscheck(False)
@cython.wraparound(False)
def box_intersection(float [:, :, :, :] rect1,
                     float [:, :, :, :] rect2,
                     float [:, :, :] non_rot_inter_areas,
                     int[:] nums_k2,
                     float [:, :, :] inter_areas,
                     bint approximate):
    """
    rect1 - B x K1 x 8 x 3 matrix of box corners
    rect2 - B x K2 x 8 x 3 matrix of box corners
    non_rot_inter_areas - intersection areas of the axis-aligned (non-rotated) boxes
    """

    cdef Py_ssize_t B = rect1.shape[0]
    cdef Py_ssize_t K1 = rect1.shape[1]
    cdef Py_ssize_t K2 = rect2.shape[1]  # dim 1 of rect2 (dim 2 is the 8 corners)

    for b in range(B):
        for k1 in range(K1):
            for k2 in range(K2):
                if k2 >= nums_k2[b]:
                    break

                if approximate and non_rot_inter_areas[b][k1][k2] == 0:
                    continue

                ##### compute the area of the intersection polygon (shoelace formula)
                inter = polygon_clip_unnest(rect1[b, k1], rect2[b, k2])
                ninter = len(inter)
                if ninter > 0:  # there is some intersection between the boxes
                    xs = np.array([x[0] for x in inter]).astype(dtype=FLOAT)
                    ys = np.array([x[1] for x in inter]).astype(dtype=FLOAT)
                    inter_areas[b, k1, k2] = 0.5 * np.abs(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))
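
# --- Editor's note (hedged): this module is shipped prebuilt as
# box_intersection.cpython-38-x86_64-linux-gnu.so next to this source, and
# util/cython_compile.py sits alongside it; a rebuild is presumably
# `python cython_compile.py build_ext --inplace` run from util/, the usual
# cythonize-based setup pattern (verify against that script). ---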
--------------------------------------------------------------------------------
/util/ply_helper.py:
--------------------------------------------------------------------------------
import os
import numpy as np

from plyfile import PlyData, PlyElement


def read_mesh_vertices_rgb_normal(filename):
    """ Read XYZ + RGB vertices and face indices from a PLY file. """
    assert(os.path.isfile(filename))
    with open(filename, 'rb') as f:
        plydata = PlyData.read(f)
        num_verts = plydata['vertex'].count
        vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32)
        vertices[:, 0] = plydata['vertex'].data['x']
        vertices[:, 1] = plydata['vertex'].data['y']
        vertices[:, 2] = plydata['vertex'].data['z']
        vertices[:, 3] = plydata['vertex'].data['red']
        vertices[:, 4] = plydata['vertex'].data['green']
        vertices[:, 5] = plydata['vertex'].data['blue']

        # face indices (callers can use these to compute normals)
        face = np.array([f[0] for f in plydata["face"].data])

    return vertices, face


def write_ply(verts, colors, indices, output_file):
    if colors is None:
        colors = np.zeros_like(verts)
    if indices is None:
        indices = []

    file = open(output_file, 'w')
    file.write('ply\n')
    file.write('format ascii 1.0\n')
    file.write('element vertex {:d}\n'.format(len(verts)))
    file.write('property float x\n')
    file.write('property float y\n')
    file.write('property float z\n')
    file.write('property uchar red\n')
    file.write('property uchar green\n')
    file.write('property uchar blue\n')
    file.write('element face {:d}\n'.format(len(indices)))
    file.write('property list uchar uint vertex_indices\n')
    file.write('end_header\n')
    for vert, color in zip(verts, colors):
        file.write("{:f} {:f} {:f} {:d} {:d} {:d}\n".format(vert[0], vert[1], vert[2], int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)))
    for ind in indices:
        file.write('3 {:d} {:d} {:d}\n'.format(ind[0], ind[1], ind[2]))
    file.close()


def create_cylinder_mesh(radius, p0, p1, stacks=10, slices=10):

    import math

    def compute_length_vec3(vec3):
        return math.sqrt(vec3[0] * vec3[0] + vec3[1] * vec3[1] + vec3[2] * vec3[2])

    def rotation(axis, angle):
        # axis-angle to a 4x4 rotation matrix (Rodrigues' formula, expanded)
        rot = np.eye(4)
        c = np.cos(-angle)
        s = np.sin(-angle)
        t = 1.0 - c
        axis /= compute_length_vec3(axis)
        x = axis[0]
        y = axis[1]
        z = axis[2]
        rot[0, 0] = 1 + t * (x * x - 1)
        rot[0, 1] = z * s + t * x * y
        rot[0, 2] = -y * s + t * x * z
        rot[1, 0] = -z * s + t * x * y
        rot[1, 1] = 1 + t * (y * y - 1)
        rot[1, 2] = x * s + t * y * z
        rot[2, 0] = y * s + t * x * z
        rot[2, 1] = -x * s + t * y * z
        rot[2, 2] = 1 + t * (z * z - 1)
        return rot

    verts = []
    indices = []
    diff = (p1 - p0).astype(np.float32)
    height = compute_length_vec3(diff)
    # one ring of vertices per stack, then two triangles per quad
    for i in range(stacks + 1):
        for i2 in range(slices):
            theta = i2 * 2.0 * math.pi / slices
            pos = np.array([radius * math.cos(theta), radius * math.sin(theta), height * i / stacks])
            verts.append(pos)
    for i in range(stacks):
        for i2 in range(slices):
            i2p1 = math.fmod(i2 + 1, slices)
            indices.append(np.array([(i + 1) * slices + i2, i * slices + i2, i * slices + i2p1], dtype=np.uint32))
            indices.append(np.array([(i + 1) * slices + i2, i * slices + i2p1, (i + 1) * slices + i2p1], dtype=np.uint32))
    # rotate the +z-aligned cylinder onto the p0 -> p1 axis, then translate to p0
    transform = np.eye(4)
    va = np.array([0, 0, 1], dtype=np.float32)
    vb = diff
    vb /= compute_length_vec3(vb)
    axis = np.cross(vb, va)
    angle = np.arccos(np.clip(np.dot(va, vb), -1, 1))
    if angle != 0:
        if compute_length_vec3(axis) == 0:
            dotx = va[0]
            if (math.fabs(dotx) != 1.0):
                axis = np.array([1, 0, 0]) - dotx * va
            else:
                axis = np.array([0, 1, 0]) - va[1] * va
            axis /= compute_length_vec3(axis)
        transform = rotation(axis, -angle)
    transform[:3, 3] += p0
    verts = [np.dot(transform, np.array([v[0], v[1], v[2], 1.0])) for v in verts]
    verts = [np.array([v[0], v[1], v[2]]) / v[3] for v in verts]

    return verts, indices
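
# --- Editor's note: hedged usage sketch, not part of the original file. ---
# Dumping a red wireframe around a unit cube; the corner array is an
# assumption for illustration (write_bbox below only uses the axis-aligned
# min/max of the corners it is given):
#
#   corners = np.array([[x, y, z] for x in (0, 1) for y in (0, 1)
#                       for z in (0, 1)], dtype=np.float32)
#   write_bbox(corners, color=(255, 0, 0), output_file="cube_bbox.ply")
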

def write_bbox(corners, color, output_file):
    """
    corners: (N, 3) array of box corner points; only their axis-aligned
        min/max are used to build the wireframe
    color: (r, g, b), each in 0-255
    output_file: string, path of the output PLY file
    """

    def get_bbox_edges(bbox_min, bbox_max):
        def get_bbox_verts(bbox_min, bbox_max):
            verts = [
                np.array([bbox_min[0], bbox_min[1], bbox_min[2]]),
                np.array([bbox_max[0], bbox_min[1], bbox_min[2]]),
                np.array([bbox_max[0], bbox_max[1], bbox_min[2]]),
                np.array([bbox_min[0], bbox_max[1], bbox_min[2]]),

                np.array([bbox_min[0], bbox_min[1], bbox_max[2]]),
                np.array([bbox_max[0], bbox_min[1], bbox_max[2]]),
                np.array([bbox_max[0], bbox_max[1], bbox_max[2]]),
                np.array([bbox_min[0], bbox_max[1], bbox_max[2]])
            ]
            return verts

        box_verts = get_bbox_verts(bbox_min, bbox_max)
        edges = [
            (box_verts[0], box_verts[1]),
            (box_verts[1], box_verts[2]),
            (box_verts[2], box_verts[3]),
            (box_verts[3], box_verts[0]),

            (box_verts[4], box_verts[5]),
            (box_verts[5], box_verts[6]),
            (box_verts[6], box_verts[7]),
            (box_verts[7], box_verts[4]),

            (box_verts[0], box_verts[4]),
            (box_verts[1], box_verts[5]),
            (box_verts[2], box_verts[6]),
            (box_verts[3], box_verts[7])
        ]
        return edges

    radius = 0.03
    offset = [0, 0, 0]
    verts = []
    indices = []
    colors = []

    box_min = np.min(corners, axis=0)
    box_max = np.max(corners, axis=0)
    edges = get_bbox_edges(box_min, box_max)
    # one cylinder per box edge
    for k in range(len(edges)):
        cyl_verts, cyl_ind = create_cylinder_mesh(radius, edges[k][0], edges[k][1])
        cur_num_verts = len(verts)
        cyl_color = [[c / 255 for c in color] for _ in cyl_verts]
        cyl_verts = [x + offset for x in cyl_verts]
        cyl_ind = [x + cur_num_verts for x in cyl_ind]
        verts.extend(cyl_verts)
        indices.extend(cyl_ind)
        colors.extend(cyl_color)

    write_ply(verts, colors, indices, output_file)
    return


def write_path(points, color, output_file):
    """Write a polyline as a chain of cylinders to a PLY file."""
    radius = 0.03
    offset = [0, 0, 0]
    verts = []
    indices = []
    colors = []

    for start, end in zip(points[:-1], points[1:]):
        cyl_verts, cyl_ind = create_cylinder_mesh(radius, start, end)
        cur_num_verts = len(verts)
        cyl_color = [[c / 255 for c in color] for _ in cyl_verts]
        cyl_verts = [x + offset for x in cyl_verts]
        cyl_ind = [x + cur_num_verts for x in cyl_ind]
        verts.extend(cyl_verts)
        indices.extend(cyl_ind)
        colors.extend(cyl_color)

    write_ply(verts, colors, indices, output_file)
    return
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/sampling_gpu.cu:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates.

#include <stdio.h>
#include <stdlib.h>

#include "cuda_utils.h"

// input: points(b, c, n) idx(b, m)
// output: out(b, c, m)
__global__ void gather_points_kernel(int b, int c, int n, int m,
                                     const float *__restrict__ points,
                                     const int *__restrict__ idx,
                                     float *__restrict__ out) {
  for (int i = blockIdx.x; i < b; i += gridDim.x) {
    for (int l = blockIdx.y; l < c; l += gridDim.y) {
      for (int j = threadIdx.x; j < m; j += blockDim.x) {
        int a = idx[i * m + j];
        out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
      }
    }
  }
}

void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
                                  const float *points, const int *idx,
                                  float *out) {
  gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                         at::cuda::getCurrentCUDAStream()>>>(b, c, n, npoints,
                                                             points, idx, out);

  CUDA_CHECK_ERRORS();
}

// input: grad_out(b, c, m) idx(b, m)
// output: grad_points(b, c, n)
__global__ void gather_points_grad_kernel(int b, int c, int n, int m,
                                          const float *__restrict__ grad_out,
                                          const int *__restrict__ idx,
                                          float *__restrict__ grad_points) {
  for (int i = blockIdx.x; i < b; i += gridDim.x) {
    for (int l = blockIdx.y; l < c; l += gridDim.y) {
      for (int j = threadIdx.x; j < m; j += blockDim.x) {
        int a = idx[i * m + j];
        atomicAdd(grad_points + (i * c + l) * n + a,
                  grad_out[(i * c + l) * m + j]);
      }
    }
  }
}

void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                       const float *grad_out, const int *idx,
                                       float *grad_points) {
  gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                              at::cuda::getCurrentCUDAStream()>>>(
      b, c, n, npoints, grad_out, idx, grad_points);

  CUDA_CHECK_ERRORS();
}

__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
  const float v1 = dists[idx1], v2 = dists[idx2];
  const int i1 = dists_i[idx1], i2 = dists_i[idx2];
  dists[idx1] = max(v1, v2);
  dists_i[idx1] = v2 > v1 ? i2 : i1;
}

// Input dataset: (b, n, 3), tmp: (b, n)
// Output idxs: (b, m)
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  if (m <= 0) return;
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  int batch_index = blockIdx.x;
  dataset += batch_index * n * 3;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    float x1 = dataset[old * 3 + 0];
    float y1 = dataset[old * 3 + 1];
    float z1 = dataset[old * 3 + 2];
    // each thread scans a strided slice of the points, tracking the one
    // farthest (by min distance to the already-selected set) from the seed
    for (int k = tid; k < n; k += stride) {
      float x2, y2, z2;
      x2 = dataset[k * 3 + 0];
      y2 = dataset[k * 3 + 1];
      z2 = dataset[k * 3 + 2];
      float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
      if (mag <= 1e-3) continue;  // skip zero-padded points near the origin

      float d =
          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);

      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
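    // --- Editor's note (explanatory, not in the original): what follows is
    // a standard shared-memory tree reduction. At each halving step the
    // first half of the threads folds the second half's (distance, index)
    // pairs into its own via __update(), so after log2(block_size) steps
    // dists[0] / dists_i[0] hold this round's farthest point. ---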
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

    if (block_size >= 512) {
      if (tid < 256) {
        __update(dists, dists_i, tid, tid + 256);
      }
      __syncthreads();
    }
    if (block_size >= 256) {
      if (tid < 128) {
        __update(dists, dists_i, tid, tid + 128);
      }
      __syncthreads();
    }
    if (block_size >= 128) {
      if (tid < 64) {
        __update(dists, dists_i, tid, tid + 64);
      }
      __syncthreads();
    }
    if (block_size >= 64) {
      if (tid < 32) {
        __update(dists, dists_i, tid, tid + 32);
      }
      __syncthreads();
    }
    if (block_size >= 32) {
      if (tid < 16) {
        __update(dists, dists_i, tid, tid + 16);
      }
      __syncthreads();
    }
    if (block_size >= 16) {
      if (tid < 8) {
        __update(dists, dists_i, tid, tid + 8);
      }
      __syncthreads();
    }
    if (block_size >= 8) {
      if (tid < 4) {
        __update(dists, dists_i, tid, tid + 4);
      }
      __syncthreads();
    }
    if (block_size >= 4) {
      if (tid < 2) {
        __update(dists, dists_i, tid, tid + 2);
      }
      __syncthreads();
    }
    if (block_size >= 2) {
      if (tid < 1) {
        __update(dists, dists_i, tid, tid + 1);
      }
      __syncthreads();
    }

    old = dists_i[0];
    if (tid == 0) idxs[j] = old;
  }
}

void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
                                            const float *dataset, float *temp,
                                            int *idxs) {
  unsigned int n_threads = opt_n_threads(n);

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  switch (n_threads) {
    case 512:
      furthest_point_sampling_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 256:
      furthest_point_sampling_kernel<256>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 128:
      furthest_point_sampling_kernel<128>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 64:
      furthest_point_sampling_kernel<64>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 32:
      furthest_point_sampling_kernel<32>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 16:
      furthest_point_sampling_kernel<16>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 8:
      furthest_point_sampling_kernel<8>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 4:
      furthest_point_sampling_kernel<4>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 2:
      furthest_point_sampling_kernel<2>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 1:
      furthest_point_sampling_kernel<1>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    default:
      furthest_point_sampling_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
  }

  CUDA_CHECK_ERRORS();
}
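
// --- Editor's note (hedged, not in the original): this wrapper reaches
// Python through the compiled _ext module built by pointnet2/setup.py. In
// the standard pointnet2 layout, the binding in src/sampling.cpp allocates
// the (b, m) index tensor and the (b, n) temp buffer pre-filled with 1e10
// before dispatching here, so temp[k] starts out as "infinity" for the
// min(d, temp[k]) update above. ---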
--------------------------------------------------------------------------------
/models/modules/common.py:
--------------------------------------------------------------------------------
# Copyright (c) V-DETR authors. All Rights Reserved.
import sys

if sys.version_info[:2] >= (3, 8):
    from collections.abc import Sequence
else:
    from collections import Sequence

from enum import Enum

import torch.nn as nn
import MinkowskiEngine as ME


class NormType(Enum):
    BATCH_NORM = 0
    INSTANCE_NORM = 1
    INSTANCE_BATCH_NORM = 2


def get_norm(norm_type, n_channels, D, bn_momentum=0.1):
    if norm_type == NormType.BATCH_NORM:
        return ME.MinkowskiBatchNorm(n_channels, momentum=bn_momentum)
    elif norm_type == NormType.INSTANCE_NORM:
        return ME.MinkowskiInstanceNorm(n_channels)
    elif norm_type == NormType.INSTANCE_BATCH_NORM:
        return nn.Sequential(
            ME.MinkowskiInstanceNorm(n_channels),
            ME.MinkowskiBatchNorm(n_channels, momentum=bn_momentum),
        )
    else:
        raise ValueError(f"Norm type: {norm_type} not supported")
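
# --- Editor's note: hedged usage sketch, not part of the original file. ---
# The factory above maps the enum to MinkowskiEngine norm layers, e.g.:
#
#   bn = get_norm(NormType.BATCH_NORM, n_channels=64, D=3, bn_momentum=0.02)
#   # -> ME.MinkowskiBatchNorm(64, momentum=0.02); D is accepted for API
#   #    symmetry with the conv factories below but unused here
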
77 | """ 78 | return int_to_region_type[region_type] 79 | 80 | 81 | def convert_conv_type(conv_type, kernel_size, D): 82 | assert isinstance(conv_type, ConvType), "conv_type must be of ConvType" 83 | region_type = conv_to_region_type[conv_type] 84 | axis_types = None 85 | if conv_type == ConvType.SPATIAL_HYPERCUBE: 86 | # No temporal convolution 87 | if isinstance(kernel_size, Sequence): 88 | kernel_size = kernel_size[:3] 89 | else: 90 | kernel_size = [ 91 | kernel_size, 92 | ] * 3 93 | if D == 4: 94 | kernel_size.append(1) 95 | elif conv_type == ConvType.SPATIO_TEMPORAL_HYPERCUBE: 96 | # conv_type conversion already handled 97 | assert D == 4 98 | elif conv_type == ConvType.HYPERCUBE: 99 | # conv_type conversion already handled 100 | pass 101 | elif conv_type == ConvType.SPATIAL_HYPERCROSS: 102 | if isinstance(kernel_size, Sequence): 103 | kernel_size = kernel_size[:3] 104 | else: 105 | kernel_size = [ 106 | kernel_size, 107 | ] * 3 108 | if D == 4: 109 | kernel_size.append(1) 110 | elif conv_type == ConvType.HYPERCROSS: 111 | # conv_type conversion already handled 112 | pass 113 | elif conv_type == ConvType.SPATIO_TEMPORAL_HYPERCROSS: 114 | # conv_type conversion already handled 115 | assert D == 4 116 | elif conv_type == ConvType.SPATIAL_HYPERCUBE_TEMPORAL_HYPERCROSS: 117 | # Define the CUBIC conv kernel for spatial dims and CROSS conv for temp dim 118 | axis_types = [ 119 | ME.RegionType.HYPER_CUBE, 120 | ] * 3 121 | if D == 4: 122 | axis_types.append(ME.RegionType.HYPER_CROSS) 123 | return region_type, axis_types, kernel_size 124 | 125 | 126 | def conv( 127 | in_planes, 128 | out_planes, 129 | kernel_size, 130 | stride=1, 131 | dilation=1, 132 | bias=False, 133 | conv_type=ConvType.HYPERCUBE, 134 | D=-1, 135 | ): 136 | assert D > 0, "Dimension must be a positive integer" 137 | region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D) 138 | kernel_generator = ME.KernelGenerator( 139 | kernel_size, 140 | stride, 141 | dilation, 142 | region_type=region_type, 143 | axis_types=None, # axis_types JONAS 144 | dimension=D, 145 | ) 146 | 147 | return ME.MinkowskiConvolution( 148 | in_channels=in_planes, 149 | out_channels=out_planes, 150 | kernel_size=kernel_size, 151 | stride=stride, 152 | dilation=dilation, 153 | bias=bias, 154 | kernel_generator=kernel_generator, 155 | dimension=D, 156 | ) 157 | 158 | 159 | def conv_tr( 160 | in_planes, 161 | out_planes, 162 | kernel_size, 163 | upsample_stride=1, 164 | dilation=1, 165 | bias=False, 166 | conv_type=ConvType.HYPERCUBE, 167 | D=-1, 168 | ): 169 | assert D > 0, "Dimension must be a positive integer" 170 | region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D) 171 | kernel_generator = ME.KernelGenerator( 172 | kernel_size, 173 | upsample_stride, 174 | dilation, 175 | region_type=region_type, 176 | axis_types=axis_types, 177 | dimension=D, 178 | ) 179 | 180 | return ME.MinkowskiConvolutionTranspose( 181 | in_channels=in_planes, 182 | out_channels=out_planes, 183 | kernel_size=kernel_size, 184 | stride=upsample_stride, 185 | dilation=dilation, 186 | bias=bias, 187 | kernel_generator=kernel_generator, 188 | dimension=D, 189 | ) 190 | 191 | 192 | def avg_pool( 193 | kernel_size, 194 | stride=1, 195 | dilation=1, 196 | conv_type=ConvType.HYPERCUBE, 197 | in_coords_key=None, 198 | D=-1, 199 | ): 200 | assert D > 0, "Dimension must be a positive integer" 201 | region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D) 202 | kernel_generator = ME.KernelGenerator( 203 | 

def avg_pool(
    kernel_size,
    stride=1,
    dilation=1,
    conv_type=ConvType.HYPERCUBE,
    in_coords_key=None,
    D=-1,
):
    assert D > 0, "Dimension must be a positive integer"
    region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D)
    kernel_generator = ME.KernelGenerator(
        kernel_size,
        stride,
        dilation,
        region_type=region_type,
        axis_types=axis_types,
        dimension=D,
    )

    return ME.MinkowskiAvgPooling(
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        kernel_generator=kernel_generator,
        dimension=D,
    )


def avg_unpool(kernel_size, stride=1, dilation=1, conv_type=ConvType.HYPERCUBE, D=-1):
    assert D > 0, "Dimension must be a positive integer"
    region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D)
    kernel_generator = ME.KernelGenerator(
        kernel_size,
        stride,
        dilation,
        region_type=region_type,
        axis_types=axis_types,
        dimension=D,
    )

    return ME.MinkowskiAvgUnpooling(
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        kernel_generator=kernel_generator,
        dimension=D,
    )


def sum_pool(kernel_size, stride=1, dilation=1, conv_type=ConvType.HYPERCUBE, D=-1):
    assert D > 0, "Dimension must be a positive integer"
    region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D)
    kernel_generator = ME.KernelGenerator(
        kernel_size,
        stride,
        dilation,
        region_type=region_type,
        axis_types=axis_types,
        dimension=D,
    )

    return ME.MinkowskiSumPooling(
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        kernel_generator=kernel_generator,
        dimension=D,
    )
--------------------------------------------------------------------------------