├── Uni3D
│   ├── data
│   │   ├── __init__.py
│   │   ├── ModelNet40_openshape.yaml
│   │   ├── ScanObjNN_openshape.yaml
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── datasets.cpython-38.pyc
│   │   ├── utils
│   │   │   ├── __pycache__
│   │   │   │   ├── data.cpython-38.pyc
│   │   │   │   ├── io.cpython-38.pyc
│   │   │   │   ├── build.cpython-38.pyc
│   │   │   │   ├── config.cpython-38.pyc
│   │   │   │   ├── logger.cpython-38.pyc
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   └── registry.cpython-38.pyc
│   │   │   ├── __init__.py
│   │   │   ├── build.py
│   │   │   ├── io.py
│   │   │   ├── config.py
│   │   │   ├── data.py
│   │   │   └── logger.py
│   │   ├── Objaverse_lvis_openshape.yaml
│   │   ├── dataset_catalog.json
│   │   ├── DATASETS.md
│   │   └── templates.json
│   ├── utils
│   │   ├── __init__.py
│   │   ├── bpe_simple_vocab_16e6.txt.gz
│   │   ├── __pycache__
│   │   │   ├── logger.cpython-38.pyc
│   │   │   ├── optim.cpython-38.pyc
│   │   │   ├── params.cpython-38.pyc
│   │   │   ├── utils.cpython-38.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── scheduler.cpython-38.pyc
│   │   │   ├── tokenizer.cpython-38.pyc
│   │   │   └── distributed.cpython-38.pyc
│   │   ├── logger.py
│   │   ├── scheduler.py
│   │   ├── misc.py
│   │   ├── tokenizer.py
│   │   └── dist.py
│   ├── assets
│   │   ├── editing.jpg
│   │   ├── overview.jpg
│   │   ├── retrival.jpg
│   │   ├── vis_part.jpg
│   │   ├── retrival_text.jpg
│   │   └── scene_understanding.jpg
│   ├── __pycache__
│   │   └── main.cpython-38.pyc
│   └── model
│       ├── __pycache__
│       │   ├── losses.cpython-38.pyc
│       │   ├── uni3d.cpython-38.pyc
│       │   └── point_encoder.cpython-38.pyc
│       ├── uni3d.py
│       └── losses.py
├── scannet
│   ├── meta_data
│   │   ├── 1.txt
│   │   ├── scannet_means.npz
│   │   ├── scannetv2_test.txt
│   │   ├── scannetv2_val.txt
│   │   └── scannetv2_val_copy.txt
│   ├── __pycache__
│   │   ├── scannet_utils.cpython-38.pyc
│   │   └── load_scannet_data.cpython-38.pyc
│   ├── wget-log
│   ├── wget-log.1
│   ├── data_viz.py
│   ├── scannet_utils.py
│   ├── batch_load_scannet_data.py
│   ├── model_util_scannet.py
│   └── load_scannet_data.py
├── models
│   ├── modules
│   │   ├── __init__.py
│   │   ├── resnet_block.py
│   │   └── common.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   ├── helpers.cpython-38.pyc
│   │   ├── mink_resnet.cpython-38.pyc
│   │   ├── model_vdetr.cpython-38.pyc
│   │   ├── position_embedding.cpython-38.pyc
│   │   └── vdetr_transformer.cpython-38.pyc
│   ├── __init__.py
│   ├── mink_resnet.py
│   ├── helpers.py
│   └── position_embedding.py
├── pointnet2
│   ├── pointnet2.egg-info
│   │   ├── dependency_links.txt
│   │   ├── top_level.txt
│   │   ├── PKG-INFO
│   │   └── SOURCES.txt
│   ├── __pycache__
│   │   ├── pytorch_utils.cpython-38.pyc
│   │   ├── pointnet2_utils.cpython-38.pyc
│   │   └── pointnet2_modules.cpython-38.pyc
│   ├── dist
│   │   └── pointnet2-0.0.0-py3.8-linux-x86_64.egg
│   ├── build
│   │   ├── lib.linux-x86_64-3.8
│   │   │   └── pointnet2
│   │   │       └── _ext.cpython-38-x86_64-linux-gnu.so
│   │   └── temp.linux-x86_64-3.8
│   │       └── build.ninja
│   ├── _ext_src
│   │   ├── include
│   │   │   ├── ball_query.h
│   │   │   ├── group_points.h
│   │   │   ├── sampling.h
│   │   │   ├── interpolate.h
│   │   │   ├── utils.h
│   │   │   └── cuda_utils.h
│   │   └── src
│   │       ├── bindings.cpp
│   │       ├── ball_query.cpp
│   │       ├── ball_query_gpu.cu
│   │       ├── group_points.cpp
│   │       ├── sampling.cpp
│   │       ├── group_points_gpu.cu
│   │       ├── interpolate.cpp
│   │       ├── interpolate_gpu.cu
│   │       └── sampling_gpu.cu
│   ├── pointnet2_test.py
│   └── setup.py
├── util
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── io.cpython-38.pyc
│   │   ├── nms.cpython-38.pyc
│   │   ├── dist.cpython-38.pyc
│   │   ├── misc.cpython-38.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── box_util.cpython-38.pyc
│   │   ├── eval_det.cpython-38.pyc
│   │   ├── pc_util.cpython-38.pyc
│   │   ├── pc_util.cpython-39.pyc
│   │   ├── o3d_helper.cpython-38.pyc
│   │   ├── ap_calculator.cpython-38.pyc
│   │   └── random_cuboid.cpython-38.pyc
│   ├── box_intersection.cpython-38-x86_64-linux-gnu.so
│   ├── build
│   │   ├── temp.linux-x86_64-3.8
│   │   │   └── box_intersection.o
│   │   └── lib.linux-x86_64-3.8
│   │       └── box_intersection.cpython-38-x86_64-linux-gnu.so
│   ├── cython_compile.py
│   ├── logger.py
│   ├── io.py
│   ├── misc.py
│   ├── random_cuboid.py
│   ├── nms.py
│   ├── dist.py
│   ├── box_intersection.pyx
│   └── ply_helper.py
├── datasets
│   ├── __pycache__
│   │   ├── scannet.cpython-38.pyc
│   │   ├── scannet.cpython-39.pyc
│   │   ├── __init__.cpython-38.pyc
│   │   └── __init__.cpython-39.pyc
│   └── __init__.py
├── optimizer.py
├── run.sh
├── README.md
└── point2graph.yaml

--------------------------------------------------------------------------------
/Uni3D/data/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/Uni3D/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/scannet/meta_data/1.txt:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pointnet2
2 | 
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 | from .pc_util import *
2 | from .box_util import *
--------------------------------------------------------------------------------
/Uni3D/assets/editing.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/editing.jpg
--------------------------------------------------------------------------------
/Uni3D/assets/overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/overview.jpg
--------------------------------------------------------------------------------
/Uni3D/assets/retrival.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/retrival.jpg
--------------------------------------------------------------------------------
/Uni3D/assets/vis_part.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/vis_part.jpg
--------------------------------------------------------------------------------
/Uni3D/data/ModelNet40_openshape.yaml:
--------------------------------------------------------------------------------
1 | NAME: ModelNet40_openshape
2 | DATA_PATH: ./data/test_datasets/modelnet40
--------------------------------------------------------------------------------
/Uni3D/data/ScanObjNN_openshape.yaml:
--------------------------------------------------------------------------------
1 | NAME: ScanObjNN_openshape
2 | DATA_PATH: ./data/test_datasets/scanobjectnn
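
The two YAML files above are minimal dataset configs: a registry `NAME` plus a data path. A sketch of how such a file is read, following the `yaml` + `EasyDict` pattern used by `Uni3D/data/utils/config.py` further down (the path is illustrative):

```python
import yaml
from easydict import EasyDict

# Load an evaluation config the same way cfg_from_yaml_file does below;
# attribute access mirrors how the dataset code reads cfg.NAME / cfg.DATA_PATH.
with open("Uni3D/data/ScanObjNN_openshape.yaml") as f:
    cfg = EasyDict(yaml.load(f, Loader=yaml.FullLoader))

print(cfg.NAME)       # ScanObjNN_openshape
print(cfg.DATA_PATH)  # ./data/test_datasets/scanobjectnn
```
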
--------------------------------------------------------------------------------
/Uni3D/assets/retrival_text.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/retrival_text.jpg
--------------------------------------------------------------------------------
/scannet/meta_data/scannet_means.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/scannet/meta_data/scannet_means.npz
--------------------------------------------------------------------------------
/util/__pycache__/io.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/io.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/nms.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/nms.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/__pycache__/main.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/__pycache__/main.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/assets/scene_understanding.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/assets/scene_understanding.jpg
--------------------------------------------------------------------------------
/util/__pycache__/dist.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/dist.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/misc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/misc.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/bpe_simple_vocab_16e6.txt.gz
--------------------------------------------------------------------------------
/util/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/util/__pycache__/box_util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/box_util.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/eval_det.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/eval_det.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/pc_util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/pc_util.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/pc_util.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/pc_util.cpython-39.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/scannet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/scannet.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/scannet.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/scannet.cpython-39.pyc
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/helpers.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/helpers.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/o3d_helper.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/o3d_helper.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/model/__pycache__/losses.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/model/__pycache__/losses.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/model/__pycache__/uni3d.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/model/__pycache__/uni3d.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/logger.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/logger.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/optim.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/optim.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/params.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/params.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/datasets/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/datasets/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/models/__pycache__/mink_resnet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/mink_resnet.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/model_vdetr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/model_vdetr.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/ap_calculator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/ap_calculator.cpython-38.pyc
--------------------------------------------------------------------------------
/util/__pycache__/random_cuboid.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/__pycache__/random_cuboid.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/__pycache__/datasets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/__pycache__/datasets.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/data.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/data.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/io.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/io.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/scheduler.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/scheduler.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/tokenizer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/tokenizer.cpython-38.pyc
--------------------------------------------------------------------------------
/scannet/__pycache__/scannet_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/scannet/__pycache__/scannet_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/build.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/build.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/config.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/logger.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/logger.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/utils/__pycache__/distributed.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/utils/__pycache__/distributed.cpython-38.pyc
--------------------------------------------------------------------------------
/pointnet2/__pycache__/pytorch_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/__pycache__/pytorch_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/utils/__pycache__/registry.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/data/utils/__pycache__/registry.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/model/__pycache__/point_encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/Uni3D/model/__pycache__/point_encoder.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/position_embedding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/position_embedding.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/vdetr_transformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/models/__pycache__/vdetr_transformer.cpython-38.pyc
--------------------------------------------------------------------------------
/pointnet2/__pycache__/pointnet2_utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/__pycache__/pointnet2_utils.cpython-38.pyc
--------------------------------------------------------------------------------
/pointnet2/dist/pointnet2-0.0.0-py3.8-linux-x86_64.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/dist/pointnet2-0.0.0-py3.8-linux-x86_64.egg
--------------------------------------------------------------------------------
/scannet/__pycache__/load_scannet_data.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/scannet/__pycache__/load_scannet_data.cpython-38.pyc
--------------------------------------------------------------------------------
/util/box_intersection.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/box_intersection.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/util/build/temp.linux-x86_64-3.8/box_intersection.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/build/temp.linux-x86_64-3.8/box_intersection.o
--------------------------------------------------------------------------------
/pointnet2/__pycache__/pointnet2_modules.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/__pycache__/pointnet2_modules.cpython-38.pyc
--------------------------------------------------------------------------------
/Uni3D/data/Objaverse_lvis_openshape.yaml:
--------------------------------------------------------------------------------
1 | NAME: Objaverse_lvis_openshape
2 | PC_PATH: ./data/test_datasets/objaverse_lvis/lvis_testset.txt
3 | PC_PATH_ROOT: ./data/test_datasets/objaverse_lvis
--------------------------------------------------------------------------------
/util/build/lib.linux-x86_64-3.8/box_intersection.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/util/build/lib.linux-x86_64-3.8/box_intersection.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/pointnet2/build/lib.linux-x86_64-3.8/pointnet2/_ext.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zimingluo/Point2Graph/HEAD/pointnet2/build/lib.linux-x86_64-3.8/pointnet2/_ext.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: pointnet2
3 | Version: 0.0.0
4 | Summary: UNKNOWN
5 | Home-page: UNKNOWN
6 | License: UNKNOWN
7 | Platform: UNKNOWN
8 | 
9 | UNKNOWN
10 | 
11 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/ball_query.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #pragma once
4 | #include <torch/extension.h>
5 | 
6 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
7 |                       const int nsample);
8 | 
--------------------------------------------------------------------------------
/Uni3D/data/utils/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 |  * Copyright (c) 2023, salesforce.com, inc.
3 |  * All rights reserved.
4 |  * SPDX-License-Identifier: BSD-3-Clause
5 |  * For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 |  * By Le Xue
7 | '''
8 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/group_points.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <torch/extension.h>
6 | 
7 | at::Tensor group_points(at::Tensor points, at::Tensor idx);
8 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
9 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | #Copyright (c) V-DETR authors. All Rights Reserved.
2 | from .model_vdetr import build_vdetr
3 | 
4 | MODEL_FUNCS = {
5 |     'vdetr': build_vdetr,
6 | }
7 | 
8 | def build_model(args, dataset_config):
9 |     model = MODEL_FUNCS[args.model_name](args, dataset_config)
10 |     return model
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/sampling.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <torch/extension.h>
6 | 
7 | at::Tensor gather_points(at::Tensor points, at::Tensor idx);
8 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
9 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);
10 | 
--------------------------------------------------------------------------------
/Uni3D/data/utils/build.py:
--------------------------------------------------------------------------------
1 | from . import registry
2 | 
3 | DATASETS = registry.Registry('dataset')
4 | 
5 | 
6 | def build_dataset_from_cfg(cfg, default_args = None):
7 |     """
8 |     Build a dataset, defined by `dataset_name`.
9 |     Args:
10 |         cfg (eDICT):
11 |     Returns:
12 |         Dataset: a constructed dataset specified by dataset_name.
13 |     """
14 |     return DATASETS.build(cfg, default_args = default_args)
15 | 
16 | 
17 | 
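
`build.py` is a thin entry point into a registry pattern: dataset classes register themselves under a name, and `build_dataset_from_cfg` instantiates whichever class the config selects. `registry.py` itself only appears here as a `.pyc`, so the decorator below assumes the common mmcv-style interface; the class and config values are hypothetical:

```python
from easydict import EasyDict
from Uni3D.data.utils.build import DATASETS, build_dataset_from_cfg

@DATASETS.register_module()  # assumed mmcv-style registration decorator
class ToyPointCloudDataset:
    def __init__(self, config):
        self.data_path = config.DATA_PATH

cfg = EasyDict(NAME="ToyPointCloudDataset",
               DATA_PATH="./data/test_datasets/toy")
dataset = build_dataset_from_cfg(cfg)  # looks up cfg.NAME in the registry
```
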
--------------------------------------------------------------------------------
/scannet/wget-log:
--------------------------------------------------------------------------------
1 | --2024-08-16 22:18:33--  https://drive.usercontent.google.com/open?id=1ag_SO8kVpNdZNGOUEAwgJTYEmf4TJErv
2 | Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.250.191.129, 2607:f8b0:4009:818::2001
3 | Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.250.191.129|:443... connected.
4 | HTTP request sent, awaiting response... 404 Not Found
5 | 2024-08-16 22:18:33 ERROR 404: Not Found.
6 | 
7 | 
--------------------------------------------------------------------------------
/scannet/wget-log.1:
--------------------------------------------------------------------------------
1 | --2024-08-16 22:19:18--  https://drive.usercontent.google.com/open?id=1ag_SO8kVpNdZNGOUEAwgJTYEmf4TJErv
2 | Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 142.250.191.129, 2607:f8b0:4009:818::2001
3 | Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|142.250.191.129|:443... connected.
4 | HTTP request sent, awaiting response... 404 Not Found
5 | 2024-08-16 22:19:18 ERROR 404: Not Found.
6 | 
7 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | setup.py
2 | _ext_src/src/ball_query.cpp
3 | _ext_src/src/ball_query_gpu.cu
4 | _ext_src/src/bindings.cpp
5 | _ext_src/src/group_points.cpp
6 | _ext_src/src/group_points_gpu.cu
7 | _ext_src/src/interpolate.cpp
8 | _ext_src/src/interpolate_gpu.cu
9 | _ext_src/src/sampling.cpp
10 | _ext_src/src/sampling_gpu.cu
11 | pointnet2.egg-info/PKG-INFO
12 | pointnet2.egg-info/SOURCES.txt
13 | pointnet2.egg-info/dependency_links.txt
14 | pointnet2.egg-info/top_level.txt
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/interpolate.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #pragma once
4 | 
5 | #include <torch/extension.h>
6 | #include <vector>
7 | 
8 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
9 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
10 |                              at::Tensor weight);
11 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
12 |                                   at::Tensor weight, const int m);
13 | 
--------------------------------------------------------------------------------
/util/cython_compile.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | from setuptools import setup, Extension
4 | from Cython.Build import cythonize
5 | import numpy as np
6 | 
7 | 
8 | # hacky way to find numpy include path
9 | # replace with actual path if this does not work
10 | np_include_path = np.__file__.replace("__init__.py", "core/include/")
11 | INCLUDE_PATH = [
12 |     np_include_path
13 | ]
14 | 
15 | setup(
16 |     ext_modules = cythonize(
17 |         Extension(
18 |             "box_intersection",
19 |             sources=["box_intersection.pyx"],
20 |             include_dirs=INCLUDE_PATH
21 |         )),
22 | )
23 | 
24 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/bindings.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #include "ball_query.h"
5 | #include "group_points.h"
6 | #include "interpolate.h"
7 | #include "sampling.h"
8 | 
9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
10 |   m.def("gather_points", &gather_points);
11 |   m.def("gather_points_grad", &gather_points_grad);
12 |   m.def("furthest_point_sampling", &furthest_point_sampling);
13 | 
14 |   m.def("three_nn", &three_nn);
15 |   m.def("three_interpolate", &three_interpolate);
16 |   m.def("three_interpolate_grad", &three_interpolate_grad);
17 | 
18 |   m.def("ball_query", &ball_query);
19 | 
20 |   m.def("group_points", &group_points);
21 |   m.def("group_points_grad", &group_points_grad);
22 | }
23 | 
--------------------------------------------------------------------------------
/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def build_optimizer(args, model):
5 | 
6 |     params_with_decay = []
7 |     params_without_decay = []
8 |     for name, param in model.named_parameters():
9 |         if param.requires_grad is False:
10 |             continue
11 |         if args.filter_biases_wd and (len(param.shape) == 1 or name.endswith("bias")):
12 |             params_without_decay.append(param)
13 |         else:
14 |             params_with_decay.append(param)
15 | 
16 |     if args.filter_biases_wd:
17 |         param_groups = [
18 |             {"params": params_without_decay, "weight_decay": 0.0},
19 |             {"params": params_with_decay, "weight_decay": args.weight_decay},
20 |         ]
21 |     else:
22 |         param_groups = [
23 |             {"params": params_with_decay, "weight_decay": args.weight_decay},
24 |         ]
25 |     optimizer = torch.optim.AdamW(param_groups, lr=args.base_lr)
26 |     return optimizer
27 | 
--------------------------------------------------------------------------------
/Uni3D/data/dataset_catalog.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ensembled_embedding": {
3 |         "config": "./data/Ensembled_embedding.yaml",
4 |         "train": "train",
5 |         "test": "train",
6 |         "usage": "train"
7 |     },
8 |     "ensembled": {
9 |         "config": "./data/Ensembled.yaml",
10 |         "train": "train",
11 |         "test": "train",
12 |         "usage": "train"
13 |     },
14 |     "objaverse_lvis_openshape": {
15 |         "config": "./data/Objaverse_lvis_openshape.yaml",
16 |         "train": "train",
17 |         "test": "test",
18 |         "usage": "test"
19 |     },
20 |     "modelnet40_openshape": {
21 |         "config": "./data/ModelNet40_openshape.yaml",
22 |         "train": "train",
23 |         "test": "test",
24 |         "usage": "test"
25 |     },
26 |     "scanobjnn_openshape": {
27 |         "config": "./data/ScanObjNN_openshape.yaml",
28 |         "train": "train",
29 |         "test": "test",
30 |         "usage": "test"
31 |     }
32 | }
--------------------------------------------------------------------------------
/Uni3D/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | 
3 | 
4 | def setup_logging(log_file, level, include_host=False):
5 |     if include_host:
6 |         import socket
7 |         hostname = socket.gethostname()
8 |         formatter = logging.Formatter(
9 |             f'%(asctime)s |  {hostname} | %(levelname)s | %(message)s', datefmt='%Y-%m-%d,%H:%M:%S')
10 |     else:
11 |         formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s', datefmt='%Y-%m-%d,%H:%M:%S')
12 | 
13 |     logging.root.setLevel(level)
14 |     loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
15 |     for logger in loggers:
16 |         logger.setLevel(level)
17 | 
18 |     stream_handler = logging.StreamHandler()
19 |     stream_handler.setFormatter(formatter)
20 |     logging.root.addHandler(stream_handler)
21 | 
22 |     if log_file:
23 |         file_handler = logging.FileHandler(filename=log_file)
24 |         file_handler.setFormatter(formatter)
25 |         logging.root.addHandler(file_handler)
26 | 
27 | 
--------------------------------------------------------------------------------
/pointnet2/pointnet2_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | ''' Testing customized ops. '''
4 | 
5 | import torch
6 | from torch.autograd import gradcheck
7 | import numpy as np
8 | 
9 | import os
10 | import sys
11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
12 | sys.path.append(BASE_DIR)
13 | import pointnet2_utils
14 | 
15 | def test_interpolation_grad():
16 |     batch_size = 1
17 |     feat_dim = 2
18 |     m = 4
19 |     feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda()
20 | 
21 |     def interpolate_func(inputs):
22 |         idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda()
23 |         weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda()
24 |         interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight)
25 |         return interpolated_feats
26 | 
27 |     assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1))
28 | 
29 | if __name__=='__main__':
30 |     test_interpolation_grad()
31 | 
--------------------------------------------------------------------------------
/util/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | import torch
4 | 
5 | try:
6 |     from tensorboardX import SummaryWriter
7 | except ImportError:
8 |     print("Cannot import tensorboard. Will log to txt files only.")
9 |     SummaryWriter = None
10 | 
11 | from utils.dist import is_primary
12 | 
13 | 
14 | class Logger(object):
15 |     def __init__(self, log_dir=None) -> None:
16 |         self.log_dir = log_dir
17 |         if SummaryWriter is not None and is_primary():
18 |             self.writer = SummaryWriter(self.log_dir)
19 |         else:
20 |             self.writer = None
21 | 
22 |     def log_scalars(self, scalar_dict, step, prefix=None):
23 |         if self.writer is None:
24 |             return
25 |         for k in scalar_dict:
26 |             v = scalar_dict[k]
27 |             if isinstance(v, torch.Tensor):
28 |                 v = v.detach().cpu().item()
29 |             if prefix is not None:
30 |                 k = prefix + k
31 |             self.writer.add_scalar(k, v, step)
32 | 
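
`Logger` wraps tensorboardX behind a primary-process guard, so non-primary ranks (or installs without tensorboardX) silently no-op. A short sketch of how `log_scalars` is driven (directory and values are illustrative; assumes the repo root is on `PYTHONPATH`):

```python
import torch
from util.logger import Logger

logger = Logger(log_dir="./outputs/tb")  # writer created only on the primary rank
logger.log_scalars({"loss": torch.tensor(0.73), "lr": 5e-4},
                   step=100, prefix="train/")  # logs train/loss and train/lr
```
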
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/utils.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #pragma once
5 | #include <ATen/cuda/CUDAContext.h>
6 | #include <torch/extension.h>
7 | 
8 | #define CHECK_CUDA(x)                                     \
9 |   do {                                                    \
10 |     AT_ASSERT(x.is_cuda(), #x " must be a CUDA tensor");  \
11 |   } while (0)
12 | 
13 | #define CHECK_CONTIGUOUS(x)                                      \
14 |   do {                                                           \
15 |     AT_ASSERT(x.is_contiguous(), #x " must be a contiguous tensor"); \
16 |   } while (0)
17 | 
18 | #define CHECK_IS_INT(x)                                \
19 |   do {                                                 \
20 |     AT_ASSERT(x.scalar_type() == at::ScalarType::Int,  \
21 |               #x " must be an int tensor");            \
22 |   } while (0)
23 | 
24 | #define CHECK_IS_FLOAT(x)                                \
25 |   do {                                                   \
26 |     AT_ASSERT(x.scalar_type() == at::ScalarType::Float,  \
27 |               #x " must be a float tensor");             \
28 |   } while (0)
29 | 
--------------------------------------------------------------------------------
/pointnet2/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | from setuptools import setup
7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
8 | import glob
9 | import os.path as osp
10 | 
11 | this_dir = osp.dirname(osp.abspath(__file__))
12 | 
13 | _ext_src_root = "_ext_src"
14 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob(
15 |     "{}/src/*.cu".format(_ext_src_root)
16 | )
17 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root))
18 | 
19 | setup(
20 |     name='pointnet2',
21 |     ext_modules=[
22 |         CUDAExtension(
23 |             name='pointnet2._ext',
24 |             sources=_ext_sources,
25 |             extra_compile_args={
26 |                 "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))],
27 |                 "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))],
28 |             },
29 |             include_dirs=[osp.join(this_dir, _ext_src_root, "include")],
30 |         )
31 |     ],
32 |     cmdclass={
33 |         'build_ext': BuildExtension
34 |     }
35 | )
36 | 
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .scannet import ScannetDetectionDataset, ScannetDatasetConfig
2 | # from .sunrgbd import SunrgbdDetectionDataset, SunrgbdDatasetConfig #will release very soon
3 | 
4 | 
5 | DATASET_FUNCTIONS = {
6 |     "scannet": [ScannetDetectionDataset, ScannetDatasetConfig],}
7 | 
8 | 
9 | def build_dataset(args):
10 |     dataset_builder = DATASET_FUNCTIONS[args.dataset_name][0]
11 |     dataset_config = DATASET_FUNCTIONS[args.dataset_name][1]()
12 | 
13 |     if args.test_only:
14 |         dataset_dict = {
15 |             "test": dataset_builder(
16 |                 dataset_config,
17 |                 split_set="val",
18 |                 augment=False,
19 |                 args=args
20 |             ),
21 |         }
22 |     else:
23 |         dataset_dict = {
24 |             "train": dataset_builder(
25 |                 dataset_config,
26 |                 split_set="train",
27 |                 augment=True,
28 |                 args=args
29 |             ),
30 |             "test": dataset_builder(
31 |                 dataset_config,
32 |                 split_set="val",
33 |                 augment=False,
34 |                 args=args
35 |             ),
36 |         }
37 |     return dataset_dict, dataset_config
38 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/ball_query.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #include "ball_query.h"
5 | #include "utils.h"
6 | 
7 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
8 |                                      int nsample, const float *new_xyz,
9 |                                      const float *xyz, int *idx);
10 | 
11 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
12 |                       const int nsample) {
13 |   CHECK_CONTIGUOUS(new_xyz);
14 |   CHECK_CONTIGUOUS(xyz);
15 |   CHECK_IS_FLOAT(new_xyz);
16 |   CHECK_IS_FLOAT(xyz);
17 | 
18 |   if (new_xyz.is_cuda()) {
19 |     CHECK_CUDA(xyz);
20 |   }
21 | 
22 |   at::Tensor idx =
23 |       torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
24 |                    at::device(new_xyz.device()).dtype(at::ScalarType::Int));
25 | 
26 |   if (new_xyz.is_cuda()) {
27 |     query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
28 |                                     radius, nsample, new_xyz.data<float>(),
29 |                                     xyz.data<float>(), idx.data<int>());
30 |   } else {
31 |     AT_ASSERT(false, "CPU not supported");
32 |   }
33 | 
34 |   return idx;
35 | }
36 | 
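
Once `setup.py` has built the extension (e.g. `pip install .` inside `pointnet2/`), the functions bound in `bindings.cpp` are called on contiguous CUDA float tensors shaped `(B, N, 3)`. A sketch of the two ops declared in `sampling.h` and `ball_query.h` (shapes and values are illustrative; requires a GPU build):

```python
import torch
from pointnet2 import _ext  # compiled by pointnet2/setup.py

B, N, M = 2, 1024, 128
xyz = torch.rand(B, N, 3, device="cuda").contiguous()

idx = _ext.furthest_point_sampling(xyz, M)  # (B, M) int indices of spread points
centers = xyz.gather(1, idx.long().unsqueeze(-1).expand(-1, -1, 3))

# For each of the M centers, up to 64 neighbor indices within radius 0.2.
neighbors = _ext.ball_query(centers.contiguous(), xyz, 0.2, 64)  # (B, M, 64)
```
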
--------------------------------------------------------------------------------
/Uni3D/data/DATASETS.md:
--------------------------------------------------------------------------------
1 | ## Evaluation datasets
2 | 
3 | 1. Please download the data from this [repository](https://huggingface.co/BAAI/Uni3D/blob/main/data/test_datasets.zip), which contains datasets for Objaverse-LVIS, ModelNet40, and ScanObjectNN.
4 | 
5 | 2. Place the `test_datasets` folder in the `/data` directory on your machine. The core `data` directory structure should look like this:
6 | 
7 | ```
8 | ./data
9 | -- test_datasets/
10 | -- modelnet40
11 | -- scanobjectnn
12 | -- objaverse_lvis
13 | -- utils/
14 | -- datasets.py
15 | -- ModelNet40_openshape.yaml
16 | -- Objaverse_lvis_openshape.yaml
17 | -- ScanObjNN_openshape.yaml
18 | -- dataset_catalog.json
19 | -- labels.json
20 | -- templates.json
21 | ```
22 | 3. **Important**: If you choose to place the data in a location other than the default one mentioned above, please remember to update the corresponding dataset's YAML file with your path.
23 | 
24 | Now you are ready to use the datasets for zero-shot evaluation. If you have any questions or encounter any issues, please refer to the documentation or feel free to reach out for assistance.
25 | 
26 | ## Pre-training datasets
27 | 
28 | We're in the process of organizing and uploading. Hang tight, and stay tuned! ☕️
29 | 
30 | Thanks for your patience and support!
31 | 
--------------------------------------------------------------------------------
/scannet/data_viz.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | import sys
7 | import os
8 | 
9 | BASE_DIR = os.path.dirname(__file__)
10 | sys.path.append(BASE_DIR)
11 | 
12 | import numpy as np
13 | import pc_util
14 | 
15 | scene_name = 'scannet_train_detection_data/scene0002_00'
16 | output_folder = 'data_viz_dump'
17 | 
18 | data = np.load(scene_name+'_vert.npy')
19 | scene_points = data[:,0:3]
20 | colors = data[:,3:]
21 | instance_labels = np.load(scene_name+'_ins_label.npy')
22 | semantic_labels = np.load(scene_name+'_sem_label.npy')
23 | instance_bboxes = np.load(scene_name+'_bbox.npy')
24 | 
25 | print(np.unique(instance_labels))
26 | print(np.unique(semantic_labels))
27 | input()
28 | if not os.path.exists(output_folder):
29 |     os.mkdir(output_folder)
30 | 
31 | # Write scene as OBJ file for visualization
32 | pc_util.write_ply_rgb(scene_points, colors, os.path.join(output_folder, 'scene.obj'))
33 | pc_util.write_ply_color(scene_points, instance_labels, os.path.join(output_folder, 'scene_instance.obj'))
34 | pc_util.write_ply_color(scene_points, semantic_labels, os.path.join(output_folder, 'scene_semantic.obj'))
35 | 
36 | from model_util_scannet import ScannetDatasetConfig
37 | DC = ScannetDatasetConfig()
38 | print(instance_bboxes.shape)
39 | 
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | model=create_uni3d
3 | 
4 | gpu=1
5 | echo "export CUDA_VISIBLE_DEVICES=$gpu"
6 | export CUDA_VISIBLE_DEVICES=${gpu}
7 | export OMP_NUM_THREADS=12
8 | 
9 | clip_model="EVA02-E-14-plus"
10 | ckpt_path="./Uni3D/downloads/ckpt/model_giant.pt"
11 | pretrained="./Uni3D/downloads/open_clip_pytorch_model.bin" # or "laion2b_s9b_b144k"
12 | size="giant"
13 | 
14 | if [ $size = "giant" ]; then
15 |     pc_model="eva_giant_patch14_560"
16 |     pc_feat_dim=1408
17 | elif [ $size = "large" ]; then
18 |     pc_model="eva02_large_patch14_448"
19 |     pc_feat_dim=1024
20 | elif [ $size = "base" ]; then
21 |     pc_model="eva02_base_patch14_448"
22 |     pc_feat_dim=768
23 | elif [ $size = "small" ]; then
24 |     pc_model="eva02_small_patch14_224"
25 |     pc_feat_dim=384
26 | elif [ $size = "tiny" ]; then
27 |     pc_model="eva02_tiny_patch14_224"
28 |     pc_feat_dim=192
29 | else
30 |     echo "Invalid option"
31 |     exit 1
32 | fi
33 | eps=0.04
34 | min_points=3
35 | python main.py \
36 |     --dataset_name scannet \
37 |     --dataset_root_dir ./scannet/ \
38 |     --meta_data_dir ./scannet/meta_data/ \
39 |     --test_ckpt ./models/scannet_540ep.pth \
40 |     --auto_test \
41 |     --test_only \
42 |     --conf_thresh 0.01 \
43 |     --pc-model $pc_model \
44 |     --pc-feat-dim $pc_feat_dim \
45 |     --pc-encoder-dim 512 \
46 |     --ckpt_path $ckpt_path \
47 |     --embed-dim 1024 \
48 |     --group-size 64 \
49 |     --num-group 512 \
50 |     --inference_only \
51 |     --npoints 10000 \
52 |     --eps $eps \
53 |     --min_points $min_points
54 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/include/cuda_utils.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | #ifndef _CUDA_UTILS_H
4 | #define _CUDA_UTILS_H
5 | 
6 | #include <ATen/ATen.h>
7 | #include <ATen/cuda/CUDAContext.h>
8 | #include <cmath>
9 | 
10 | #include <cuda.h>
11 | #include <cuda_runtime.h>
12 | 
13 | #include <vector>
14 | 
15 | #define TOTAL_THREADS 512
16 | 
17 | inline int opt_n_threads(int work_size) {
18 |   const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
19 | 
20 |   return max(min(1 << pow_2, TOTAL_THREADS), 1);
21 | }
22 | 
23 | inline dim3 opt_block_config(int x, int y) {
24 |   const int x_threads = opt_n_threads(x);
25 |   const int y_threads =
26 |       max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
27 |   dim3 block_config(x_threads, y_threads, 1);
28 | 
29 |   return block_config;
30 | }
31 | 
32 | #define CUDA_CHECK_ERRORS()                                           \
33 |   do {                                                                \
34 |     cudaError_t err = cudaGetLastError();                             \
35 |     if (cudaSuccess != err) {                                         \
36 |       fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",  \
37 |               cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
38 |               __FILE__);                                              \
39 |       exit(-1);                                                       \
40 |     }                                                                 \
41 |   } while (0)
42 | 
43 | #endif
44 | 
--------------------------------------------------------------------------------
/Uni3D/model/uni3d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import timm
3 | import numpy as np
4 | from torch import nn
5 | from . import losses
6 | 
7 | from .point_encoder import PointcloudEncoder
8 | 
9 | class Uni3D(nn.Module):
10 |     def __init__(self, point_encoder):
11 |         super().__init__()
12 |         self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
13 |         self.point_encoder = point_encoder
14 | 
15 |     def encode_pc(self, pc):
16 |         xyz = pc[:,:,:3].contiguous()
17 |         color = pc[:,:,3:].contiguous()
18 |         pc_feat = self.point_encoder(xyz, color)
19 |         return pc_feat
20 | 
21 |     def forward(self, pc, text, image):
22 |         text_embed_all = text
23 |         image_embed = image
24 |         pc_embed = self.encode_pc(pc)
25 |         return {'text_embed': text_embed_all,
26 |                 'pc_embed': pc_embed,
27 |                 'image_embed': image_embed,
28 |                 'logit_scale': self.logit_scale.exp()}
29 | 
30 | def get_filter_loss(args):
31 |     return losses.Uni3d_Text_Image_Loss()
32 | 
33 | def get_metric_names(model):
34 |     return ['loss', 'uni3d_loss', 'pc_image_acc', 'pc_text_acc']
35 | 
36 | def create_uni3d(args):
37 |     # create transformer blocks for point cloud via timm
38 |     point_transformer = timm.create_model(args.pc_model, checkpoint_path=args.pretrained_pc, drop_path_rate=args.drop_path_rate)
39 | 
40 |     # create whole point cloud encoder
41 |     point_encoder = PointcloudEncoder(point_transformer, args)
42 | 
43 |     # uni3d model
44 |     model = Uni3D(point_encoder=point_encoder,)
45 |     return model
46 | 
47 | 
48 | 
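
`encode_pc` fixes the input contract for Uni3D: a `(B, N, 6)` tensor with xyz in the first three channels and color in the last three. A sketch of zero-shot-style scoring with an already constructed model (the `create_uni3d(args)` call and checkpoint loading from `run.sh` are omitted, since `point_encoder.py` is not shown here; `text_embed` stands in for normalized CLIP text features):

```python
import torch

# Assume `model` is a Uni3D instance with weights loaded, and `text_embed`
# is a (num_classes, embed_dim) tensor of normalized CLIP text features.
pc = torch.rand(2, 10000, 6)  # xyz = pc[:, :, :3], rgb = pc[:, :, 3:]
with torch.no_grad():
    pc_embed = model.encode_pc(pc)                          # (2, embed_dim)
    pc_embed = pc_embed / pc_embed.norm(dim=-1, keepdim=True)
    # Cosine similarity scaled by the learned temperature, as in forward():
    logits = model.logit_scale.exp() * pc_embed @ text_embed.t()
pred = logits.argmax(dim=-1)  # best-matching text prompt per cloud
```
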
--------------------------------------------------------------------------------
/Uni3D/data/utils/io.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import open3d
4 | import os
5 | 
6 | class IO:
7 |     @classmethod
8 |     def get(cls, file_path):
9 |         _, file_extension = os.path.splitext(file_path)
10 | 
11 |         if file_extension in ['.npy']:
12 |             return cls._read_npy(file_path)
13 |         elif file_extension in ['.pcd']:
14 |             return cls._read_pcd(file_path)
15 |         elif file_extension in ['.h5']:
16 |             return cls._read_h5(file_path)
17 |         elif file_extension in ['.txt', '.xyz']:
18 |             return cls._read_txt(file_path)
19 |         elif file_extension in [".bin"]:
20 |             return cls._read_bin(file_path)
21 |         else:
22 |             raise Exception('Unsupported file extension: %s' % file_extension)
23 | 
24 |     # References: https://github.com/numpy/numpy/blob/master/numpy/lib/format.py
25 |     @classmethod
26 |     def _read_npy(cls, file_path):
27 |         return np.load(file_path)
28 | 
29 |     # References: https://github.com/dimatura/pypcd/blob/master/pypcd/pypcd.py#L275
30 |     # Support PCD files without compression ONLY!
31 |     @classmethod
32 |     def _read_pcd(cls, file_path):
33 |         pc = open3d.io.read_point_cloud(file_path)
34 |         ptcloud = np.array(pc.points)
35 |         return ptcloud
36 | 
37 |     @classmethod
38 |     def _read_txt(cls, file_path):
39 |         return np.loadtxt(file_path)
40 | 
41 |     @classmethod
42 |     def _read_h5(cls, file_path):
43 |         f = h5py.File(file_path, 'r')
44 |         return f['data'][()]
45 | 
46 |     @classmethod
47 |     def _read_bin(cls, file_path):
48 |         return np.fromfile(file_path) # , dtype=np.float32, count=-1
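
`IO.get` dispatches purely on file extension, so dataset code never chooses a reader explicitly. A one-call sketch (the path is illustrative); note that the `.bin` branch calls `np.fromfile` with its `dtype` argument commented out, so raw binary files come back as float64 unless that is restored:

```python
from Uni3D.data.utils.io import IO

points = IO.get("./data/test_datasets/modelnet40/sample_0001.npy")  # np.ndarray
```
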
--------------------------------------------------------------------------------
/util/io.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | import torch
4 | import os
5 | from util.dist import is_primary
6 | 
7 | 
8 | def save_checkpoint(
9 |     checkpoint_dir,
10 |     model_no_ddp,
11 |     optimizer,
12 |     epoch,
13 |     args,
14 |     best_val_metrics,
15 |     filename=None,
16 | ):
17 |     if not is_primary():
18 |         return
19 |     if filename is None:
20 |         filename = f"checkpoint_{epoch:04d}.pth"
21 |     checkpoint_name = os.path.join(checkpoint_dir, filename)
22 | 
23 |     sd = {
24 |         "model": model_no_ddp.state_dict(),
25 |         "optimizer": optimizer.state_dict(),
26 |         "epoch": epoch,
27 |         "args": args,
28 |         "best_val_metrics": best_val_metrics,
29 |     }
30 |     torch.save(sd, checkpoint_name)
31 | 
32 | 
33 | def resume_if_possible(checkpoint_dir, model_no_ddp, optimizer):
34 |     """
35 |     Resume if checkpoint is available.
36 |     Return
37 |     - epoch of loaded checkpoint.
38 |     """
39 |     epoch = -1
40 |     best_val_metrics = {}
41 |     if not os.path.isdir(checkpoint_dir):
42 |         return epoch, best_val_metrics
43 | 
44 |     last_checkpoint = os.path.join(checkpoint_dir, "checkpoint.pth")
45 |     if not os.path.isfile(last_checkpoint):
46 |         return epoch, best_val_metrics
47 | 
48 |     sd = torch.load(last_checkpoint, map_location=torch.device("cpu"))
49 |     epoch = sd["epoch"]
50 |     best_val_metrics = sd["best_val_metrics"]
51 |     print(f"Found checkpoint at {epoch}. Resuming.")
52 | 
53 |     model_no_ddp.load_state_dict(sd["model"])
54 |     optimizer.load_state_dict(sd["optimizer"])
55 |     print(
56 |         f"Loaded model and optimizer state at {epoch}. Loaded best val metrics so far."
57 |     )
58 |     return epoch, best_val_metrics
59 | 
--------------------------------------------------------------------------------
/scannet/meta_data/scannetv2_test.txt:
--------------------------------------------------------------------------------
1 | scene0000_00
2 | scene0707_00
3 | scene0708_00
4 | scene0709_00
5 | scene0710_00
6 | scene0711_00
7 | scene0712_00
8 | scene0713_00
9 | scene0714_00
10 | scene0715_00
11 | scene0716_00
12 | scene0717_00
13 | scene0718_00
14 | scene0719_00
15 | scene0720_00
16 | scene0721_00
17 | scene0722_00
18 | scene0723_00
19 | scene0724_00
20 | scene0725_00
21 | scene0726_00
22 | scene0727_00
23 | scene0728_00
24 | scene0729_00
25 | scene0730_00
26 | scene0731_00
27 | scene0732_00
28 | scene0733_00
29 | scene0734_00
30 | scene0735_00
31 | scene0736_00
32 | scene0737_00
33 | scene0738_00
34 | scene0739_00
35 | scene0740_00
36 | scene0741_00
37 | scene0742_00
38 | scene0743_00
39 | scene0744_00
40 | scene0745_00
41 | scene0746_00
42 | scene0747_00
43 | scene0748_00
44 | scene0749_00
45 | scene0750_00
46 | scene0751_00
47 | scene0752_00
48 | scene0753_00
49 | scene0754_00
50 | scene0755_00
51 | scene0756_00
52 | scene0757_00
53 | scene0758_00
54 | scene0759_00
55 | scene0760_00
56 | scene0761_00
57 | scene0762_00
58 | scene0763_00
59 | scene0764_00
60 | scene0765_00
61 | scene0766_00
62 | scene0767_00
63 | scene0768_00
64 | scene0769_00
65 | scene0770_00
66 | scene0771_00
67 | scene0772_00
68 | scene0773_00
69 | scene0774_00
70 | scene0775_00
71 | scene0776_00
72 | scene0777_00
73 | scene0778_00
74 | scene0779_00
75 | scene0780_00
76 | scene0781_00
77 | scene0782_00
78 | scene0783_00
79 | scene0784_00
80 | scene0785_00
81 | scene0786_00
82 | scene0787_00
83 | scene0788_00
84 | scene0789_00
85 | scene0790_00
86 | scene0791_00
87 | scene0792_00
88 | scene0793_00
89 | scene0794_00
90 | scene0795_00
91 | scene0796_00
92 | scene0797_00
93 | scene0798_00
94 | scene0799_00
95 | scene0800_00
96 | scene0801_00
97 | scene0802_00
98 | scene0803_00
99 | scene0804_00
100 | scene0805_00
101 | scene0806_00
102 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/ball_query_gpu.cu:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 
3 | 
4 | #include <math.h>
5 | #include <stdio.h>
6 | #include <stdlib.h>
7 | 
8 | #include "cuda_utils.h"
9 | 
10 | // input: new_xyz(b, m, 3) xyz(b, n, 3)
11 | // output: idx(b, m, nsample)
12 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius,
13 |                                         int nsample,
14 |                                         const float *__restrict__ new_xyz,
15 |                                         const float *__restrict__ xyz,
16 |                                         int *__restrict__ idx) {
17 |   int batch_index = blockIdx.x;
18 |   xyz += batch_index * n * 3;
19 |   new_xyz += batch_index * m * 3;
20 |   idx += m * nsample * batch_index;
21 | 
22 |   int index = threadIdx.x;
23 |   int stride = blockDim.x;
24 | 
25 |   float radius2 = radius * radius;
26 |   for (int j = index; j < m; j += stride) {
27 |     float new_x = new_xyz[j * 3 + 0];
28 |     float new_y = new_xyz[j * 3 + 1];
29 |     float new_z = new_xyz[j * 3 + 2];
30 |     for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
31 |       float x = xyz[k * 3 + 0];
32 |       float y = xyz[k * 3 + 1];
33 |       float z = xyz[k * 3 + 2];
34 |       float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
35 |                  (new_z - z) * (new_z - z);
36 |       if (d2 < radius2) {
37 |         if (cnt == 0) {
38 |           for (int l = 0; l < nsample; ++l) {
39 |             idx[j * nsample + l] = k;
40 |           }
41 |         }
42 |         idx[j * nsample + cnt] = k;
43 |         ++cnt;
44 |       }
45 |     }
46 |   }
47 | }
48 | 
49 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
50 |                                      int nsample, const float *new_xyz,
51 |                                      const float *xyz, int *idx) {
52 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
53 |   query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
54 |       b, n, m, radius, nsample, new_xyz, xyz, idx);
55 | 
56 |   CUDA_CHECK_ERRORS();
57 | }
58 | 
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/group_points.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates.
2 | 3 | 4 | #include "group_points.h" 5 | #include "utils.h" 6 | 7 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 8 | const float *points, const int *idx, 9 | float *out); 10 | 11 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 12 | int nsample, const float *grad_out, 13 | const int *idx, float *grad_points); 14 | 15 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 16 | CHECK_CONTIGUOUS(points); 17 | CHECK_CONTIGUOUS(idx); 18 | CHECK_IS_FLOAT(points); 19 | CHECK_IS_INT(idx); 20 | 21 | if (points.is_cuda()) { 22 | CHECK_CUDA(idx); 23 | } 24 | 25 | at::Tensor output = 26 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 27 | at::device(points.device()).dtype(at::ScalarType::Float)); 28 | 29 | if (points.is_cuda()) { 30 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 31 | idx.size(1), idx.size(2), points.data(), 32 | idx.data(), output.data()); 33 | } else { 34 | AT_ASSERT(false, "CPU not supported"); 35 | } 36 | 37 | return output; 38 | } 39 | 40 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 41 | CHECK_CONTIGUOUS(grad_out); 42 | CHECK_CONTIGUOUS(idx); 43 | CHECK_IS_FLOAT(grad_out); 44 | CHECK_IS_INT(idx); 45 | 46 | if (grad_out.is_cuda()) { 47 | CHECK_CUDA(idx); 48 | } 49 | 50 | at::Tensor output = 51 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 52 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 53 | 54 | if (grad_out.is_cuda()) { 55 | group_points_grad_kernel_wrapper( 56 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 57 | grad_out.data(), idx.data(), output.data()); 58 | } else { 59 | AT_ASSERT(false, "CPU not supported"); 60 | } 61 | 62 | return output; 63 | } 64 | -------------------------------------------------------------------------------- /Uni3D/utils/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def assign_learning_rate(optimizer, new_lr): 5 | for param_group in optimizer.param_groups: 6 | param_group["lr"] = new_lr 7 | 8 | 9 | def _warmup_lr(base_lr, warmup_length, step): 10 | return base_lr * (step + 1) / warmup_length 11 | 12 | 13 | def cosine_lr(optimizer, base_lr, warmup_length, steps): 14 | def _lr_adjuster(step): 15 | if step < warmup_length: 16 | lr = _warmup_lr(base_lr, warmup_length, step) 17 | else: 18 | e = step - warmup_length 19 | es = steps - warmup_length 20 | lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr 21 | assign_learning_rate(optimizer, lr) 22 | return lr 23 | return _lr_adjuster 24 | 25 | def warmup_cosine_lr(optimizer, args, steps): 26 | def _lr_adjuster(step): 27 | for param_group in optimizer.param_groups: 28 | 29 | if param_group['group'] == 'text': 30 | base_lr = args.text_lr if args.text_lr is not None else args.lr 31 | elif param_group['group'] == 'visual': 32 | base_lr = args.visual_lr if args.visual_lr is not None else args.lr 33 | else: 34 | base_lr = args.lr 35 | 36 | if step < args.warmup: 37 | lr = _warmup_lr(base_lr, args.warmup, step) 38 | else: 39 | e = step - args.warmup 40 | es = steps - args.warmup 41 | lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr 42 | scale = param_group.get("lr_scale", 1.0) 43 | param_group["lr"] = scale * lr 44 | return lr 45 | return _lr_adjuster 46 | 47 | 48 | def warmup_step_lr(optimizer, args, decay_t=500, decay_rate=0.8): 49 | def _lr_adjuster(step): 50 | for param_group in 
optimizer.param_groups: 51 | if param_group['group'] == 'text': 52 | base_lr = args.text_lr 53 | elif param_group['group'] == 'visual': 54 | base_lr = args.visual_lr 55 | else: 56 | base_lr = args.lr 57 | 58 | if step < args.warmup: 59 | lr = _warmup_lr(base_lr, args.warmup, step) 60 | else: 61 | e = step - args.warmup 62 | lr = base_lr * (decay_rate ** (e // decay_t)) 63 | scale = param_group.get("lr_scale", 1.0) 64 | param_group["lr"] = scale * lr 65 | return lr 66 | return _lr_adjuster -------------------------------------------------------------------------------- /Uni3D/data/utils/config.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from easydict import EasyDict 3 | import os 4 | from .logger import print_log 5 | 6 | def log_args_to_file(args, pre='args', logger=None): 7 | for key, val in args.__dict__.items(): 8 | print_log(f'{pre}.{key} : {val}', logger = logger) 9 | 10 | def log_config_to_file(cfg, pre='cfg', logger=None): 11 | for key, val in cfg.items(): 12 | if isinstance(cfg[key], EasyDict): 13 | print_log(f'{pre}.{key} = edict()', logger = logger) 14 | log_config_to_file(cfg[key], pre=pre + '.' + key, logger=logger) 15 | continue 16 | print_log(f'{pre}.{key} : {val}', logger = logger) 17 | 18 | def merge_new_config(config, new_config): 19 | for key, val in new_config.items(): 20 | if not isinstance(val, dict): 21 | if key == '_base_': 22 | with open(new_config['_base_'], 'r') as f: 23 | try: 24 | val = yaml.load(f, Loader=yaml.FullLoader) 25 | except: 26 | val = yaml.load(f) 27 | config[key] = EasyDict() 28 | merge_new_config(config[key], val) 29 | else: 30 | config[key] = val 31 | continue 32 | if key not in config: 33 | config[key] = EasyDict() 34 | merge_new_config(config[key], val) 35 | return config 36 | 37 | def cfg_from_yaml_file(cfg_file): 38 | config = EasyDict() 39 | with open(cfg_file, 'r') as f: 40 | try: 41 | new_config = yaml.load(f, Loader=yaml.FullLoader) 42 | except: 43 | new_config = yaml.load(f) 44 | merge_new_config(config=config, new_config=new_config) 45 | return config 46 | 47 | def get_config(args, logger=None): 48 | if args.resume: 49 | cfg_path = os.path.join(args.experiment_path, 'config.yaml') 50 | if not os.path.exists(cfg_path): 51 | print_log("Failed to resume", logger = logger) 52 | raise FileNotFoundError() 53 | print_log(f'Resume yaml from {cfg_path}', logger = logger) 54 | args.config = cfg_path 55 | config = cfg_from_yaml_file(args.config) 56 | if not args.resume and args.local_rank == 0: 57 | save_experiment_config(args, config, logger) 58 | return config 59 | 60 | def save_experiment_config(args, config, logger = None): 61 | config_path = os.path.join(args.experiment_path, 'config.yaml') 62 | os.system('cp %s %s' % (args.config, config_path)) 63 | print_log(f'Copy the Config file from {args.config} to {config_path}',logger = logger ) -------------------------------------------------------------------------------- /scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
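# Usage sketch for the readers below (the scene path is illustrative, not shipped with the repo):
#   verts = read_mesh_vertices_rgb('scans/scene0001_01/scene0001_01_vh_clean_2.ply')
#   # -> (N, 6) float32 array: xyz followed by rgb in [0, 255]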
5 | 6 | ''' Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts ''' 7 | import os 8 | import sys 9 | import json 10 | import csv 11 | 12 | try: 13 | import numpy as np 14 | except: 15 | print("Failed to import numpy package.") 16 | sys.exit(-1) 17 | 18 | try: 19 | from plyfile import PlyData, PlyElement 20 | except: 21 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 22 | print("pip install plyfile") 23 | sys.exit(-1) 24 | 25 | def represents_int(s): 26 | ''' if string s represents an int. ''' 27 | try: 28 | int(s) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 35 | assert os.path.isfile(filename) 36 | mapping = dict() 37 | with open(filename) as csvfile: 38 | reader = csv.DictReader(csvfile, delimiter='\t') 39 | for row in reader: 40 | mapping[row[label_from]] = int(row[label_to]) 41 | if represents_int(list(mapping.keys())[0]): 42 | mapping = {int(k):v for k,v in mapping.items()} 43 | return mapping 44 | 45 | def read_mesh_vertices(filename): 46 | """ read XYZ for each vertex. 47 | """ 48 | assert os.path.isfile(filename) 49 | with open(filename, 'rb') as f: 50 | plydata = PlyData.read(f) 51 | num_verts = plydata['vertex'].count 52 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 53 | vertices[:,0] = plydata['vertex'].data['x'] 54 | vertices[:,1] = plydata['vertex'].data['y'] 55 | vertices[:,2] = plydata['vertex'].data['z'] 56 | return vertices 57 | 58 | def read_mesh_vertices_rgb(filename): 59 | """ read XYZ RGB for each vertex. 60 | Note: RGB values are in 0-255 61 | """ 62 | assert os.path.isfile(filename) 63 | with open(filename, 'rb') as f: 64 | plydata = PlyData.read(f) 65 | num_verts = plydata['vertex'].count 66 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 67 | vertices[:,0] = plydata['vertex'].data['x'] 68 | vertices[:,1] = plydata['vertex'].data['y'] 69 | vertices[:,2] = plydata['vertex'].data['z'] 70 | vertices[:,3] = plydata['vertex'].data['red'] 71 | vertices[:,4] = plydata['vertex'].data['green'] 72 | vertices[:,5] = plydata['vertex'].data['blue'] 73 | return vertices 74 | 75 | 76 | -------------------------------------------------------------------------------- /Uni3D/model/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from Uni3D.utils import utils 6 | 7 | class Uni3d_Text_Image_Loss(nn.Module): 8 | def __init__(self): 9 | super().__init__() 10 | self.labels = None 11 | self.last_local_batch_size = None 12 | 13 | def forward(self, outputs, masks): 14 | pc_embed = outputs['pc_embed'] 15 | text_embed = outputs['text_embed'] 16 | image_embed = outputs['image_embed'] 17 | logit_scale = outputs['logit_scale'] 18 | local_batch_size = pc_embed.size(0) 19 | 20 | if local_batch_size != self.last_local_batch_size: 21 | self.labels = local_batch_size * utils.get_rank() + torch.arange( 22 | local_batch_size, device=pc_embed.device 23 | ) 24 | self.last_local_batch_size = local_batch_size 25 | 26 | masks = masks.to(pc_embed.device) 27 | 28 | # normalized features 29 | pc_embed = F.normalize(pc_embed, dim=-1, p=2) 30 | text_embed = F.normalize(text_embed, dim=-1, p=2) 31 | image_embed = F.normalize(image_embed, dim=-1, p=2) 32 | 33 | # gather features from all GPUs 34 | pc_embed_all, text_embed_all, image_embed_all, masks_all = \ 35 | utils.all_gather_batch([pc_embed, text_embed, 
image_embed, masks]) 36 | 37 | # cosine similarity as logits 38 | logits_per_pc_text = logit_scale * pc_embed @ text_embed_all.t() 39 | logits_per_text_pc = logit_scale * text_embed @ pc_embed_all.t() 40 | logits_per_pc_image = logit_scale * pc_embed @ image_embed_all.t() 41 | logits_per_image_pc = logit_scale * image_embed @ pc_embed_all.t() 42 | 43 | loss_text = (F.cross_entropy(logits_per_pc_text, self.labels) + \ 44 | F.cross_entropy(logits_per_text_pc, self.labels)) / 2 45 | 46 | masks = masks.bool() 47 | masks = ~masks 48 | 49 | self.labels_c = self.labels.clone() 50 | self.labels_c[masks] = -100 51 | 52 | loss_image = (F.cross_entropy(logits_per_pc_image, self.labels_c, ignore_index=-100) +\ 53 | F.cross_entropy(logits_per_image_pc, self.labels_c, ignore_index=-100)) / 2 54 | 55 | loss = loss_text + loss_image 56 | 57 | 58 | 59 | # compute accuracy 60 | with torch.no_grad(): 61 | pred = torch.argmax(logits_per_pc_text, dim=-1) 62 | correct = pred.eq(self.labels).sum() 63 | pc_text_acc = 100 * correct / local_batch_size 64 | 65 | pred = torch.argmax(logits_per_pc_image, dim=-1) 66 | correct = pred.eq(self.labels).sum() 67 | pc_image_acc = 100 * correct / local_batch_size 68 | 69 | return {'loss': loss, 'uni3d_loss': loss, 'pc_image_acc': pc_image_acc, 'pc_text_acc': pc_text_acc} 70 | 71 | -------------------------------------------------------------------------------- /util/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | import numpy as np 4 | from collections import deque 5 | from typing import List 6 | from util.dist import is_distributed, barrier, all_reduce_sum 7 | 8 | 9 | def my_worker_init_fn(worker_id): 10 | np.random.seed(np.random.get_state()[1][0] + worker_id) 11 | 12 | 13 | @torch.jit.ignore 14 | def to_list_1d(arr) -> List[float]: 15 | arr = arr.detach().cpu().numpy().tolist() 16 | return arr 17 | 18 | 19 | @torch.jit.ignore 20 | def to_list_3d(arr) -> List[List[List[float]]]: 21 | arr = arr.detach().cpu().numpy().tolist() 22 | return arr 23 | 24 | 25 | def huber_loss(error, delta=1.0): 26 | """ 27 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 28 | x = error = pred - gt or dist(pred,gt) 29 | 0.5 * |x|^2 if |x|<=d 30 | 0.5 * d^2 + d * (|x|-d) if |x|>d 31 | """ 32 | abs_error = torch.abs(error) 33 | quadratic = torch.clamp(abs_error, max=delta) 34 | linear = abs_error - quadratic 35 | loss = 0.5 * quadratic ** 2 + delta * linear 36 | return loss 37 | 38 | 39 | # From https://github.com/facebookresearch/detr/blob/master/util/misc.py 40 | class SmoothedValue(object): 41 | """Track a series of values and provide access to smoothed values over a 42 | window or the global series average. 43 | """ 44 | 45 | def __init__(self, window_size=20, fmt=None): 46 | if fmt is None: 47 | fmt = "{median:.4f} ({global_avg:.4f})" 48 | self.deque = deque(maxlen=window_size) 49 | self.total = 0.0 50 | self.count = 0 51 | self.fmt = fmt 52 | 53 | def update(self, value, n=1): 54 | self.deque.append(value) 55 | self.count += n 56 | self.total += value * n 57 | 58 | def synchronize_between_processes(self): 59 | """ 60 | Warning: does not synchronize the deque! 
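Only count/total are all-reduced across processes; the windowed statistics (median, avg, max) remain local to each rank.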
61 | """ 62 | if not is_distributed(): 63 | return 64 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") 65 | barrier() 66 | all_reduce_sum(t) 67 | t = t.tolist() 68 | self.count = int(t[0]) 69 | self.total = t[1] 70 | 71 | @property 72 | def median(self): 73 | d = torch.tensor(list(self.deque)) 74 | return d.median().item() 75 | 76 | @property 77 | def avg(self): 78 | d = torch.tensor(list(self.deque), dtype=torch.float32) 79 | return d.mean().item() 80 | 81 | @property 82 | def global_avg(self): 83 | return self.total / self.count 84 | 85 | @property 86 | def max(self): 87 | return max(self.deque) 88 | 89 | @property 90 | def value(self): 91 | return self.deque[-1] 92 | 93 | def __str__(self): 94 | return self.fmt.format( 95 | median=self.median, 96 | avg=self.avg, 97 | global_avg=self.global_avg, 98 | max=self.max, 99 | value=self.value, 100 | ) 101 | -------------------------------------------------------------------------------- /Uni3D/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import torch 3 | import numpy as np 4 | from collections import deque 5 | from typing import List 6 | from utils.dist import is_distributed, barrier, all_reduce_sum 7 | 8 | 9 | def my_worker_init_fn(worker_id): 10 | np.random.seed(np.random.get_state()[1][0] + worker_id) 11 | 12 | 13 | @torch.jit.ignore 14 | def to_list_1d(arr) -> List[float]: 15 | arr = arr.detach().cpu().numpy().tolist() 16 | return arr 17 | 18 | 19 | @torch.jit.ignore 20 | def to_list_3d(arr) -> List[List[List[float]]]: 21 | arr = arr.detach().cpu().numpy().tolist() 22 | return arr 23 | 24 | 25 | def huber_loss(error, delta=1.0): 26 | """ 27 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 28 | x = error = pred - gt or dist(pred,gt) 29 | 0.5 * |x|^2 if |x|<=d 30 | 0.5 * d^2 + d * (|x|-d) if |x|>d 31 | """ 32 | abs_error = torch.abs(error) 33 | quadratic = torch.clamp(abs_error, max=delta) 34 | linear = abs_error - quadratic 35 | loss = 0.5 * quadratic ** 2 + delta * linear 36 | return loss 37 | 38 | 39 | # From https://github.com/facebookresearch/detr/blob/master/util/misc.py 40 | class SmoothedValue(object): 41 | """Track a series of values and provide access to smoothed values over a 42 | window or the global series average. 43 | """ 44 | 45 | def __init__(self, window_size=20, fmt=None): 46 | if fmt is None: 47 | fmt = "{median:.4f} ({global_avg:.4f})" 48 | self.deque = deque(maxlen=window_size) 49 | self.total = 0.0 50 | self.count = 0 51 | self.fmt = fmt 52 | 53 | def update(self, value, n=1): 54 | self.deque.append(value) 55 | self.count += n 56 | self.total += value * n 57 | 58 | def synchronize_between_processes(self): 59 | """ 60 | Warning: does not synchronize the deque! 
61 | """ 62 | if not is_distributed(): 63 | return 64 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") 65 | barrier() 66 | all_reduce_sum(t) 67 | t = t.tolist() 68 | self.count = int(t[0]) 69 | self.total = t[1] 70 | 71 | @property 72 | def median(self): 73 | d = torch.tensor(list(self.deque)) 74 | return d.median().item() 75 | 76 | @property 77 | def avg(self): 78 | d = torch.tensor(list(self.deque), dtype=torch.float32) 79 | return d.mean().item() 80 | 81 | @property 82 | def global_avg(self): 83 | return self.total / self.count 84 | 85 | @property 86 | def max(self): 87 | return max(self.deque) 88 | 89 | @property 90 | def value(self): 91 | return self.deque[-1] 92 | 93 | def __str__(self): 94 | return self.fmt.format( 95 | median=self.median, 96 | avg=self.avg, 97 | global_avg=self.global_avg, 98 | max=self.max, 99 | value=self.value, 100 | ) 101 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include "sampling.h" 4 | #include "utils.h" 5 | 6 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 7 | const float *points, const int *idx, 8 | float *out); 9 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *grad_out, const int *idx, 11 | float *grad_points); 12 | 13 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 14 | const float *dataset, float *temp, 15 | int *idxs); 16 | 17 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.is_cuda()) { 32 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_ASSERT(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 43 | const int n) { 44 | CHECK_CONTIGUOUS(grad_out); 45 | CHECK_CONTIGUOUS(idx); 46 | CHECK_IS_FLOAT(grad_out); 47 | CHECK_IS_INT(idx); 48 | 49 | if (grad_out.is_cuda()) { 50 | CHECK_CUDA(idx); 51 | } 52 | 53 | at::Tensor output = 54 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 55 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 56 | 57 | if (grad_out.is_cuda()) { 58 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 59 | idx.size(1), grad_out.data(), 60 | idx.data(), output.data()); 61 | } else { 62 | AT_ASSERT(false, "CPU not supported"); 63 | } 64 | 65 | return output; 66 | } 67 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 68 | CHECK_CONTIGUOUS(points); 69 | CHECK_IS_FLOAT(points); 70 | 71 | at::Tensor output = 72 | torch::zeros({points.size(0), nsamples}, 73 | at::device(points.device()).dtype(at::ScalarType::Int)); 74 | 75 | at::Tensor tmp = 76 | torch::full({points.size(0), points.size(1)}, 1e10, 77 | at::device(points.device()).dtype(at::ScalarType::Float)); 78 | 79 | if (points.is_cuda()) { 80 | furthest_point_sampling_kernel_wrapper( 81 | points.size(0), 
points.size(1), nsamples, points.data(), 82 | tmp.data(), output.data()); 83 | } else { 84 | AT_ASSERT(false, "CPU not supported"); 85 | } 86 | 87 | return output; 88 | } 89 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include <stdio.h> 5 | #include <stdlib.h> 6 | 7 | #include "cuda_utils.h" 8 | 9 | // input: points(b, c, n) idx(b, npoints, nsample) 10 | // output: out(b, c, npoints, nsample) 11 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 12 | int nsample, 13 | const float *__restrict__ points, 14 | const int *__restrict__ idx, 15 | float *__restrict__ out) { 16 | int batch_index = blockIdx.x; 17 | points += batch_index * n * c; 18 | idx += batch_index * npoints * nsample; 19 | out += batch_index * npoints * nsample * c; 20 | 21 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 22 | const int stride = blockDim.y * blockDim.x; 23 | for (int i = index; i < c * npoints; i += stride) { 24 | const int l = i / npoints; 25 | const int j = i % npoints; 26 | for (int k = 0; k < nsample; ++k) { 27 | int ii = idx[j * nsample + k]; 28 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 29 | } 30 | } 31 | } 32 | 33 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 34 | const float *points, const int *idx, 35 | float *out) { 36 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 37 | 38 | group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 39 | b, c, n, npoints, nsample, points, idx, out); 40 | 41 | CUDA_CHECK_ERRORS(); 42 | } 43 | 44 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 45 | // output: grad_points(b, c, n) 46 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 47 | int nsample, 48 | const float *__restrict__ grad_out, 49 | const int *__restrict__ idx, 50 | float *__restrict__ grad_points) { 51 | int batch_index = blockIdx.x; 52 | grad_out += batch_index * npoints * nsample * c; 53 | idx += batch_index * npoints * nsample; 54 | grad_points += batch_index * n * c; 55 | 56 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 57 | const int stride = blockDim.y * blockDim.x; 58 | for (int i = index; i < c * npoints; i += stride) { 59 | const int l = i / npoints; 60 | const int j = i % npoints; 61 | for (int k = 0; k < nsample; ++k) { 62 | int ii = idx[j * nsample + k]; 63 | atomicAdd(grad_points + l * n + ii, 64 | grad_out[(l * npoints + j) * nsample + k]); 65 | } 66 | } 67 | } 68 | 69 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 70 | int nsample, const float *grad_out, 71 | const int *idx, float *grad_points) { 72 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 73 | 74 | group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>( 75 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 76 | 77 | CUDA_CHECK_ERRORS(); 78 | } 79 | -------------------------------------------------------------------------------- /scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
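# Output format note (paths are illustrative): export_one_scan below writes four arrays per scan, e.g.
#   vert = np.load('scannet_train_detection_data/scene0001_01_vert.npy')   # per-vertex data (xyz plus rgb here)
#   bbox = np.load('scannet_train_detection_data/scene0001_01_bbox.npy')   # per-instance boxes; last column is the nyu40 class id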
5 | 6 | """ Batch mode in loading ScanNet scenes with vertices and ground truth labels 7 | for semantic and instance segmentation 8 | 9 | Usage example: python ./batch_load_scannet_data.py 10 | """ 11 | import os 12 | import sys 13 | import datetime 14 | import numpy as np 15 | from load_scannet_data import export 16 | 17 | 18 | SCANNET_DIR = 'scans' 19 | TRAIN_SCAN_NAMES = [line.rstrip() for line in open('./meta_data/scannet_train.txt')] 20 | LABEL_MAP_FILE = './meta_data/scannetv2-labels.combined.tsv' 21 | DONOTCARE_CLASS_IDS = np.array([]) 22 | OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 23 | OUTPUT_FOLDER = './scannet_train_detection_data' 24 | 25 | def export_one_scan(scan_name, output_filename_prefix): 26 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply') 27 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json') 28 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') 29 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans. 30 | mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = \ 31 | export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None) 32 | 33 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 34 | mesh_vertices = mesh_vertices[mask,:] 35 | semantic_labels = semantic_labels[mask] 36 | instance_labels = instance_labels[mask] 37 | 38 | num_instances = len(np.unique(instance_labels)) 39 | print('Num of instances: ', num_instances) 40 | 41 | bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS) 42 | instance_bboxes = instance_bboxes[bbox_mask,:] 43 | print('Num of care instances: ', instance_bboxes.shape[0]) 44 | 45 | N = mesh_vertices.shape[0] 46 | 47 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices) 48 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels) 49 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels) 50 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes) 51 | 52 | def batch_export(): 53 | if not os.path.exists(OUTPUT_FOLDER): 54 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER)) 55 | os.mkdir(OUTPUT_FOLDER) 56 | 57 | for scan_name in TRAIN_SCAN_NAMES: 58 | print('-'*20+'begin') 59 | print(datetime.datetime.now()) 60 | print(scan_name) 61 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name) 62 | if os.path.isfile(output_filename_prefix+'_vert.npy'): 63 | print('File already exists. 
skipping.') 64 | print('-'*20+'done') 65 | continue 66 | try: 67 | export_one_scan(scan_name, output_filename_prefix) 68 | except Exception: 69 | print('Failed to export scan: %s'%(scan_name)) 70 | print('-'*20+'done') 71 | 72 | if __name__=='__main__': 73 | batch_export() 74 | -------------------------------------------------------------------------------- /Uni3D/data/utils/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def random_rotate_z(pc): 4 | # randomly rotate around the z axis 5 | theta = np.random.uniform(0, 2*np.pi) 6 | R = np.array([[np.cos(theta), -np.sin(theta), 0], 7 | [np.sin(theta), np.cos(theta), 0], 8 | [0, 0, 1]]) 9 | return np.matmul(pc, R) 10 | 11 | def normalize_pc(pc): 12 | # normalize pc to [-1, 1] 13 | pc = pc - np.mean(pc, axis=0) 14 | if np.max(np.linalg.norm(pc, axis=1)) < 1e-6: 15 | pc = np.zeros_like(pc) 16 | else: 17 | pc = pc / np.max(np.linalg.norm(pc, axis=1)) 18 | return pc 19 | 20 | def random_point_dropout(batch_pc, max_dropout_ratio=0.875): 21 | ''' batch_pc: BxNx3 ''' 22 | for b in range(batch_pc.shape[0]): 23 | dropout_ratio = np.random.random()*max_dropout_ratio # 0~0.875 24 | drop_idx = np.where(np.random.random((batch_pc.shape[1]))<=dropout_ratio)[0] 25 | if len(drop_idx)>0: 26 | batch_pc[b,drop_idx,:] = batch_pc[b,0,:] # set to the first point 27 | return batch_pc 28 | 29 | def random_scale_point_cloud(batch_data, scale_low=0.8, scale_high=1.25): 30 | """ Randomly scale the point cloud. Scale is per point cloud. 31 | Input: 32 | BxNx3 array, original batch of point clouds 33 | Return: 34 | BxNx3 array, scaled batch of point clouds 35 | """ 36 | B, N, C = batch_data.shape 37 | scales = np.random.uniform(scale_low, scale_high, B) 38 | for batch_index in range(B): 39 | batch_data[batch_index,:,:] *= scales[batch_index] 40 | return batch_data 41 | 42 | def shift_point_cloud(batch_data, shift_range=0.1): 43 | """ Randomly shift point cloud. Shift is per point cloud. 44 | Input: 45 | BxNx3 array, original batch of point clouds 46 | Return: 47 | BxNx3 array, shifted batch of point clouds 48 | """ 49 | B, N, C = batch_data.shape 50 | shifts = np.random.uniform(-shift_range, shift_range, (B,3)) 51 | for batch_index in range(B): 52 | batch_data[batch_index,:,:] += shifts[batch_index,:] 53 | return batch_data 54 | 55 | def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18): 56 | """ Randomly perturb the point clouds by small rotations 57 | Input: 58 | BxNx3 array, original batch of point clouds 59 | Return: 60 | BxNx3 array, rotated batch of point clouds 61 | """ 62 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 63 | for k in range(batch_data.shape[0]): 64 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 65 | Rx = np.array([[1,0,0], 66 | [0,np.cos(angles[0]),-np.sin(angles[0])], 67 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 68 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 69 | [0,1,0], 70 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 71 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 72 | [np.sin(angles[2]),np.cos(angles[2]),0], 73 | [0,0,1]]) 74 | R = np.dot(Rz, np.dot(Ry,Rx)) 75 | shape_pc = batch_data[k, ...] 76 | rotated_data[k, ...] 
= np.dot(shape_pc.reshape((-1, 3)), R) 77 | return rotated_data 78 | 79 | def augment_pc(data): 80 | data = random_point_dropout(data[None, ...]) 81 | data = random_scale_point_cloud(data) 82 | data = shift_point_cloud(data) 83 | data = rotate_perturbation_point_cloud(data) 84 | data = data.squeeze() 85 | return data -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #include "interpolate.h" 4 | #include "utils.h" 5 | 6 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 7 | const float *known, float *dist2, int *idx); 8 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 9 | const float *points, const int *idx, 10 | const float *weight, float *out); 11 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 12 | const float *grad_out, 13 | const int *idx, const float *weight, 14 | float *grad_points); 15 | 16 | std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) { 17 | CHECK_CONTIGUOUS(unknowns); 18 | CHECK_CONTIGUOUS(knows); 19 | CHECK_IS_FLOAT(unknowns); 20 | CHECK_IS_FLOAT(knows); 21 | 22 | if (unknowns.is_cuda()) { 23 | CHECK_CUDA(knows); 24 | } 25 | 26 | at::Tensor idx = 27 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 28 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 29 | at::Tensor dist2 = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 32 | 33 | if (unknowns.is_cuda()) { 34 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 35 | unknowns.data(), knows.data(), 36 | dist2.data(), idx.data()); 37 | } else { 38 | AT_ASSERT(false, "CPU not supported"); 39 | } 40 | 41 | return {dist2, idx}; 42 | } 43 | 44 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 45 | at::Tensor weight) { 46 | CHECK_CONTIGUOUS(points); 47 | CHECK_CONTIGUOUS(idx); 48 | CHECK_CONTIGUOUS(weight); 49 | CHECK_IS_FLOAT(points); 50 | CHECK_IS_INT(idx); 51 | CHECK_IS_FLOAT(weight); 52 | 53 | if (points.is_cuda()) { 54 | CHECK_CUDA(idx); 55 | CHECK_CUDA(weight); 56 | } 57 | 58 | at::Tensor output = 59 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 60 | at::device(points.device()).dtype(at::ScalarType::Float)); 61 | 62 | if (points.is_cuda()) { 63 | three_interpolate_kernel_wrapper( 64 | points.size(0), points.size(1), points.size(2), idx.size(1), 65 | points.data(), idx.data(), weight.data(), 66 | output.data()); 67 | } else { 68 | AT_ASSERT(false, "CPU not supported"); 69 | } 70 | 71 | return output; 72 | } 73 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 74 | at::Tensor weight, const int m) { 75 | CHECK_CONTIGUOUS(grad_out); 76 | CHECK_CONTIGUOUS(idx); 77 | CHECK_CONTIGUOUS(weight); 78 | CHECK_IS_FLOAT(grad_out); 79 | CHECK_IS_INT(idx); 80 | CHECK_IS_FLOAT(weight); 81 | 82 | if (grad_out.is_cuda()) { 83 | CHECK_CUDA(idx); 84 | CHECK_CUDA(weight); 85 | } 86 | 87 | at::Tensor output = 88 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 89 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 90 | 91 | if (grad_out.is_cuda()) { 92 | three_interpolate_grad_kernel_wrapper( 93 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 94 | grad_out.data(), idx.data(), weight.data(), 95 | output.data()); 96 | } else { 97 | AT_ASSERT(false, "CPU not 
supported"); 98 | } 99 | 100 | return output; 101 | } 102 | -------------------------------------------------------------------------------- /scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | 15 | class ScannetDatasetConfig(object): 16 | def __init__(self): 17 | self.num_class = 18 18 | self.num_heading_bin = 1 19 | self.num_size_cluster = 18 20 | 21 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 22 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 23 | 'refrigerator':12, 'showercurtrain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'garbagebin':17} 24 | self.class2type = {self.type2class[t]:t for t in self.type2class} 25 | self.nyu40ids = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 26 | self.nyu40id2class = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 27 | self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means.npz'))['arr_0'] 28 | self.type_mean_size = {} 29 | for i in range(self.num_size_cluster): 30 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 31 | 32 | def angle2class(self, angle): 33 | ''' Convert continuous angle to discrete class 34 | [optional] also returns a small regression number from 35 | the class center angle to the current angle. 36 | 37 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 38 | return is class of int32 of 0,1,...,N-1 and a number such that 39 | class*(2pi/N) + number = angle 40 | 41 | NOT USED. 42 | ''' 43 | assert(False) 44 | 45 | def class2angle(self, pred_cls, residual, to_label_format=True): 46 | ''' Inverse function to angle2class. 47 | 48 | As ScanNet only has axis-aligned boxes, angles are always 0. 
''' 49 | return 0 50 | 51 | def size2class(self, size, type_name): 52 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 53 | size_class = self.type2class[type_name] 54 | size_residual = size - self.type_mean_size[type_name] 55 | return size_class, size_residual 56 | 57 | def class2size(self, pred_cls, residual): 58 | ''' Inverse function to size2class ''' 59 | return self.mean_size_arr[pred_cls, :] + residual 60 | 61 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 62 | heading_angle = self.class2angle(heading_class, heading_residual) 63 | box_size = self.class2size(int(size_class), size_residual) 64 | obb = np.zeros((7,)) 65 | obb[0:3] = center 66 | obb[3:6] = box_size 67 | obb[6] = heading_angle*-1 68 | return obb 69 | 70 | def rotate_aligned_boxes(input_boxes, rot_mat): 71 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 72 | new_centers = np.dot(centers, np.transpose(rot_mat)) 73 | 74 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 75 | new_x = np.zeros((dx.shape[0], 4)) 76 | new_y = np.zeros((dx.shape[0], 4)) 77 | 78 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 79 | crnrs = np.zeros((dx.shape[0], 3)) 80 | crnrs[:,0] = crnr[0]*dx 81 | crnrs[:,1] = crnr[1]*dy 82 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 83 | new_x[:,i] = crnrs[:,0] 84 | new_y[:,i] = crnrs[:,1] 85 | 86 | 87 | new_dx = 2.0*np.max(new_x, 1) 88 | new_dy = 2.0*np.max(new_y, 1) 89 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 90 | 91 | return np.concatenate([new_centers, new_lengths], axis=1) 92 | -------------------------------------------------------------------------------- /util/random_cuboid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | import numpy as np 3 | 4 | 5 | def check_aspect(crop_range, aspect_min): 6 | xy_aspect = np.min(crop_range[:2]) / np.max(crop_range[:2]) 7 | xz_aspect = np.min(crop_range[[0, 2]]) / np.max(crop_range[[0, 2]]) 8 | yz_aspect = np.min(crop_range[1:]) / np.max(crop_range[1:]) 9 | return ( 10 | (xy_aspect >= aspect_min) 11 | or (xz_aspect >= aspect_min) 12 | or (yz_aspect >= aspect_min) 13 | ) 14 | 15 | 16 | class RandomCuboid(object): 17 | """ 18 | RandomCuboid augmentation from DepthContrast [https://arxiv.org/abs/2101.02691] 19 | We slightly modify this operation to account for object detection. 
20 | This augmentation randomly crops a cuboid from the input and 21 | ensures that the cropped cuboid contains at least one bounding box 22 | """ 23 | 24 | def __init__( 25 | self, 26 | min_points, 27 | aspect=0.8, 28 | min_crop=0.5, 29 | max_crop=1.0, 30 | box_filter_policy="center", 31 | ): 32 | self.aspect = aspect 33 | self.min_crop = min_crop 34 | self.max_crop = max_crop 35 | self.min_points = min_points 36 | self.box_filter_policy = box_filter_policy 37 | 38 | def __call__(self, point_cloud, target_boxes, per_point_labels=None): 39 | range_xyz = np.max(point_cloud[:, 0:3], axis=0) - np.min( 40 | point_cloud[:, 0:3], axis=0 41 | ) 42 | 43 | for _ in range(100): 44 | crop_range = self.min_crop + np.random.rand(3) * ( 45 | self.max_crop - self.min_crop 46 | ) 47 | if not check_aspect(crop_range, self.aspect): 48 | continue 49 | 50 | sample_center = point_cloud[np.random.choice(len(point_cloud)), 0:3] 51 | 52 | new_range = range_xyz * crop_range / 2.0 53 | 54 | max_xyz = sample_center + new_range 55 | min_xyz = sample_center - new_range 56 | 57 | upper_idx = ( 58 | np.sum((point_cloud[:, 0:3] <= max_xyz).astype(np.int32), 1) == 3 59 | ) 60 | lower_idx = ( 61 | np.sum((point_cloud[:, 0:3] >= min_xyz).astype(np.int32), 1) == 3 62 | ) 63 | 64 | new_pointidx = (upper_idx) & (lower_idx) 65 | 66 | if np.sum(new_pointidx) < self.min_points: 67 | continue 68 | 69 | new_point_cloud = point_cloud[new_pointidx, :] 70 | 71 | # filtering policy is the only modification from DepthContrast 72 | if self.box_filter_policy == "center": 73 | # remove boxes whose center does not lie within the new_point_cloud 74 | new_boxes = target_boxes 75 | if ( 76 | target_boxes.sum() > 0 77 | ): # only filter when ground truth actually contains boxes; empty ground truth is common in SUNRGBD. 78 | box_centers = target_boxes[:, 0:3] 79 | new_pc_min_max = np.min(new_point_cloud[:, 0:3], axis=0), np.max( 80 | new_point_cloud[:, 0:3], axis=0 81 | ) 82 | keep_boxes = np.logical_and( 83 | np.all(box_centers >= new_pc_min_max[0], axis=1), 84 | np.all(box_centers <= new_pc_min_max[1], axis=1), 85 | ) 86 | if keep_boxes.sum() == 0: 87 | # current data augmentation removes all boxes in the pointcloud. fail! 88 | continue 89 | new_boxes = target_boxes[keep_boxes] 90 | if per_point_labels is not None: 91 | new_per_point_labels = [x[new_pointidx] for x in per_point_labels] 92 | else: 93 | new_per_point_labels = None 94 | # if we are here, all conditions are met. return boxes 95 | return new_point_cloud, new_boxes, new_per_point_labels 96 | 97 | # fallback 98 | return point_cloud, target_boxes, per_point_labels 99 | -------------------------------------------------------------------------------- /models/mink_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | import MinkowskiEngine as ME 5 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 6 | 7 | 8 | class MinkResNet(nn.Module): 9 | r"""Minkowski ResNet backbone. See `4D Spatio-Temporal ConvNets 10 | <https://arxiv.org/abs/1904.08755>`_ for more details. 11 | 12 | Args: 13 | depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. 14 | in_channels (int): Number of input channels, 3 for RGB. 15 | num_stages (int, optional): Resnet stages. Default: 4. 16 | inplanes (int, optional): Number of stem output channels. Default: 64. 17 | stem_bn (bool, optional): Use BatchNorm instead of InstanceNorm in the stem. Default: False. 
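Example (a sketch, assuming a MinkowskiEngine sparse tensor has already been built; `feats`/`coords` are hypothetical batched inputs):
    >>> backbone = MinkResNet(depth=34, in_channels=3)
    >>> x = ME.SparseTensor(features=feats, coordinates=coords)
    >>> outs = backbone(x)  # list with one sparse tensor per stage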
18 | """ 19 | arch_settings = { 20 | 18: (BasicBlock, (2, 2, 2, 2)), 21 | 34: (BasicBlock, (3, 4, 6, 3)), 22 | 50: (Bottleneck, (3, 4, 6, 3)), 23 | 101: (Bottleneck, (3, 4, 23, 3)), 24 | 152: (Bottleneck, (3, 8, 36, 3)) 25 | } 26 | 27 | def __init__(self, depth, in_channels, inplanes=64, num_stages=4, stem_bn=False): 28 | super(MinkResNet, self).__init__() 29 | if depth not in self.arch_settings: 30 | raise KeyError(f'invalid depth {depth} for resnet') 31 | assert 4 >= num_stages >= 1 32 | block, stage_blocks = self.arch_settings[depth] 33 | stage_blocks = stage_blocks[:num_stages] 34 | self.num_stages = num_stages 35 | 36 | self.inplanes = inplanes 37 | 38 | self.conv1 = ME.MinkowskiConvolution( 39 | in_channels, self.inplanes, kernel_size=3, stride=2, dimension=3) 40 | # Maybe BatchNorm is better, but we follow the original implementation. 41 | self.norm1 = ME.MinkowskiBatchNorm(self.inplanes) if stem_bn else ME.MinkowskiInstanceNorm(self.inplanes) 42 | self.relu = ME.MinkowskiReLU(inplace=False) 43 | 44 | for i, num_blocks in enumerate(stage_blocks): 45 | setattr( 46 | self, f'layer{i + 1}', 47 | self._make_layer(block, inplanes * 2**i, stage_blocks[i], stride=2)) 48 | 49 | self.init_weights() 50 | 51 | def init_weights(self): 52 | print('random init backbone') 53 | for m in self.modules(): 54 | if isinstance(m, ME.MinkowskiConvolution): 55 | ME.utils.kaiming_normal_( 56 | m.kernel, mode='fan_out', nonlinearity='relu') 57 | 58 | if isinstance(m, ME.MinkowskiBatchNorm): 59 | nn.init.constant_(m.bn.weight, 1) 60 | nn.init.constant_(m.bn.bias, 0) 61 | 62 | def _make_layer(self, block, planes, blocks, stride): 63 | downsample = None 64 | if stride != 1 or self.inplanes != planes * block.expansion: 65 | downsample = nn.Sequential( 66 | ME.MinkowskiConvolution( 67 | self.inplanes, 68 | planes * block.expansion, 69 | kernel_size=1, 70 | stride=stride, 71 | dimension=3), 72 | ME.MinkowskiBatchNorm(planes * block.expansion)) 73 | layers = [] 74 | layers.append( 75 | block( 76 | self.inplanes, 77 | planes, 78 | stride=stride, 79 | downsample=downsample, 80 | dimension=3)) 81 | self.inplanes = planes * block.expansion 82 | for i in range(1, blocks): 83 | layers.append(block(self.inplanes, planes, stride=1, dimension=3)) 84 | return nn.Sequential(*layers) 85 | 86 | def forward(self, x): 87 | """Forward pass of ResNet. 88 | 89 | Args: 90 | x (ME.SparseTensor): Input sparse tensor. 91 | 92 | Returns: 93 | list[ME.SparseTensor]: Output sparse tensors. 94 | """ 95 | x = self.conv1(x) 96 | x = self.norm1(x) 97 | x = self.relu(x) 98 | outs = [] 99 | for i in range(self.num_stages): 100 | x = getattr(self, f'layer{i + 1}')(x) 101 | outs.append(x) 102 | return outs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## [Point2Graph: An End-to-end Point Cloud-based 3D Open-Vocabulary Scene Graph for Robot Navigation](https://arxiv.org/abs/2409.10350) 2 | 3 | This is the implementation of the **Object Detection and Classification** module of the paper "Point2Graph: An End-to-end Point Cloud-based 3D Open-Vocabulary Scene Graph for Robot Navigation". 
4 | 5 | Authors: [Yifan Xu](https://www.linkedin.com/in/yifan-xu-43876120b/), [Ziming Luo](https://zimingluo.github.io/), [Qianwei Wang](https://www.linkedin.com/in/qianwei-wang-945bb9292/), [Vineet Kamat](https://live.engin.umich.edu/), [Carol Menassa](https://cee.engin.umich.edu/people/menassa-carol-c/) 6 | 7 | ## News: 8 | 9 | [2025/02] Our paper has been accepted to **ICRA 2025** 🎉🎉🎉 10 | 11 | ## Object Detection and Classification Pipeline 12 | 13 | This module consists of two stages: (1) detection and localization using class-agnostic bounding boxes and DBSCAN filtering for object refinement, and (2) classification via cross-modal retrieval, connecting 3D point cloud data with textual descriptions, without requiring annotations or RGB-D alignment. A minimal sketch of the DBSCAN refinement idea is given in the appendix at the end of this README. 14 | 15 | ![Pipeline Image](https://point2graph.github.io/static/figure/object_pipeline.png) 16 | 17 | 18 | 19 | ## Getting Started 20 | 21 | ### Installation 22 | 23 | **Step 1.** Create a conda environment and activate it. 24 | 25 | ```shell 26 | conda env create -f point2graph.yaml 27 | conda activate point2graph 28 | ``` 29 | 30 | **Step 2.** Install **Minkowski Engine**. 31 | 32 | ```bash 33 | git clone https://github.com/NVIDIA/MinkowskiEngine.git 34 | cd MinkowskiEngine 35 | python setup.py install --blas_include_dirs=${CONDA_PREFIX}/include --blas=openblas 36 | ``` 37 | 38 | **Step 3.** Install **mmcv**. 39 | 40 | ```bash 41 | pip install openmim 42 | mim install mmcv-full==1.6.1 43 | ``` 44 | 45 | **Step 4.** Install the third-party extensions. 46 | 47 | ```bash 48 | cd pointnet2/ && python setup.py install --user 49 | cd .. 50 | cd utils && python cython_compile.py build_ext --inplace 51 | cd .. 52 | ``` 53 | 54 | ### Dataset preparation 55 | 56 | **ScanNet Data** 57 | 58 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Move/link the `scans` folder such that under `scans` there should be folders with names such as `scene0001_01`. 59 | 2. Enter the `scannet` folder and extract point clouds and annotations (semantic segmentation, instance segmentation, etc.) by running `python batch_load_scannet_data.py`, which creates a folder named `scannet_train_detection_data` there. 60 | 61 | ### Model preparation 62 | 63 | You should 64 | 65 | * download the 3D Object Detection pre-trained model [V-DETR](https://huggingface.co/byshen/vdetr/blob/main/scannet_540ep.pth), and put it in the `./models/` folder. 66 | * download the 3D Object Classification pre-trained model [Uni-3D](https://github.com/baaivision/Uni3D#model-zoo) and the [clip model](https://huggingface.co/timm/eva02_enormous_patch14_plus_clip_224.laion2b_s9b_b144k/blob/main/open_clip_pytorch_model.bin), and put them in the `./Uni3D/downloads/` folder. 67 | 68 | 69 | ## Testing 70 | 71 | The test script is in the `run.sh` file. Once you have the datasets and models prepared, you can run the test as 72 | 73 | ```shell 74 | bash run.sh 75 | ``` 76 | 77 | The script performs two functions: 78 | 79 | 1. Extracts a set of object point clouds with unknown class and stores them at `./results/objects/` 80 | 2. Retrieves and visualizes the 3D object point cloud most relevant to the user's query 81 | 82 | ## Acknowledgement 83 | 84 | Point2Graph is built on [V-DETR](https://github.com/V-DETR/V-DETR) and [Uni3D](https://github.com/baaivision/Uni3D). 
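## Appendix: DBSCAN Refinement Sketch

The pipeline section above mentions DBSCAN filtering for object refinement. The snippet below is a minimal, illustrative sketch of that idea (keep the dominant cluster among the points inside a detected box); the function name, the `eps`/`min_samples` values, and the scikit-learn dependency are our assumptions rather than the repository's exact implementation.

```python
import numpy as np
from sklearn.cluster import DBSCAN

def refine_box_points(points_in_box, eps=0.05, min_samples=10):
    """Keep the largest DBSCAN cluster inside a detected box (illustrative)."""
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(points_in_box[:, :3])
    valid = labels[labels >= 0]          # DBSCAN marks noise points with -1
    if valid.size == 0:                  # everything is noise: keep the raw points
        return points_in_box
    keep = labels == np.bincount(valid).argmax()
    return points_in_box[keep]
```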
85 | 86 | 87 | ## Citation 88 | 89 | If you find this code useful in your research, please consider citing: 90 | 91 | ``` 92 | @misc{xu2024point2graphendtoendpointcloudbased, 93 | title={Point2Graph: An End-to-end Point Cloud-based 3D Open-Vocabulary Scene Graph for Robot Navigation}, 94 | author={Yifan Xu and Ziming Luo and Qianwei Wang and Vineet Kamat and Carol Menassa}, 95 | year={2024}, 96 | eprint={2409.10350}, 97 | archivePrefix={arXiv}, 98 | primaryClass={cs.RO}, 99 | url={https://arxiv.org/abs/2409.10350}, 100 | } 101 | ``` 102 | -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.8/build.ninja: -------------------------------------------------------------------------------- 1 | ninja_required_version = 1.3 2 | cxx = c++ 3 | nvcc = /usr/local/cuda/bin/nvcc 4 | 5 | cflags = -pthread -B /opt/conda/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/include -I/opt/conda/lib/python3.8/site-packages/torch/include -I/opt/conda/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/lib/python3.8/site-packages/torch/include/TH -I/opt/conda/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/include/python3.8 -c 6 | post_cflags = -O2 -I_ext_src/include -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_ext -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 7 | cuda_cflags = -I/home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/include -I/opt/conda/lib/python3.8/site-packages/torch/include -I/opt/conda/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/opt/conda/lib/python3.8/site-packages/torch/include/TH -I/opt/conda/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/opt/conda/include/python3.8 -c 8 | cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -O2 -I_ext_src/include -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_ext -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -std=c++14 9 | ldflags = 10 | 11 | rule compile 12 | command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags 13 | depfile = $out.d 14 | deps = gcc 15 | 16 | rule cuda_compile 17 | depfile = $out.d 18 | deps = gcc 19 | command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags 20 | 21 | 22 | 23 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/sampling.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/sampling.cpp 24 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/interpolate.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/interpolate.cpp 25 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/group_points.o: compile 
/home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/group_points.cpp 26 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/bindings.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/bindings.cpp 27 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/ball_query.o: compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/ball_query.cpp 28 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/sampling_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/sampling_gpu.cu 29 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/interpolate_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/interpolate_gpu.cu 30 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/group_points_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/group_points_gpu.cu 31 | build /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/build/temp.linux-x86_64-3.8/_ext_src/src/ball_query_gpu.o: cuda_compile /home/syc/yichao_blob_2/code/3detr_rebuttal/third_party/pointnet2/_ext_src/src/ball_query_gpu.cu 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /models/modules/resnet_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) V-DETR authors. All Rights Reserved. 
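# Note: the BasicBlock*/Bottleneck* variants defined below differ only in their
# NORM_TYPE class attribute, so a norm choice can be a plain lookup, e.g.
# (norm_name is an illustrative config value, not part of this file):
#   block_cls = {'BN': BasicBlock, 'IN': BasicBlockIN, 'INBN': BasicBlockINBN}[norm_name]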
2 | import torch.nn as nn 3 | from MinkowskiEngine import MinkowskiReLU 4 | 5 | from models.modules.common import ConvType, NormType, conv, get_norm 6 | 7 | 8 | class BasicBlockBase(nn.Module): 9 | expansion = 1 10 | NORM_TYPE = NormType.BATCH_NORM 11 | 12 | def __init__( 13 | self, 14 | inplanes, 15 | planes, 16 | stride=1, 17 | dilation=1, 18 | downsample=None, 19 | conv_type=ConvType.HYPERCUBE, 20 | bn_momentum=0.1, 21 | D=3, 22 | ): 23 | super().__init__() 24 | 25 | self.conv1 = conv( 26 | inplanes, 27 | planes, 28 | kernel_size=3, 29 | stride=stride, 30 | dilation=dilation, 31 | conv_type=conv_type, 32 | D=D, 33 | ) 34 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 35 | self.conv2 = conv( 36 | planes, 37 | planes, 38 | kernel_size=3, 39 | stride=1, 40 | dilation=dilation, 41 | bias=False, 42 | conv_type=conv_type, 43 | D=D, 44 | ) 45 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 46 | self.relu = MinkowskiReLU(inplace=False) 47 | self.downsample = downsample 48 | 49 | def forward(self, x): 50 | residual = x 51 | 52 | out = self.conv1(x) 53 | out = self.norm1(out) 54 | out = self.relu(out) 55 | 56 | out = self.conv2(out) 57 | out = self.norm2(out) 58 | 59 | if self.downsample is not None: 60 | residual = self.downsample(x) 61 | 62 | out += residual 63 | out = self.relu(out) 64 | 65 | return out 66 | 67 | 68 | class BasicBlock(BasicBlockBase): 69 | NORM_TYPE = NormType.BATCH_NORM 70 | 71 | 72 | class BasicBlockIN(BasicBlockBase): 73 | NORM_TYPE = NormType.INSTANCE_NORM 74 | 75 | 76 | class BasicBlockINBN(BasicBlockBase): 77 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 78 | 79 | 80 | class BottleneckBase(nn.Module): 81 | expansion = 4 82 | NORM_TYPE = NormType.BATCH_NORM 83 | 84 | def __init__( 85 | self, 86 | inplanes, 87 | planes, 88 | stride=1, 89 | dilation=1, 90 | downsample=None, 91 | conv_type=ConvType.HYPERCUBE, 92 | bn_momentum=0.1, 93 | D=3, 94 | ): 95 | super().__init__() 96 | self.conv1 = conv(inplanes, planes, kernel_size=1, D=D) 97 | self.norm1 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 98 | 99 | self.conv2 = conv( 100 | planes, 101 | planes, 102 | kernel_size=3, 103 | stride=stride, 104 | dilation=dilation, 105 | conv_type=conv_type, 106 | D=D, 107 | ) 108 | self.norm2 = get_norm(self.NORM_TYPE, planes, D, bn_momentum=bn_momentum) 109 | 110 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, D=D) 111 | self.norm3 = get_norm( 112 | self.NORM_TYPE, planes * self.expansion, D, bn_momentum=bn_momentum 113 | ) 114 | 115 | self.relu = MinkowskiReLU(inplace=False) 116 | self.downsample = downsample 117 | 118 | def forward(self, x): 119 | residual = x 120 | 121 | out = self.conv1(x) 122 | out = self.norm1(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv2(out) 126 | out = self.norm2(out) 127 | out = self.relu(out) 128 | 129 | out = self.conv3(out) 130 | out = self.norm3(out) 131 | 132 | if self.downsample is not None: 133 | residual = self.downsample(x) 134 | 135 | out += residual 136 | out = self.relu(out) 137 | 138 | return out 139 | 140 | 141 | class Bottleneck(BottleneckBase): 142 | NORM_TYPE = NormType.BATCH_NORM 143 | 144 | 145 | class BottleneckIN(BottleneckBase): 146 | NORM_TYPE = NormType.INSTANCE_NORM 147 | 148 | 149 | class BottleneckINBN(BottleneckBase): 150 | NORM_TYPE = NormType.INSTANCE_BATCH_NORM 151 | -------------------------------------------------------------------------------- /models/helpers.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # V-DETR 3 | # Copyright (c) V-DETR authors. All Rights Reserved. 4 | # ------------------------------------------------------------------------ 5 | # Modified from : 6 | # Group-Free-3D 7 | # Copyright (c) Group-Free-3D authors. All Rights Reserved. 8 | # 3DETR 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 10 | # ------------------------------------------------------------------------ 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from functools import partial 14 | import copy 15 | 16 | 17 | class PositionEmbeddingLearned(nn.Module): 18 | """ 19 | Absolute pos embedding, learned. 20 | """ 21 | 22 | def __init__(self, input_channel, num_pos_feats=288): 23 | super().__init__() 24 | self.position_embedding_head = nn.Sequential( 25 | nn.Conv1d(input_channel, num_pos_feats, kernel_size=1), 26 | nn.BatchNorm1d(num_pos_feats), 27 | nn.ReLU(inplace=True), 28 | nn.Conv1d(num_pos_feats, num_pos_feats, kernel_size=1)) 29 | 30 | def forward(self, xyz): 31 | xyz = xyz.transpose(1, 2).contiguous() 32 | position_embedding = self.position_embedding_head(xyz) 33 | return position_embedding 34 | 35 | 36 | class BatchNormDim1Swap(nn.BatchNorm1d): 37 | """ 38 | Used for nn.Transformer that uses a HW x N x C rep 39 | """ 40 | 41 | def forward(self, x): 42 | """ 43 | x: HW x N x C 44 | permute to N x C x HW 45 | Apply BN on C 46 | permute back 47 | """ 48 | hw, n, c = x.shape 49 | x = x.permute(1, 2, 0) 50 | x = super(BatchNormDim1Swap, self).forward(x) 51 | # x: n x c x hw -> hw x n x c 52 | x = x.permute(2, 0, 1) 53 | return x 54 | 55 | 56 | NORM_DICT = { 57 | "bn": BatchNormDim1Swap, 58 | "bn1d": nn.BatchNorm1d, 59 | "id": nn.Identity, 60 | "ln": nn.LayerNorm, 61 | } 62 | 63 | ACTIVATION_DICT = { 64 | "relu": nn.ReLU, 65 | "gelu": nn.GELU, 66 | "leakyrelu": partial(nn.LeakyReLU, negative_slope=0.1), 67 | } 68 | 69 | WEIGHT_INIT_DICT = { 70 | "xavier_uniform": nn.init.xavier_uniform_, 71 | } 72 | 73 | 74 | class GenericMLP(nn.Module): 75 | def __init__( 76 | self, 77 | input_dim, 78 | hidden_dims, 79 | output_dim, 80 | norm_fn_name=None, 81 | activation="relu", 82 | use_conv=False, 83 | dropout=None, 84 | hidden_use_bias=False, 85 | output_use_bias=True, 86 | output_use_activation=False, 87 | output_use_norm=False, 88 | weight_init_name=None, 89 | ): 90 | super().__init__() 91 | activation = ACTIVATION_DICT[activation] 92 | norm = None 93 | if norm_fn_name is not None: 94 | norm = NORM_DICT[norm_fn_name] 95 | if norm_fn_name == "ln" and use_conv: 96 | norm = lambda x: nn.GroupNorm(1, x) # easier way to use LayerNorm 97 | 98 | if dropout is not None: 99 | if not isinstance(dropout, list): 100 | dropout = [dropout for _ in range(len(hidden_dims))] 101 | 102 | layers = [] 103 | prev_dim = input_dim 104 | for idx, x in enumerate(hidden_dims): 105 | if use_conv: 106 | layer = nn.Conv1d(prev_dim, x, 1, bias=hidden_use_bias) 107 | else: 108 | layer = nn.Linear(prev_dim, x, bias=hidden_use_bias) 109 | layers.append(layer) 110 | if norm: 111 | layers.append(norm(x)) 112 | layers.append(activation()) 113 | if dropout is not None: 114 | layers.append(nn.Dropout(p=dropout[idx])) 115 | prev_dim = x 116 | if use_conv: 117 | layer = nn.Conv1d(prev_dim, output_dim, 1, bias=output_use_bias) 118 | else: 119 | layer = nn.Linear(prev_dim, output_dim, bias=output_use_bias) 120 | layers.append(layer) 121 | 122 | if 
output_use_norm: 123 | layers.append(norm(output_dim)) 124 | 125 | if output_use_activation: 126 | layers.append(activation()) 127 | 128 | self.layers = nn.Sequential(*layers) 129 | 130 | if weight_init_name is not None: 131 | self.do_weight_init(weight_init_name) 132 | 133 | def do_weight_init(self, weight_init_name): 134 | func = WEIGHT_INIT_DICT[weight_init_name] 135 | for (_, param) in self.named_parameters(): 136 | if param.dim() > 1: # skips batchnorm/layernorm 137 | func(param) 138 | 139 | def forward(self, x): 140 | output = self.layers(x) 141 | return output 142 | 143 | 144 | def get_clones(module, N): 145 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 146 | -------------------------------------------------------------------------------- /Uni3D/data/templates.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelnet40_64": [ 3 | "a point cloud model of {}.", 4 | "There is a {} in the scene.", 5 | "There is the {} in the scene.", 6 | "a photo of a {} in the scene.", 7 | "a photo of the {} in the scene.", 8 | "a photo of one {} in the scene.", 9 | "itap of a {}.", 10 | "itap of my {}.", 11 | "itap of the {}.", 12 | "a photo of a {}.", 13 | "a photo of my {}.", 14 | "a photo of the {}.", 15 | "a photo of one {}.", 16 | "a photo of many {}.", 17 | "a good photo of a {}.", 18 | "a good photo of the {}.", 19 | "a bad photo of a {}.", 20 | "a bad photo of the {}.", 21 | "a photo of a nice {}.", 22 | "a photo of the nice {}.", 23 | "a photo of a cool {}.", 24 | "a photo of the cool {}.", 25 | "a photo of a weird {}.", 26 | "a photo of the weird {}.", 27 | "a photo of a small {}.", 28 | "a photo of the small {}.", 29 | "a photo of a large {}.", 30 | "a photo of the large {}.", 31 | "a photo of a clean {}.", 32 | "a photo of the clean {}.", 33 | "a photo of a dirty {}.", 34 | "a photo of the dirty {}.", 35 | "a bright photo of a {}.", 36 | "a bright photo of the {}.", 37 | "a dark photo of a {}.", 38 | "a dark photo of the {}.", 39 | "a photo of a hard to see {}.", 40 | "a photo of the hard to see {}.", 41 | "a low resolution photo of a {}.", 42 | "a low resolution photo of the {}.", 43 | "a cropped photo of a {}.", 44 | "a cropped photo of the {}.", 45 | "a close-up photo of a {}.", 46 | "a close-up photo of the {}.", 47 | "a jpeg corrupted photo of a {}.", 48 | "a jpeg corrupted photo of the {}.", 49 | "a blurry photo of a {}.", 50 | "a blurry photo of the {}.", 51 | "a pixelated photo of a {}.", 52 | "a pixelated photo of the {}.", 53 | "a black and white photo of the {}.", 54 | "a black and white photo of a {}", 55 | "a plastic {}.", 56 | "the plastic {}.", 57 | "a toy {}.", 58 | "the toy {}.", 59 | "a plushie {}.", 60 | "the plushie {}.", 61 | "a cartoon {}.", 62 | "the cartoon {}.", 63 | "an embroidered {}.", 64 | "the embroidered {}.", 65 | "a painting of the {}.", 66 | "a painting of a {}." 
67 | ], 68 | "shapenet_64": [ 69 | "a point cloud model of {}.", 70 | "There is a {} in the scene.", 71 | "There is the {} in the scene.", 72 | "a photo of a {} in the scene.", 73 | "a photo of the {} in the scene.", 74 | "a photo of one {} in the scene.", 75 | "itap of a {}.", 76 | "itap of my {}.", 77 | "itap of the {}.", 78 | "a photo of a {}.", 79 | "a photo of my {}.", 80 | "a photo of the {}.", 81 | "a photo of one {}.", 82 | "a photo of many {}.", 83 | "a good photo of a {}.", 84 | "a good photo of the {}.", 85 | "a bad photo of a {}.", 86 | "a bad photo of the {}.", 87 | "a photo of a nice {}.", 88 | "a photo of the nice {}.", 89 | "a photo of a cool {}.", 90 | "a photo of the cool {}.", 91 | "a photo of a weird {}.", 92 | "a photo of the weird {}.", 93 | "a photo of a small {}.", 94 | "a photo of the small {}.", 95 | "a photo of a large {}.", 96 | "a photo of the large {}.", 97 | "a photo of a clean {}.", 98 | "a photo of the clean {}.", 99 | "a photo of a dirty {}.", 100 | "a photo of the dirty {}.", 101 | "a bright photo of a {}.", 102 | "a bright photo of the {}.", 103 | "a dark photo of a {}.", 104 | "a dark photo of the {}.", 105 | "a photo of a hard to see {}.", 106 | "a photo of the hard to see {}.", 107 | "a low resolution photo of a {}.", 108 | "a low resolution photo of the {}.", 109 | "a cropped photo of a {}.", 110 | "a cropped photo of the {}.", 111 | "a close-up photo of a {}.", 112 | "a close-up photo of the {}.", 113 | "a jpeg corrupted photo of a {}.", 114 | "a jpeg corrupted photo of the {}.", 115 | "a blurry photo of a {}.", 116 | "a blurry photo of the {}.", 117 | "a pixelated photo of a {}.", 118 | "a pixelated photo of the {}.", 119 | "a black and white photo of the {}.", 120 | "a black and white photo of a {}", 121 | "a plastic {}.", 122 | "the plastic {}.", 123 | "a toy {}.", 124 | "the toy {}.", 125 | "a plushie {}.", 126 | "the plushie {}.", 127 | "a cartoon {}.", 128 | "the cartoon {}.", 129 | "an embroidered {}.", 130 | "the embroidered {}.", 131 | "a painting of the {}.", 132 | "a painting of a {}." 133 | ] 134 | 135 | } -------------------------------------------------------------------------------- /util/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
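# Usage sketch (illustrative, not part of the original file): each function
# takes float rows of box corners with a trailing confidence score and returns
# the indices of the boxes that survive suppression. For the 2D case, with a
# hypothetical trio of boxes where the second heavily overlaps the first:
#
#   import numpy as np
#   boxes = np.array([[0.0, 0.0, 1.0, 1.0, 0.9],
#                     [0.1, 0.1, 1.1, 1.1, 0.8],
#                     [5.0, 5.0, 6.0, 6.0, 0.7]])
#   keep = nms_2d_faster(boxes, overlap_threshold=0.5)  # -> [0, 2]
#
# The 3D variants operate the same way on (x1,y1,z1,x2,y2,z2,score) rows;
# nms_3d_faster_samecls additionally zeroes the overlap between boxes of
# different class ids, so suppression only happens within a class.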
2 | 3 | import numpy as np 4 | 5 | # boxes are axis-aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score)
6 | """ Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 7 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 8 | """ 9 | 10 | 11 | def nms_2d(boxes, overlap_threshold): 12 | x1 = boxes[:, 0] 13 | y1 = boxes[:, 1] 14 | x2 = boxes[:, 2] 15 | y2 = boxes[:, 3] 16 | score = boxes[:, 4] 17 | area = (x2 - x1) * (y2 - y1) 18 | 19 | I = np.argsort(score) 20 | pick = [] 21 | while I.size != 0: 22 | last = I.size 23 | i = I[-1] 24 | pick.append(i) 25 | suppress = [last - 1] 26 | for pos in range(last - 1): 27 | j = I[pos] 28 | xx1 = max(x1[i], x1[j]) 29 | yy1 = max(y1[i], y1[j]) 30 | xx2 = min(x2[i], x2[j]) 31 | yy2 = min(y2[i], y2[j]) 32 | w = xx2 - xx1 33 | h = yy2 - yy1 34 | if w > 0 and h > 0: 35 | o = w * h / area[j] 36 | print("Overlap is", o) 37 | if o > overlap_threshold: 38 | suppress.append(pos) 39 | I = np.delete(I, suppress) 40 | return pick 41 | 42 | 43 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] 47 | y2 = boxes[:, 3] 48 | score = boxes[:, 4] 49 | area = (x2 - x1) * (y2 - y1) 50 | 51 | I = np.argsort(score) 52 | pick = [] 53 | while I.size != 0: 54 | last = I.size 55 | i = I[-1] 56 | pick.append(i) 57 | 58 | xx1 = np.maximum(x1[i], x1[I[: last - 1]]) 59 | yy1 = np.maximum(y1[i], y1[I[: last - 1]]) 60 | xx2 = np.minimum(x2[i], x2[I[: last - 1]]) 61 | yy2 = np.minimum(y2[i], y2[I[: last - 1]]) 62 | 63 | w = np.maximum(0, xx2 - xx1) 64 | h = np.maximum(0, yy2 - yy1) 65 | 66 | if old_type: 67 | o = (w * h) / area[I[: last - 1]] 68 | else: 69 | inter = w * h 70 | o = inter / (area[i] + area[I[: last - 1]] - inter) 71 | 72 | I = np.delete( 73 | I, np.concatenate(([last - 1], np.where(o > overlap_threshold)[0])) 74 | ) 75 | 76 | return pick 77 | 78 | 79 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 80 | x1 = boxes[:, 0] 81 | y1 = boxes[:, 1] 82 | z1 = boxes[:, 2] 83 | x2 = boxes[:, 3] 84 | y2 = boxes[:, 4] 85 | z2 = boxes[:, 5] 86 | score = boxes[:, 6] 87 | area = (x2 - x1) * (y2 - y1) * (z2 - z1) 88 | 89 | I = np.argsort(score) 90 | pick = [] 91 | while I.size != 0: 92 | last = I.size 93 | i = I[-1] 94 | pick.append(i) 95 | 96 | xx1 = np.maximum(x1[i], x1[I[: last - 1]]) 97 | yy1 = np.maximum(y1[i], y1[I[: last - 1]]) 98 | zz1 = np.maximum(z1[i], z1[I[: last - 1]]) 99 | xx2 = np.minimum(x2[i], x2[I[: last - 1]]) 100 | yy2 = np.minimum(y2[i], y2[I[: last - 1]]) 101 | zz2 = np.minimum(z2[i], z2[I[: last - 1]]) 102 | 103 | l = np.maximum(0, xx2 - xx1) 104 | w = np.maximum(0, yy2 - yy1) 105 | h = np.maximum(0, zz2 - zz1) 106 | 107 | if old_type: 108 | o = (l * w * h) / area[I[: last - 1]] 109 | else: 110 | inter = l * w * h 111 | o = inter / (area[i] + area[I[: last - 1]] - inter) 112 | 113 | I = np.delete( 114 | I, np.concatenate(([last - 1], np.where(o > overlap_threshold)[0])) 115 | ) 116 | 117 | return pick 118 | 119 | 120 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 121 | x1 = boxes[:, 0] 122 | y1 = boxes[:, 1] 123 | z1 = boxes[:, 2] 124 | x2 = boxes[:, 3] 125 | y2 = boxes[:, 4] 126 | z2 = boxes[:, 5] 127 | score = boxes[:, 6] 128 | cls = boxes[:, 7] 129 | area = (x2 - x1) * (y2 - y1) * (z2 - z1) 130 | 131 | I = np.argsort(score) 132 | pick = [] 133 | while I.size != 0: 134 | last = I.size 135 | i = I[-1] 136 | pick.append(i) 137 | 138 | xx1 = np.maximum(x1[i], x1[I[: last - 1]]) 139 | yy1 = 
np.maximum(y1[i], y1[I[: last - 1]]) 140 | zz1 = np.maximum(z1[i], z1[I[: last - 1]]) 141 | xx2 = np.minimum(x2[i], x2[I[: last - 1]]) 142 | yy2 = np.minimum(y2[i], y2[I[: last - 1]]) 143 | zz2 = np.minimum(z2[i], z2[I[: last - 1]]) 144 | cls1 = cls[i] 145 | cls2 = cls[I[: last - 1]] 146 | 147 | l = np.maximum(0, xx2 - xx1) 148 | w = np.maximum(0, yy2 - yy1) 149 | h = np.maximum(0, zz2 - zz1) 150 | 151 | if old_type: 152 | o = (l * w * h) / area[I[: last - 1]] 153 | else: 154 | inter = l * w * h 155 | o = inter / (area[i] + area[I[: last - 1]] - inter) 156 | o = o * (cls1 == cls2) 157 | 158 | I = np.delete( 159 | I, np.concatenate(([last - 1], np.where(o > overlap_threshold)[0])) 160 | ) 161 | 162 | return pick 163 | -------------------------------------------------------------------------------- /Uni3D/data/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch.distributed as dist 3 | 4 | logger_initialized = {} 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO, name='main'): 7 | """Get root logger and add a keyword filter to it. 8 | The logger will be initialized if it has not been initialized. By default a 9 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 10 | also be added. The name of the root logger is the top-level package name, 11 | e.g., "main". 12 | Args: 13 | log_file (str, optional): File path of log. Defaults to None. 14 | log_level (int, optional): The level of logger. 15 | Defaults to logging.INFO. 16 | name (str, optional): The name of the root logger, also used as a 17 | filter keyword. Defaults to 'main'. 18 | Returns: 19 | :obj:`logging.Logger`: The obtained logger 20 | """ 21 | logger = get_logger(name=name, log_file=log_file, log_level=log_level) 22 | # add a logging filter that only passes records whose logger name contains the keyword 23 | logging_filter = logging.Filter(name) 24 | logging_filter.filter = lambda record: record.name.find(name) != -1 25 | logger.addFilter(logging_filter) 26 | return logger 27 | 28 | 29 | def get_logger(name, log_file=None, log_level=logging.INFO, file_mode='w'): 30 | """Initialize and get a logger by name. 31 | If the logger has not been initialized, this method will initialize the 32 | logger by adding one or two handlers, otherwise the initialized logger will 33 | be directly returned. During initialization, a StreamHandler will always be 34 | added. If `log_file` is specified and the process rank is 0, a FileHandler 35 | will also be added. 36 | Args: 37 | name (str): Logger name. 38 | log_file (str | None): The log filename. If specified, a FileHandler 39 | will be added to the logger. 40 | log_level (int): The logger level. Note that only the process of 41 | rank 0 is affected, and other processes will set the level to 42 | "Error" and thus be silent most of the time. 43 | file_mode (str): The file mode used in opening log file. 44 | Defaults to 'w'. 45 | Returns: 46 | logging.Logger: The expected logger. 47 | """ 48 | logger = logging.getLogger(name) 49 | if name in logger_initialized: 50 | return logger 51 | # handle hierarchical names 52 | # e.g., logger "a" is initialized, then logger "a.b" will skip the 53 | # initialization since it is a child of "a". 54 | for logger_name in logger_initialized: 55 | if name.startswith(logger_name): 56 | return logger 57 | 58 | # handle duplicate logs to the console 59 | # Starting in 1.8.0, PyTorch DDP attaches a StreamHandler (NOTSET) 60 | # to the root logger. 
As logger.propagate is True by default, this root 61 | # level handler causes logging messages from rank>0 processes to 62 | # unexpectedly show up on the console, creating much unwanted clutter. 63 | # To fix this issue, we set the root logger's StreamHandler, if any, to log 64 | # at the ERROR level. 65 | for handler in logger.root.handlers: 66 | if type(handler) is logging.StreamHandler: 67 | handler.setLevel(logging.ERROR) 68 | 69 | stream_handler = logging.StreamHandler() 70 | handlers = [stream_handler] 71 | 72 | if dist.is_available() and dist.is_initialized(): 73 | rank = dist.get_rank() 74 | else: 75 | rank = 0 76 | 77 | # only rank 0 will add a FileHandler 78 | if rank == 0 and log_file is not None: 79 | # Here, the default behaviour of the official logger is 'a'. Thus, we 80 | # provide an interface to change the file mode to the default 81 | # behaviour. 82 | file_handler = logging.FileHandler(log_file, file_mode) 83 | handlers.append(file_handler) 84 | 85 | formatter = logging.Formatter( 86 | '%(asctime)s - %(name)s - %(levelname)s - %(message)s') 87 | for handler in handlers: 88 | handler.setFormatter(formatter) 89 | handler.setLevel(log_level) 90 | logger.addHandler(handler) 91 | 92 | if rank == 0: 93 | logger.setLevel(log_level) 94 | else: 95 | logger.setLevel(logging.ERROR) 96 | 97 | logger_initialized[name] = True 98 | 99 | 100 | return logger 101 | 102 | 103 | def print_log(msg, logger=None, level=logging.INFO): 104 | """Print a log message. 105 | Args: 106 | msg (str): The message to be logged. 107 | logger (logging.Logger | str | None): The logger to be used. 108 | Some special loggers are: 109 | - "silent": no message will be printed. 110 | - other str: the logger obtained with `get_root_logger(logger)`. 111 | - None: The `print()` method will be used to print log messages. 112 | level (int): Logging level. Only available when `logger` is a Logger 113 | object or "root". 
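        Example (illustrative)::
            print_log('loading data', logger='main')    # routed via the 'main' logger
            print_log('scratch info', logger='silent')  # dropped entirely
            print_log('plain message')                  # falls back to print()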
114 | """ 115 | if logger is None: 116 | print(msg) 117 | elif isinstance(logger, logging.Logger): 118 | logger.log(level, msg) 119 | elif logger == 'silent': 120 | pass 121 | elif isinstance(logger, str): 122 | _logger = get_logger(logger) 123 | _logger.log(level, msg) 124 | else: 125 | raise TypeError( 126 | 'logger should be either a logging.Logger object, str, ' 127 | f'"silent" or None, but got {type(logger)}') -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 
161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 -------------------------------------------------------------------------------- /models/position_embedding.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # V-DETR 3 | # Copyright (c) V-DETR authors. All Rights Reserved. 4 | # ------------------------------------------------------------------------ 5 | # Modified from : 6 | # 3DETR 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | # Group-Free-3D 9 | # Copyright (c) Group-Free-3D authors. All Rights Reserved. 10 | # ------------------------------------------------------------------------ 11 | """ 12 | Various positional encodings for the transformer. 
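PositionEmbeddingCoordsSine below supports two flavours: interleaved sin/cos
("sine") encodings of each xyz channel, and random Fourier features
("fourier") obtained by projecting coordinates through a fixed Gaussian
matrix before taking sin/cos.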
13 | """ 14 | import math 15 | import torch 16 | from torch import nn 17 | import numpy as np 18 | from util.pc_util import shift_scale_points 19 | 20 | 21 | class PositionEmbeddingCoordsSine(nn.Module): 22 | def __init__( 23 | self, 24 | temperature=10000, 25 | normalize=False, 26 | scale=None, 27 | pos_type="fourier", 28 | d_pos=None, 29 | d_in=3, 30 | gauss_scale=1.0, 31 | ): 32 | super().__init__() 33 | self.temperature = temperature 34 | self.normalize = normalize 35 | if scale is not None and normalize is False: 36 | raise ValueError("normalize should be True if scale is passed") 37 | if scale is None: 38 | scale = 2 * math.pi 39 | assert pos_type in ["sine", "fourier"] 40 | self.pos_type = pos_type 41 | self.scale = scale 42 | if pos_type == "fourier": 43 | assert d_pos is not None 44 | assert d_pos % 2 == 0 45 | # define a gaussian matrix input_ch -> output_ch 46 | B = torch.empty((d_in, d_pos // 2)).normal_() 47 | B *= gauss_scale 48 | self.register_buffer("gauss_B", B) 49 | self.d_pos = d_pos 50 | 51 | def get_sine_embeddings(self, xyz, num_channels, input_range): 52 | # clone coords so that shift/scale operations do not affect original tensor 53 | orig_xyz = xyz 54 | xyz = orig_xyz.clone() 55 | 56 | ncoords = xyz.shape[1] 57 | if self.normalize: 58 | xyz = shift_scale_points(xyz, src_range=input_range) 59 | 60 | ndim = num_channels // xyz.shape[2] 61 | if ndim % 2 != 0: 62 | ndim -= 1 63 | # automatically handle remainder by assiging it to the first dim 64 | rems = num_channels - (ndim * xyz.shape[2]) 65 | 66 | assert ( 67 | ndim % 2 == 0 68 | ), f"Cannot handle odd sized ndim={ndim} where num_channels={num_channels} and xyz={xyz.shape}" 69 | 70 | final_embeds = [] 71 | prev_dim = 0 72 | 73 | for d in range(xyz.shape[2]): 74 | cdim = ndim 75 | if rems > 0: 76 | # add remainder in increments of two to maintain even size 77 | cdim += 2 78 | rems -= 2 79 | 80 | if cdim != prev_dim: 81 | dim_t = torch.arange(cdim, dtype=torch.float32, device=xyz.device) 82 | dim_t = self.temperature ** (2 * (dim_t // 2) / cdim) 83 | 84 | # create batch x cdim x nccords embedding 85 | raw_pos = xyz[:, :, d] 86 | if self.scale: 87 | raw_pos *= self.scale 88 | pos = raw_pos[:, :, None] / dim_t 89 | pos = torch.stack( 90 | (pos[:, :, 0::2].sin(), pos[:, :, 1::2].cos()), dim=3 91 | ).flatten(2) 92 | final_embeds.append(pos) 93 | prev_dim = cdim 94 | 95 | final_embeds = torch.cat(final_embeds, dim=2).permute(0, 2, 1) 96 | return final_embeds 97 | 98 | def get_fourier_embeddings(self, xyz, num_channels=None, input_range=None): 99 | # Follows - https://people.eecs.berkeley.edu/~bmild/fourfeat/index.html 100 | 101 | if num_channels is None: 102 | num_channels = self.gauss_B.shape[1] * 2 103 | 104 | bsize, npoints = xyz.shape[0], xyz.shape[1] 105 | assert num_channels > 0 and num_channels % 2 == 0 106 | d_in, max_d_out = self.gauss_B.shape[0], self.gauss_B.shape[1] 107 | d_out = num_channels // 2 108 | assert d_out <= max_d_out 109 | assert d_in == xyz.shape[-1] 110 | 111 | # clone coords so that shift/scale operations do not affect original tensor 112 | orig_xyz = xyz 113 | xyz = orig_xyz.clone() 114 | 115 | ncoords = xyz.shape[1] 116 | if self.normalize: 117 | xyz = shift_scale_points(xyz, src_range=input_range) 118 | 119 | xyz *= 2 * np.pi 120 | xyz_proj = torch.mm(xyz.view(-1, d_in), self.gauss_B[:, :d_out]).view( 121 | bsize, npoints, d_out 122 | ) 123 | final_embeds = [xyz_proj.sin(), xyz_proj.cos()] 124 | 125 | # return batch x d_pos x npoints embedding 126 | final_embeds = torch.cat(final_embeds, 
dim=2).permute(0, 2, 1) 127 | return final_embeds 128 | 129 | def forward(self, xyz, num_channels=None, input_range=None): 130 | assert isinstance(xyz, torch.Tensor) 131 | assert xyz.ndim == 3 132 | # xyz is batch x npoints x 3 133 | if self.pos_type == "sine": 134 | with torch.no_grad(): 135 | return self.get_sine_embeddings(xyz, num_channels, input_range) 136 | elif self.pos_type == "fourier": 137 | with torch.no_grad(): 138 | return self.get_fourier_embeddings(xyz, num_channels, input_range) 139 | else: 140 | raise ValueError(f"Unknown {self.pos_type}") 141 | 142 | def extra_repr(self): 143 | st = f"type={self.pos_type}, scale={self.scale}, normalize={self.normalize}" 144 | if hasattr(self, "gauss_B"): 145 | st += ( 146 | f", gaussB={self.gauss_B.shape}, gaussBsum={self.gauss_B.sum().item()}" 147 | ) 148 | return st 149 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | 4 | #include <math.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | 8 | #include "cuda_utils.h" 9 | 10 | // input: unknown(b, n, 3) known(b, m, 3) 11 | // output: dist2(b, n, 3), idx(b, n, 3) 12 | __global__ void three_nn_kernel(int b, int n, int m, 13 | const float *__restrict__ unknown, 14 | const float *__restrict__ known, 15 | float *__restrict__ dist2, 16 | int *__restrict__ idx) { 17 | int batch_index = blockIdx.x; 18 | unknown += batch_index * n * 3; 19 | known += batch_index * m * 3; 20 | dist2 += batch_index * n * 3; 21 | idx += batch_index * n * 3; 22 | 23 | int index = threadIdx.x; 24 | int stride = blockDim.x; 25 | for (int j = index; j < n; j += stride) { 26 | float ux = unknown[j * 3 + 0]; 27 | float uy = unknown[j * 3 + 1]; 28 | float uz = unknown[j * 3 + 2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; 39 | besti3 = besti2; 40 | best2 = best1; 41 | besti2 = besti1; 42 | best1 = d; 43 | besti1 = k; 44 | } else if (d < best2) { 45 | best3 = best2; 46 | besti3 = besti2; 47 | best2 = d; 48 | besti2 = k; 49 | } else if (d < best3) { 50 | best3 = d; 51 | besti3 = k; 52 | } 53 | } 54 | dist2[j * 3 + 0] = best1; 55 | dist2[j * 3 + 1] = best2; 56 | dist2[j * 3 + 2] = best3; 57 | 58 | idx[j * 3 + 0] = besti1; 59 | idx[j * 3 + 1] = besti2; 60 | idx[j * 3 + 2] = besti3; 61 | } 62 | } 63 | 64 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 65 | const float *known, float *dist2, int *idx) { 66 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 67 | three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known, 68 | dist2, idx); 69 | 70 | CUDA_CHECK_ERRORS(); 71 | } 72 | 73 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 74 | // output: out(b, c, n) 75 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 76 | const float *__restrict__ points, 77 | const int *__restrict__ idx, 78 | const float *__restrict__ weight, 79 | float *__restrict__ out) { 80 | int batch_index = blockIdx.x; 81 | points += batch_index * m * c; 82 | 83 | idx += batch_index * n * 3; 84 | weight += batch_index * n * 3; 85 | 86 | out += batch_index * n * c; 87 | 88 | const int index = threadIdx.y * 
blockDim.x + threadIdx.x; 89 | const int stride = blockDim.y * blockDim.x; 90 | for (int i = index; i < c * n; i += stride) { 91 | const int l = i / n; 92 | const int j = i % n; 93 | float w1 = weight[j * 3 + 0]; 94 | float w2 = weight[j * 3 + 1]; 95 | float w3 = weight[j * 3 + 2]; 96 | 97 | int i1 = idx[j * 3 + 0]; 98 | int i2 = idx[j * 3 + 1]; 99 | int i3 = idx[j * 3 + 2]; 100 | 101 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 102 | points[l * m + i3] * w3; 103 | } 104 | } 105 | 106 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 107 | const float *points, const int *idx, 108 | const float *weight, float *out) { 109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 110 | three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 111 | b, c, m, n, points, idx, weight, out); 112 | 113 | CUDA_CHECK_ERRORS(); 114 | } 115 | 116 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 117 | // output: grad_points(b, c, m) 118 | 119 | __global__ void three_interpolate_grad_kernel( 120 | int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, 122 | float *__restrict__ grad_points) { 123 | int batch_index = blockIdx.x; 124 | grad_out += batch_index * n * c; 125 | idx += batch_index * n * 3; 126 | weight += batch_index * n * 3; 127 | grad_points += batch_index * m * c; 128 | 129 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 130 | const int stride = blockDim.y * blockDim.x; 131 | for (int i = index; i < c * n; i += stride) { 132 | const int l = i / n; 133 | const int j = i % n; 134 | float w1 = weight[j * 3 + 0]; 135 | float w2 = weight[j * 3 + 1]; 136 | float w3 = weight[j * 3 + 2]; 137 | 138 | int i1 = idx[j * 3 + 0]; 139 | int i2 = idx[j * 3 + 1]; 140 | int i3 = idx[j * 3 + 2]; 141 | 142 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 143 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 144 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 145 | } 146 | } 147 | 148 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 149 | const float *grad_out, 150 | const int *idx, const float *weight, 151 | float *grad_points) { 152 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 153 | three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>( 154 | b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | CUDA_CHECK_ERRORS(); 157 | } 158 | -------------------------------------------------------------------------------- /Uni3D/utils/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Modified from github.com/openai/CLIP 2 | import gzip 3 | import html 4 | import os 5 | from functools import lru_cache 6 | 7 | import ftfy 8 | import regex as re 9 | import torch 10 | 11 | 12 | @lru_cache() 13 | def default_bpe(): 14 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") 15 | 16 | 17 | @lru_cache() 18 | def bytes_to_unicode(): 19 | """ 20 | Returns list of utf-8 byte and a corresponding list of unicode strings. 21 | The reversible bpe codes work on unicode strings. 22 | This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 23 | When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. 24 | This is a significant percentage of your normal, say, 32K bpe vocab. 25 | To avoid that, we want lookup tables between utf-8 bytes and unicode strings. 
26 | And avoids mapping to whitespace/control characters the bpe code barfs on. 27 | """ 28 | bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1)) 29 | cs = bs[:] 30 | n = 0 31 | for b in range(2**8): 32 | if b not in bs: 33 | bs.append(b) 34 | cs.append(2**8+n) 35 | n += 1 36 | cs = [chr(n) for n in cs] 37 | return dict(zip(bs, cs)) 38 | 39 | 40 | def get_pairs(word): 41 | """Return set of symbol pairs in a word. 42 | Word is represented as tuple of symbols (symbols being variable-length strings). 43 | """ 44 | pairs = set() 45 | prev_char = word[0] 46 | for char in word[1:]: 47 | pairs.add((prev_char, char)) 48 | prev_char = char 49 | return pairs 50 | 51 | 52 | def basic_clean(text): 53 | text = ftfy.fix_text(text) 54 | text = html.unescape(html.unescape(text)) 55 | return text.strip() 56 | 57 | 58 | def whitespace_clean(text): 59 | text = re.sub(r'\s+', ' ', text) 60 | text = text.strip() 61 | return text 62 | 63 | 64 | class SimpleTokenizer(object): 65 | def __init__(self, bpe_path: str = default_bpe()): 66 | self.byte_encoder = bytes_to_unicode() 67 | self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} 68 | merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') 69 | merges = merges[1:49152-256-2+1] 70 | merges = [tuple(merge.split()) for merge in merges] 71 | vocab = list(bytes_to_unicode().values()) 72 | vocab = vocab + [v+'</w>' for v in vocab] 73 | for merge in merges: 74 | vocab.append(''.join(merge)) 75 | vocab.extend(['<|startoftext|>', '<|endoftext|>']) 76 | self.encoder = dict(zip(vocab, range(len(vocab)))) 77 | self.decoder = {v: k for k, v in self.encoder.items()} 78 | self.bpe_ranks = dict(zip(merges, range(len(merges)))) 79 | self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} 80 | self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE) 81 | 82 | def bpe(self, token): 83 | if token in self.cache: 84 | return self.cache[token] 85 | word = tuple(token[:-1]) + ( token[-1] + '</w>',) 86 | pairs = get_pairs(word) 87 | 88 | if not pairs: 89 | return token+'</w>' 90 | 91 | while True: 92 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf'))) 93 | if bigram not in self.bpe_ranks: 94 | break 95 | first, second = bigram 96 | new_word = [] 97 | i = 0 98 | while i < len(word): 99 | try: 100 | j = word.index(first, i) 101 | new_word.extend(word[i:j]) 102 | i = j 103 | except: 104 | new_word.extend(word[i:]) 105 | break 106 | 107 | if word[i] == first and i < len(word)-1 and word[i+1] == second: 108 | new_word.append(first+second) 109 | i += 2 110 | else: 111 | new_word.append(word[i]) 112 | i += 1 113 | new_word = tuple(new_word) 114 | word = new_word 115 | if len(word) == 1: 116 | break 117 | else: 118 | pairs = get_pairs(word) 119 | word = ' '.join(word) 120 | self.cache[token] = word 121 | return word 122 | 123 | def encode(self, text): 124 | bpe_tokens = [] 125 | text = whitespace_clean(basic_clean(text)).lower() 126 | for token in re.findall(self.pat, text): 127 | token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) 128 | bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) 129 | return bpe_tokens 130 | 131 | def decode(self, tokens): 132 | text = ''.join([self.decoder[token] for token in tokens]) 133 | text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ') 134 | return text 135 
| 136 | def __call__(self, texts, context_length=77): 137 | if isinstance(texts, str): 138 | texts = [texts] 139 | 140 | sot_token = self.encoder["<|startoftext|>"] 141 | eot_token = self.encoder["<|endoftext|>"] 142 | all_tokens = [[sot_token] + self.encode(text) + [eot_token] for text in texts] 143 | result = torch.zeros(len(all_tokens), context_length, dtype=torch.long) 144 | 145 | for i, tokens in enumerate(all_tokens): 146 | tokens = tokens[:context_length] 147 | result[i, :len(tokens)] = torch.tensor(tokens) 148 | 149 | if len(result) == 1: 150 | return result[0] 151 | return result -------------------------------------------------------------------------------- /scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Load Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | """ 9 | 10 | # python imports 11 | import math 12 | import os, sys, argparse 13 | import inspect 14 | import json 15 | import pdb 16 | 17 | try: 18 | import numpy as np 19 | except: 20 | print("Failed to import numpy package.") 21 | sys.exit(-1) 22 | 23 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 24 | import scannet_utils 25 | 26 | def read_aggregation(filename): 27 | assert os.path.isfile(filename) 28 | object_id_to_segs = {} 29 | label_to_segs = {} 30 | with open(filename) as f: 31 | data = json.load(f) 32 | num_objects = len(data['segGroups']) 33 | for i in range(num_objects): 34 | object_id = data['segGroups'][i]['objectId'] + 1 # instance ids should be 1-indexed 35 | label = data['segGroups'][i]['label'] 36 | segs = data['segGroups'][i]['segments'] 37 | object_id_to_segs[object_id] = segs 38 | if label in label_to_segs: 39 | label_to_segs[label].extend(segs) 40 | else: 41 | label_to_segs[label] = segs 42 | return object_id_to_segs, label_to_segs 43 | 44 | 45 | def read_segmentation(filename): 46 | assert os.path.isfile(filename) 47 | seg_to_verts = {} 48 | with open(filename) as f: 49 | data = json.load(f) 50 | num_verts = len(data['segIndices']) 51 | for i in range(num_verts): 52 | seg_id = data['segIndices'][i] 53 | if seg_id in seg_to_verts: 54 | seg_to_verts[seg_id].append(i) 55 | else: 56 | seg_to_verts[seg_id] = [i] 57 | return seg_to_verts, num_verts 58 | 59 | 60 | def export(mesh_file, agg_file, seg_file, meta_file, label_map_file, output_file=None): 61 | """ points are XYZ RGB (RGB in 0-255), 62 | semantic label as nyu40 ids, 63 | instance label as 1-#instance, 64 | box as (cx,cy,cz,dx,dy,dz,semantic_label) 65 | """ 66 | label_map = scannet_utils.read_label_mapping(label_map_file, 67 | label_from='raw_category', label_to='nyu40id') 68 | mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 69 | 70 | # Load scene axis alignment matrix 71 | lines = open(meta_file).readlines() 72 | for line in lines: 73 | if 'axisAlignment' in line: 74 | axis_align_matrix = [float(x) \ 75 | for x in line.rstrip().strip('axisAlignment = ').split(' ')] 76 | break 77 | axis_align_matrix = np.array(axis_align_matrix).reshape((4,4)) 78 | pts = np.ones((mesh_vertices.shape[0], 4)) 79 | pts[:,0:3] = mesh_vertices[:,0:3] 80 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 81 | mesh_vertices[:,0:3] = pts[:,0:3] 82 | 83 | # Load semantic and instance labels 84 | 
object_id_to_segs, label_to_segs = read_aggregation(agg_file) 85 | seg_to_verts, num_verts = read_segmentation(seg_file) 86 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 87 | object_id_to_label_id = {} 88 | for label, segs in label_to_segs.items(): 89 | label_id = label_map[label] 90 | for seg in segs: 91 | verts = seg_to_verts[seg] 92 | label_ids[verts] = label_id 93 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 94 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 95 | for object_id, segs in object_id_to_segs.items(): 96 | for seg in segs: 97 | verts = seg_to_verts[seg] 98 | instance_ids[verts] = object_id 99 | if object_id not in object_id_to_label_id: 100 | object_id_to_label_id[object_id] = label_ids[verts][0] 101 | instance_bboxes = np.zeros((num_instances,7)) 102 | for obj_id in object_id_to_segs: 103 | label_id = object_id_to_label_id[obj_id] 104 | obj_pc = mesh_vertices[instance_ids==obj_id, 0:3] 105 | if len(obj_pc) == 0: continue 106 | # Compute axis aligned box 107 | # An axis aligned bounding box is parameterized by 108 | # (cx,cy,cz) and (dx,dy,dz) and label id 109 | # where (cx,cy,cz) is the center point of the box, 110 | # dx is the x-axis length of the box. 111 | xmin = np.min(obj_pc[:,0]) 112 | ymin = np.min(obj_pc[:,1]) 113 | zmin = np.min(obj_pc[:,2]) 114 | xmax = np.max(obj_pc[:,0]) 115 | ymax = np.max(obj_pc[:,1]) 116 | zmax = np.max(obj_pc[:,2]) 117 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, 118 | xmax-xmin, ymax-ymin, zmax-zmin, label_id]) 119 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 120 | instance_bboxes[obj_id-1,:] = bbox 121 | 122 | if output_file is not None: 123 | np.save(output_file+'_vert.npy', mesh_vertices) 124 | np.save(output_file+'_sem_label.npy', label_ids) 125 | np.save(output_file+'_ins_label.npy', instance_ids) 126 | np.save(output_file+'_bbox.npy', instance_bboxes) 127 | 128 | return mesh_vertices, label_ids, instance_ids,\ 129 | instance_bboxes, object_id_to_label_id 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument('--scan_path', required=True, help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 134 | parser.add_argument('--output_file', required=True, help='output file') 135 | parser.add_argument('--label_map_file', required=True, help='path to scannetv2-labels.combined.tsv') 136 | opt = parser.parse_args() 137 | 138 | scan_name = os.path.split(opt.scan_path)[-1] 139 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 140 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 141 | seg_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.0.010000.segs.json') 142 | meta_file = os.path.join(opt.scan_path, scan_name + '.txt') # includes axisAlignment info for the train set scans. 143 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, opt.output_file) 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /Uni3D/utils/dist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
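# Distributed-training helpers: rank/world-size queries, a rank-0-only print()
# override, process-group setup, and all-reduce/all-gather wrappers. A minimal
# launch sketch (illustrative; assumes one process per GPU and an env:// style
# rendezvous, with rank and world_size provided by the launcher):
#
#   init_distributed(gpu_id=local_rank, global_rank=rank,
#                    world_size=world_size, dist_url="env://",
#                    dist_backend="nccl")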
2 | import pickle 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def is_distributed(): 9 | if not dist.is_available() or not dist.is_initialized(): 10 | return False 11 | return True 12 | 13 | 14 | def get_rank(): 15 | if not is_distributed(): 16 | return 0 17 | return dist.get_rank() 18 | 19 | 20 | def is_primary(): 21 | return get_rank() == 0 22 | 23 | 24 | def get_world_size(): 25 | if not is_distributed(): 26 | return 1 27 | return dist.get_world_size() 28 | 29 | 30 | def barrier(): 31 | if not is_distributed(): 32 | return 33 | torch.distributed.barrier() 34 | 35 | 36 | def setup_print_for_distributed(is_primary): 37 | """ 38 | This function disables printing when not in primary process 39 | """ 40 | import builtins as __builtin__ 41 | builtin_print = __builtin__.print 42 | 43 | def print(*args, **kwargs): 44 | force = kwargs.pop('force', False) 45 | if is_primary or force: 46 | builtin_print(*args, **kwargs) 47 | 48 | __builtin__.print = print 49 | 50 | 51 | def init_distributed(gpu_id, global_rank, world_size, dist_url, dist_backend): 52 | torch.cuda.set_device(gpu_id) 53 | print( 54 | f"| distributed init (rank {global_rank}) (world {world_size}): {dist_url}", 55 | flush=True, 56 | ) 57 | torch.distributed.init_process_group( 58 | backend=dist_backend, 59 | init_method=dist_url, 60 | world_size=world_size, 61 | rank=global_rank, 62 | ) 63 | torch.distributed.barrier() 64 | setup_print_for_distributed(is_primary()) 65 | 66 | 67 | def all_reduce_sum(tensor): 68 | if not is_distributed(): 69 | return tensor 70 | dim_squeeze = False 71 | if tensor.ndim == 0: 72 | tensor = tensor[None, ...] 73 | dim_squeeze = True 74 | torch.distributed.all_reduce(tensor) 75 | if dim_squeeze: 76 | tensor = tensor.squeeze(0) 77 | return tensor 78 | 79 | 80 | def all_reduce_average(tensor): 81 | val = all_reduce_sum(tensor) 82 | return val / get_world_size() 83 | 84 | 85 | # Function from DETR - https://github.com/facebookresearch/detr/blob/master/util/misc.py 86 | def reduce_dict(input_dict, average=True): 87 | """ 88 | Args: 89 | input_dict (dict): all the values will be reduced 90 | average (bool): whether to do average or sum 91 | Reduce the values in the dictionary from all processes so that all processes 92 | have the averaged results. Returns a dict with the same fields as 93 | input_dict, after reduction. 
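    Example (illustrative)::
        loss_dict = {'loss_ce': ce, 'loss_bbox': bb}  # CUDA scalar tensors
        loss_dict_reduced = reduce_dict(loss_dict)    # rank-averaged copies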
94 | """ 95 | world_size = get_world_size() 96 | if world_size < 2: 97 | return input_dict 98 | with torch.no_grad(): 99 | names = [] 100 | values = [] 101 | # sort the keys so that they are consistent across processes 102 | for k in sorted(input_dict.keys()): 103 | names.append(k) 104 | values.append(input_dict[k]) 105 | values = torch.stack(values, dim=0) 106 | torch.distributed.all_reduce(values) 107 | if average: 108 | values /= world_size 109 | reduced_dict = {k: v for k, v in zip(names, values)} 110 | return reduced_dict 111 | 112 | 113 | # Function from https://github.com/facebookresearch/detr/blob/master/util/misc.py 114 | def all_gather_pickle(data, device): 115 | """ 116 | Run all_gather on arbitrary picklable data (not necessarily tensors) 117 | Args: 118 | data: any picklable object 119 | Returns: 120 | list[data]: list of data gathered from each rank 121 | """ 122 | world_size = get_world_size() 123 | if world_size == 1: 124 | return [data] 125 | 126 | # serialized to a Tensor 127 | buffer = pickle.dumps(data) 128 | storage = torch.ByteStorage.from_buffer(buffer) 129 | tensor = torch.ByteTensor(storage).to(device) 130 | 131 | # obtain Tensor size of each rank 132 | local_size = torch.tensor([tensor.numel()], device=device) 133 | size_list = [torch.tensor([0], device=device) for _ in range(world_size)] 134 | dist.all_gather(size_list, local_size) 135 | size_list = [int(size.item()) for size in size_list] 136 | max_size = max(size_list) 137 | 138 | # receiving Tensor from all ranks 139 | # we pad the tensor because torch all_gather does not support 140 | # gathering tensors of different shapes 141 | tensor_list = [] 142 | for _ in size_list: 143 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=device)) 144 | if local_size != max_size: 145 | padding = torch.empty( 146 | size=(max_size - local_size,), dtype=torch.uint8, device=device 147 | ) 148 | tensor = torch.cat((tensor, padding), dim=0) 149 | dist.all_gather(tensor_list, tensor) 150 | 151 | data_list = [] 152 | for size, tensor in zip(size_list, tensor_list): 153 | buffer = tensor.cpu().numpy().tobytes()[:size] 154 | data_list.append(pickle.loads(buffer)) 155 | 156 | return data_list 157 | 158 | 159 | def all_gather_dict(data): 160 | """ 161 | Run all_gather on data which is a dictionary of Tensors 162 | """ 163 | assert isinstance(data, dict) 164 | 165 | gathered_dict = {} 166 | for item_key in data: 167 | if isinstance(data[item_key], torch.Tensor): 168 | if is_distributed(): 169 | data[item_key] = data[item_key].contiguous() 170 | tensor_list = [torch.empty_like(data[item_key]) for _ in range(get_world_size())] 171 | dist.all_gather(tensor_list, data[item_key]) 172 | gathered_tensor = torch.cat(tensor_list, dim=0) 173 | else: 174 | gathered_tensor = data[item_key] 175 | gathered_dict[item_key] = gathered_tensor 176 | return gathered_dict 177 | 178 | def batch_dict_to_cuda(batch_dict,local_rank="cuda:0"): 179 | for key in batch_dict: 180 | if isinstance(batch_dict[key], torch.Tensor): 181 | batch_dict[key] = batch_dict[key].to(local_rank) 182 | elif isinstance(batch_dict[key], list): 183 | if len(batch_dict[key])>0 and isinstance(batch_dict[key][0],torch.Tensor): 184 | batch_dict[key] = [item.to(local_rank) for item in batch_dict[key]] 185 | return batch_dict 186 | 187 | -------------------------------------------------------------------------------- /util/dist.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | import pickle 3 | 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def is_distributed(): 9 | if not dist.is_available() or not dist.is_initialized(): 10 | return False 11 | return True 12 | 13 | 14 | def get_rank(): 15 | if not is_distributed(): 16 | return 0 17 | return dist.get_rank() 18 | 19 | 20 | def is_primary(): 21 | return get_rank() == 0 22 | 23 | 24 | def get_world_size(): 25 | if not is_distributed(): 26 | return 1 27 | return dist.get_world_size() 28 | 29 | 30 | def barrier(): 31 | if not is_distributed(): 32 | return 33 | torch.distributed.barrier() 34 | 35 | 36 | def setup_print_for_distributed(is_primary): 37 | """ 38 | This function disables printing when not in primary process 39 | """ 40 | import builtins as __builtin__ 41 | builtin_print = __builtin__.print 42 | 43 | def print(*args, **kwargs): 44 | force = kwargs.pop('force', False) 45 | if is_primary or force: 46 | builtin_print(*args, **kwargs) 47 | 48 | __builtin__.print = print 49 | 50 | 51 | def init_distributed(gpu_id, global_rank, world_size, dist_url, dist_backend): 52 | torch.cuda.set_device(gpu_id) 53 | print( 54 | f"| distributed init (rank {global_rank}) (world {world_size}): {dist_url}", 55 | flush=True, 56 | ) 57 | torch.distributed.init_process_group( 58 | backend=dist_backend, 59 | init_method=dist_url, 60 | world_size=world_size, 61 | rank=global_rank, 62 | ) 63 | torch.distributed.barrier() 64 | setup_print_for_distributed(is_primary()) 65 | 66 | 67 | def all_reduce_sum(tensor): 68 | if not is_distributed(): 69 | return tensor 70 | dim_squeeze = False 71 | if tensor.ndim == 0: 72 | tensor = tensor[None, ...] 73 | dim_squeeze = True 74 | torch.distributed.all_reduce(tensor) 75 | if dim_squeeze: 76 | tensor = tensor.squeeze(0) 77 | return tensor 78 | 79 | 80 | def all_reduce_average(tensor): 81 | val = all_reduce_sum(tensor) 82 | return val / get_world_size() 83 | 84 | 85 | # Function from DETR - https://github.com/facebookresearch/detr/blob/master/util/misc.py 86 | def reduce_dict(input_dict, average=True): 87 | """ 88 | Args: 89 | input_dict (dict): all the values will be reduced 90 | average (bool): whether to do average or sum 91 | Reduce the values in the dictionary from all processes so that all processes 92 | have the averaged results. Returns a dict with the same fields as 93 | input_dict, after reduction. 
94 | """ 95 | world_size = get_world_size() 96 | if world_size < 2: 97 | return input_dict 98 | with torch.no_grad(): 99 | names = [] 100 | values = [] 101 | # sort the keys so that they are consistent across processes 102 | for k in sorted(input_dict.keys()): 103 | names.append(k) 104 | values.append(input_dict[k]) 105 | values = torch.stack(values, dim=0) 106 | torch.distributed.all_reduce(values) 107 | if average: 108 | values /= world_size 109 | reduced_dict = {k: v for k, v in zip(names, values)} 110 | return reduced_dict 111 | 112 | 113 | # Function from https://github.com/facebookresearch/detr/blob/master/util/misc.py 114 | def all_gather_pickle(data, device): 115 | """ 116 | Run all_gather on arbitrary picklable data (not necessarily tensors) 117 | Args: 118 | data: any picklable object 119 | Returns: 120 | list[data]: list of data gathered from each rank 121 | """ 122 | world_size = get_world_size() 123 | if world_size == 1: 124 | return [data] 125 | 126 | # serialized to a Tensor 127 | buffer = pickle.dumps(data) 128 | storage = torch.ByteStorage.from_buffer(buffer) 129 | tensor = torch.ByteTensor(storage).to(device) 130 | 131 | # obtain Tensor size of each rank 132 | local_size = torch.tensor([tensor.numel()], device=device) 133 | size_list = [torch.tensor([0], device=device) for _ in range(world_size)] 134 | dist.all_gather(size_list, local_size) 135 | size_list = [int(size.item()) for size in size_list] 136 | max_size = max(size_list) 137 | 138 | # receiving Tensor from all ranks 139 | # we pad the tensor because torch all_gather does not support 140 | # gathering tensors of different shapes 141 | tensor_list = [] 142 | for _ in size_list: 143 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=device)) 144 | if local_size != max_size: 145 | padding = torch.empty( 146 | size=(max_size - local_size,), dtype=torch.uint8, device=device 147 | ) 148 | tensor = torch.cat((tensor, padding), dim=0) 149 | dist.all_gather(tensor_list, tensor) 150 | 151 | data_list = [] 152 | for size, tensor in zip(size_list, tensor_list): 153 | buffer = tensor.cpu().numpy().tobytes()[:size] 154 | data_list.append(pickle.loads(buffer)) 155 | 156 | return data_list 157 | 158 | 159 | def all_gather_dict(data): 160 | """ 161 | Run all_gather on data which is a dictionary of Tensors 162 | """ 163 | assert isinstance(data, dict) 164 | 165 | gathered_dict = {} 166 | for item_key in data: 167 | if isinstance(data[item_key], torch.Tensor): 168 | if is_distributed(): 169 | data[item_key] = data[item_key].contiguous() 170 | tensor_list = [torch.empty_like(data[item_key]) for _ in range(get_world_size())] 171 | dist.all_gather(tensor_list, data[item_key]) 172 | gathered_tensor = torch.cat(tensor_list, dim=0) 173 | else: 174 | gathered_tensor = data[item_key] 175 | gathered_dict[item_key] = gathered_tensor 176 | return gathered_dict 177 | 178 | def batch_dict_to_cuda(batch_dict,local_rank="cuda:0"): 179 | # print("batch_dict: ", type(batch_dict)) 180 | if isinstance(batch_dict, dict): 181 | for key in batch_dict: 182 | if isinstance(batch_dict[key], torch.Tensor): 183 | batch_dict[key] = batch_dict[key].to(local_rank) 184 | elif isinstance(batch_dict[key], list): 185 | if len(batch_dict[key])>0 and isinstance(batch_dict[key][0],torch.Tensor): 186 | batch_dict[key] = [item.to(local_rank) for item in batch_dict[key]] 187 | # else: 188 | # for key in batch_dict: 189 | # print("key: ", key) 190 | 191 | return batch_dict 192 | 193 | 
--------------------------------------------------------------------------------
/point2graph.yaml:
--------------------------------------------------------------------------------
name: vdetr
channels:
  - anaconda
  - pytorch
  - nvidia
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - blas=1.0=mkl
  - brotli-python=1.0.9=py38h6a678d5_8
  - bzip2=1.0.8=h5eee18b_6
  - ca-certificates=2024.3.11=h06a4308_0
  - certifi=2024.6.2=py38h06a4308_0
  - charset-normalizer=2.0.4=pyhd3eb1b0_0
  - cuda-cudart=11.8.89=0
  - cuda-cupti=11.8.87=0
  - cuda-libraries=11.8.0=0
  - cuda-nvrtc=11.8.89=0
  - cuda-nvtx=11.8.86=0
  - cuda-runtime=11.8.0=0
  - cudatoolkit=11.3.1=h2bc3f7f_2
  - ffmpeg=4.3=hf484d3e_0
  - freetype=2.12.1=h4a9f257_0
  - gmp=6.2.1=h295c915_3
  - gmpy2=2.1.2=py38heeb90bb_0
  - gnutls=3.6.15=he1e5248_0
  - idna=3.7=py38h06a4308_0
  - intel-openmp=2023.1.0=hdb19cb5_46306
  - jinja2=3.1.4=py38h06a4308_0
  - jpeg=9e=h5eee18b_1
  - lame=3.100=h7b6447c_0
  - lcms2=2.12=h3be6417_0
  - ld_impl_linux-64=2.38=h1181459_1
  - lerc=3.0=h295c915_0
  - libcublas=11.11.3.6=0
  - libcufft=10.9.0.58=0
  - libcufile=1.9.1.3=0
  - libcurand=10.3.5.147=0
  - libcusolver=11.4.1.48=0
  - libcusparse=11.7.5.86=0
  - libdeflate=1.17=h5eee18b_1
  - libffi=3.4.4=h6a678d5_1
  - libgcc-ng=11.2.0=h1234567_1
  - libgfortran-ng=8.4.0=he6ba991_17
  - libgfortran5=8.4.0=he6ba991_17
  - libgomp=11.2.0=h1234567_1
  - libiconv=1.16=h5eee18b_3
  - libidn2=2.3.4=h5eee18b_0
  - libjpeg-turbo=2.0.0=h9bf148f_0
  - libnpp=11.8.0.86=0
  - libnvjpeg=11.9.0.86=0
  - libopenblas=0.3.2=h9ac9557_1
  - libpng=1.6.39=h5eee18b_0
  - libstdcxx-ng=11.2.0=h1234567_1
  - libtasn1=4.19.0=h5eee18b_0
  - libtiff=4.5.1=h6a678d5_0
  - libunistring=0.9.10=h27cfd23_0
  - libwebp-base=1.3.2=h5eee18b_0
  - llvm-openmp=14.0.6=h9e868ea_0
  - lz4-c=1.9.4=h6a678d5_1
  - markupsafe=2.1.3=py38h5eee18b_0
  - mkl=2023.1.0=h213fc3f_46344
  - mkl-service=2.4.0=py38h5eee18b_1
  - mkl_fft=1.3.8=py38h5eee18b_0
  - mkl_random=1.2.4=py38hdb19cb5_0
  - mpc=1.1.0=h10f8cd9_1
  - mpfr=4.0.2=hb69a4c5_1
  - mpmath=1.3.0=py38h06a4308_0
  - ncurses=6.4=h6a678d5_0
  - nettle=3.7.3=hbbd107a_1
  - networkx=3.1=py38h06a4308_0
  - nomkl=2.0=0
  - numpy-base=1.24.3=py38h060ed82_1
  - openblas-devel=0.3.2=0
  - openh264=2.1.1=h4ff587b_0
  - openjpeg=2.4.0=h3ad879b_0
  - openssl=3.0.13=h7f8727e_2
  - pillow=10.3.0=py38h5eee18b_0
  - pip=24.0=py38h06a4308_0
  - pysocks=1.7.1=py38h06a4308_0
  - python=3.8.19=h955ad1f_0
  - pytorch=1.12.1=py3.8_cuda11.3_cudnn8.3.2_0
  - pytorch-cuda=11.8=h7e8668a_5
  - pytorch-mutex=1.0=cuda
  - pyyaml=6.0.1=py38h5eee18b_0
  - readline=8.2=h5eee18b_0
  - sqlite=3.45.3=h5eee18b_0
  - sympy=1.12=py38h06a4308_0
  - tbb=2021.8.0=hdb19cb5_0
  - tk=8.6.14=h39e8969_0
  - torchaudio=0.12.1=py38_cu113
  - torchtriton=2.3.1=py38
  - torchvision=0.13.1=py38_cu113
  - typing_extensions=4.11.0=py38h06a4308_0
  - wheel=0.43.0=py38h06a4308_0
  - xz=5.4.6=h5eee18b_1
  - yaml=0.2.5=h7b6447c_0
  - zlib=1.2.13=h5eee18b_1
  - zstd=1.5.5=hc292b87_2
  - pip:
    - addict==2.4.0
    - aliyun-python-sdk-core==2.15.1
    - aliyun-python-sdk-kms==2.16.3
    - annotated-types==0.7.0
    - asttokens==2.4.1
    - attrs==23.2.0
    - backcall==0.2.0
    - blinker==1.8.2
    - cffi==1.16.0
    - click==8.1.7
    - colorama==0.4.6
    - comm==0.2.2
    - configargparse==1.7
    - contourpy==1.1.1
    - crcmod==1.7
    - cryptography==42.0.8
    - cycler==0.12.1
    - cython==3.0.10
    - dash==2.17.1
    - dash-core-components==2.0.0
    - dash-html-components==2.0.0
    - dash-table==5.0.0
    - decorator==5.1.1
    - deepspeed==0.14.3
    - docker-pycreds==0.4.0
    - easydict==1.13
    - executing==2.0.1
    - fastjsonschema==2.19.1
    - filelock==3.14.0
    - flask==3.0.3
    - fonttools==4.53.0
    - fsspec==2024.6.0
    - ftfy==6.2.0
    - future==1.0.0
    - gitdb==4.0.11
    - gitpython==3.1.43
    - h5py==3.11.0
    - hjson==3.1.0
    - huggingface-hub==0.23.3
    - importlib-metadata==7.1.0
    - importlib-resources==6.4.0
    - ipython==8.12.3
    - ipywidgets==8.1.3
    - itsdangerous==2.2.0
    - jedi==0.19.1
    - jmespath==0.10.0
    - joblib==1.4.2
    - jsonschema==4.22.0
    - jsonschema-specifications==2023.12.1
    - jupyter-core==5.7.2
    - jupyterlab-widgets==3.0.11
    - kiwisolver==1.4.5
    - markdown==3.6
    - markdown-it-py==3.0.0
    - matplotlib==3.7.5
    - matplotlib-inline==0.1.7
    - mdurl==0.1.2
    - minkowskiengine==0.5.4
    - mmcv-full==1.6.1
    - model-index==0.1.11
    - nbformat==5.10.4
    - nest-asyncio==1.6.0
    - ninja==1.11.1.1
    - numpy==1.24.4
    - nvidia-ml-py==12.555.43
    - open-clip-torch==2.24.0
    - open3d==0.18.0
    - opencv-python==4.10.0.82
    - opendatalab==0.0.10
    - openmim==0.3.9
    - openxlab==0.1.0
    - ordered-set==4.1.0
    - oss2==2.17.0
    - packaging==24.1
    - pandas==2.0.3
    - parso==0.8.4
    - pexpect==4.9.0
    - pickleshare==0.7.5
    - pkgutil-resolve-name==1.3.10
    - platformdirs==4.2.2
    - plotly==5.22.0
    - plyfile==1.0.3
    - pointnet2-ops==3.0.0
    - prompt-toolkit==3.0.47
    - protobuf==5.27.1
    - psutil==5.9.8
    - ptyprocess==0.7.0
    - pure-eval==0.2.2
    - py-cpuinfo==9.0.0
    - pycparser==2.22
    - pycryptodome==3.20.0
    - pydantic==2.7.4
    - pydantic-core==2.18.4
    - pygments==2.18.0
    - pyparsing==3.1.2
    - pyquaternion==0.9.9
    - python-dateutil==2.9.0.post0
    - pytz==2023.4
    - pyviz3d==0.3.5
    - referencing==0.35.1
    - regex==2024.5.15
    - requests==2.28.2
    - retrying==1.3.4
    - rich==13.4.2
    - rpds-py==0.18.1
    - safetensors==0.4.3
    - scikit-learn==1.3.2
    - scipy==1.10.1
    - sentencepiece==0.2.0
    - sentry-sdk==2.5.1
    - setproctitle==1.3.3
    - setuptools==60.2.0
    - six==1.16.0
    - smmap==5.0.1
    - stack-data==0.6.3
    - tabulate==0.9.0
    - tenacity==8.3.0
    - threadpoolctl==3.5.0
    - timm==1.0.3
    - tomli==2.0.1
    - torch==1.6.0
    - tqdm==4.65.2
    - traitlets==5.14.3
    - trimesh==4.4.1
    - tzdata==2024.1
    - urllib3==1.26.18
    - wandb==0.17.1
    - wcwidth==0.2.13
    - werkzeug==3.0.3
    - widgetsnbextension==4.0.11
    - yapf==0.40.2
    - zipp==3.19.2
prefix: /root/anaconda3/envs/vdetr
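# --- Editor's note (hedged): this pinned export can be materialized with
# `conda env create -f point2graph.yaml`. The pip section pins torch==1.6.0
# while the conda section pins pytorch=1.12.1 (cuda 11.3) above; after
# install it is worth checking torch.__version__ to confirm which pin
# actually ended up in the environment. ---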
--------------------------------------------------------------------------------
/util/box_intersection.pyx:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
cimport numpy as np
cimport cython
cdef bint boolean_variable = True
np.import_array()


FLOAT = np.float32

@cython.boundscheck(False)
@cython.wraparound(False)
def computeIntersection(cp1, cp2, s, e):
    dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
    dp = [s[0] - e[0], s[1] - e[1]]
    n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    n2 = s[0] * e[1] - s[1] * e[0]
    n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
    return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline bint inside(cp1, cp2, p):
    return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])

@cython.boundscheck(False)
def polygon_clip_unnest(float [:, :] subjectPolygon, float [:, :] clipPolygon):
    """ Clip a polygon with another polygon.

    Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python

    Args:
        subjectPolygon: a list of (x,y) 2d points, any polygon.
        clipPolygon: a list of (x,y) 2d points, has to be *convex*
    Note:
        **points have to be counter-clockwise ordered**

    Return:
        a list of (x,y) vertex points for the intersection polygon.
    """
    outputList = [subjectPolygon[x] for x in range(subjectPolygon.shape[0])]
    cp1 = clipPolygon[-1]
    cdef int lenc = len(clipPolygon)
    cdef int iidx = 0

    # for clipVertex in clipPolygon:
    for cidx in range(lenc):
        clipVertex = clipPolygon[cidx]
        cp2 = clipVertex
        inputList = outputList.copy()
        outputList.clear()
        s = inputList[-1]

        inc = len(inputList)

        # for subjectVertex in inputList:
        for iidx in range(inc):
            subjectVertex = inputList[iidx]
            e = subjectVertex
            if inside(cp1, cp2, e):
                if not inside(cp1, cp2, s):
                    outputList.append(computeIntersection(cp1, cp2, s, e))
                outputList.append(e)
            elif inside(cp1, cp2, s):
                outputList.append(computeIntersection(cp1, cp2, s, e))
            s = e
        cp1 = cp2
        if len(outputList) == 0:
            break
    return outputList
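
# --- Editor's note: hedged sanity check, not part of the original file. ---
# Clipping two overlapping unit squares (counter-clockwise vertices, as the
# docstring requires) should return the corners of their 0.5 x 0.5 overlap:
#
#   subj = np.array([[0, 0], [1, 0], [1, 1], [0, 1]], dtype=np.float32)
#   clip = np.array([[0.5, 0.5], [1.5, 0.5], [1.5, 1.5], [0.5, 1.5]],
#                   dtype=np.float32)
#   inter = polygon_clip_unnest(subj, clip)  # 4 points spanning [0.5, 1]^2
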

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void copy_points(float[:, :] src, float[:, :] dst, Py_ssize_t num_points):
    cdef Py_ssize_t i
    for i in range(num_points):
        dst[i][0] = src[i][0]
        dst[i][1] = src[i][1]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline Py_ssize_t add_point(float[:, :] arr, float[:] point, Py_ssize_t num_points):
    # assert num_points < arr.shape[0] - 1
    arr[num_points][0] = point[0]
    arr[num_points][1] = point[1]
    num_points = num_points + 1
    return num_points

@cython.boundscheck(False)
@cython.wraparound(False)
cdef Py_ssize_t computeIntersection_and_add(float[:] cp1, float[:] cp2, float[:] s, float[:] e, float[:, :] arr, Py_ssize_t num_points):
    cdef float[2] dc
    dc[0] = cp1[0] - cp2[0]
    dc[1] = cp1[1] - cp2[1]

    cdef float[2] dp
    dp[0] = s[0] - e[0]
    dp[1] = s[1] - e[1]

    cdef float n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef float n2 = s[0] * e[1] - s[1] * e[0]
    cdef float n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])

    arr[num_points][0] = (n1 * dp[0] - n2 * dc[0]) * n3
    arr[num_points][1] = (n1 * dp[1] - n2 * dc[1]) * n3
    num_points = num_points + 1

    return num_points

@cython.boundscheck(False)
@cython.wraparound(False)
def polygon_clip_float(float [:, :] subjectPolygon, float [:, :] clipPolygon):
    """
    Assumes subjectPolygon and clipPolygon have 4 vertices
    """
    cdef Py_ssize_t num_clip_points = clipPolygon.shape[0]
    cp1 = clipPolygon[num_clip_points - 1]

    MAX_INTERSECT_POINTS = 10
    num_intersect_points = 0
    outputList_np = np.zeros((MAX_INTERSECT_POINTS, 2), dtype=np.float32)
    cdef float[:, :] outputList = outputList_np

    inputList_np = np.zeros((MAX_INTERSECT_POINTS, 2), dtype=np.float32)
    cdef float[:, :] inputList = inputList_np

    copy_points(subjectPolygon, outputList, subjectPolygon.shape[0])
    cdef Py_ssize_t noutput_list = subjectPolygon.shape[0]
    cdef Py_ssize_t ninput_list = 0
    cdef Py_ssize_t iidx = 0

    for cidx in range(num_clip_points):
        clipVertex = clipPolygon[cidx]
        cp2 = clipVertex

        copy_points(outputList, inputList, noutput_list)
        ninput_list = noutput_list
        noutput_list = 0

        s = inputList[ninput_list - 1]

        for iidx in range(ninput_list):
            e = inputList[iidx]
            if inside(cp1, cp2, e):
                if not inside(cp1, cp2, s):
                    noutput_list = computeIntersection_and_add(cp1, cp2, s, e, outputList, noutput_list)

                noutput_list = add_point(outputList, e, noutput_list)
            elif inside(cp1, cp2, s):
                noutput_list = computeIntersection_and_add(cp1, cp2, s, e, outputList, noutput_list)
            s = e
        cp1 = cp2
        if noutput_list == 0:
            break
    return outputList_np, noutput_list


@cython.boundscheck(False)
@cython.wraparound(False)
def box_intersection(float [:, :, :, :] rect1,
                     float [:, :, :, :] rect2,
                     float [:, :, :] non_rot_inter_areas,
                     int[:] nums_k2,
                     float [:, :, :] inter_areas,
                     bint approximate):
    """
    rect1 - B x K1 x 8 x 3 matrix of box corners
    rect2 - B x K2 x 8 x 3 matrix of box corners
    non_rot_inter_areas - intersection areas of the axis-aligned (non-rotated) boxes
    """

    cdef Py_ssize_t B = rect1.shape[0]
    cdef Py_ssize_t K1 = rect1.shape[1]
    cdef Py_ssize_t K2 = rect2.shape[1]  # dim 1 of rect2 (dim 2 is the 8 corners)

    for b in range(B):
        for k1 in range(K1):
            for k2 in range(K2):
                if k2 >= nums_k2[b]:
                    break

                if approximate and non_rot_inter_areas[b][k1][k2] == 0:
                    continue

                ##### compute the area of the intersection polygon (shoelace formula)
                inter = polygon_clip_unnest(rect1[b, k1], rect2[b, k2])
                ninter = len(inter)
                if ninter > 0:  # there is some intersection between the boxes
                    xs = np.array([x[0] for x in inter]).astype(dtype=FLOAT)
                    ys = np.array([x[1] for x in inter]).astype(dtype=FLOAT)
                    inter_areas[b, k1, k2] = 0.5 * np.abs(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))
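
# --- Editor's note (hedged): this module is shipped prebuilt as
# box_intersection.cpython-38-x86_64-linux-gnu.so next to this source, and
# util/cython_compile.py sits alongside it; a rebuild is presumably
# `python cython_compile.py build_ext --inplace` run from util/, the usual
# cythonize-based setup pattern (verify against that script). ---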
--------------------------------------------------------------------------------
/util/ply_helper.py:
--------------------------------------------------------------------------------
import os
import numpy as np

from plyfile import PlyData, PlyElement


def read_mesh_vertices_rgb_normal(filename):
    """ Read XYZ + RGB vertices and face indices from a PLY file. """
    assert(os.path.isfile(filename))
    with open(filename, 'rb') as f:
        plydata = PlyData.read(f)
        num_verts = plydata['vertex'].count
        vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32)
        vertices[:, 0] = plydata['vertex'].data['x']
        vertices[:, 1] = plydata['vertex'].data['y']
        vertices[:, 2] = plydata['vertex'].data['z']
        vertices[:, 3] = plydata['vertex'].data['red']
        vertices[:, 4] = plydata['vertex'].data['green']
        vertices[:, 5] = plydata['vertex'].data['blue']

        # face indices (callers can use these to compute normals)
        face = np.array([f[0] for f in plydata["face"].data])

    return vertices, face


def write_ply(verts, colors, indices, output_file):
    if colors is None:
        colors = np.zeros_like(verts)
    if indices is None:
        indices = []

    file = open(output_file, 'w')
    file.write('ply\n')
    file.write('format ascii 1.0\n')
    file.write('element vertex {:d}\n'.format(len(verts)))
    file.write('property float x\n')
    file.write('property float y\n')
    file.write('property float z\n')
    file.write('property uchar red\n')
    file.write('property uchar green\n')
    file.write('property uchar blue\n')
    file.write('element face {:d}\n'.format(len(indices)))
    file.write('property list uchar uint vertex_indices\n')
    file.write('end_header\n')
    for vert, color in zip(verts, colors):
        file.write("{:f} {:f} {:f} {:d} {:d} {:d}\n".format(vert[0], vert[1], vert[2], int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)))
    for ind in indices:
        file.write('3 {:d} {:d} {:d}\n'.format(ind[0], ind[1], ind[2]))
    file.close()


def create_cylinder_mesh(radius, p0, p1, stacks=10, slices=10):

    import math

    def compute_length_vec3(vec3):
        return math.sqrt(vec3[0] * vec3[0] + vec3[1] * vec3[1] + vec3[2] * vec3[2])

    def rotation(axis, angle):
        # axis-angle to a 4x4 rotation matrix (Rodrigues' formula, expanded)
        rot = np.eye(4)
        c = np.cos(-angle)
        s = np.sin(-angle)
        t = 1.0 - c
        axis /= compute_length_vec3(axis)
        x = axis[0]
        y = axis[1]
        z = axis[2]
        rot[0, 0] = 1 + t * (x * x - 1)
        rot[0, 1] = z * s + t * x * y
        rot[0, 2] = -y * s + t * x * z
        rot[1, 0] = -z * s + t * x * y
        rot[1, 1] = 1 + t * (y * y - 1)
        rot[1, 2] = x * s + t * y * z
        rot[2, 0] = y * s + t * x * z
        rot[2, 1] = -x * s + t * y * z
        rot[2, 2] = 1 + t * (z * z - 1)
        return rot

    verts = []
    indices = []
    diff = (p1 - p0).astype(np.float32)
    height = compute_length_vec3(diff)
    # one ring of vertices per stack, then two triangles per quad
    for i in range(stacks + 1):
        for i2 in range(slices):
            theta = i2 * 2.0 * math.pi / slices
            pos = np.array([radius * math.cos(theta), radius * math.sin(theta), height * i / stacks])
            verts.append(pos)
    for i in range(stacks):
        for i2 in range(slices):
            i2p1 = math.fmod(i2 + 1, slices)
            indices.append(np.array([(i + 1) * slices + i2, i * slices + i2, i * slices + i2p1], dtype=np.uint32))
            indices.append(np.array([(i + 1) * slices + i2, i * slices + i2p1, (i + 1) * slices + i2p1], dtype=np.uint32))
    # rotate the +z-aligned cylinder onto the p0 -> p1 axis, then translate to p0
    transform = np.eye(4)
    va = np.array([0, 0, 1], dtype=np.float32)
    vb = diff
    vb /= compute_length_vec3(vb)
    axis = np.cross(vb, va)
    angle = np.arccos(np.clip(np.dot(va, vb), -1, 1))
    if angle != 0:
        if compute_length_vec3(axis) == 0:
            dotx = va[0]
            if (math.fabs(dotx) != 1.0):
                axis = np.array([1, 0, 0]) - dotx * va
            else:
                axis = np.array([0, 1, 0]) - va[1] * va
            axis /= compute_length_vec3(axis)
        transform = rotation(axis, -angle)
    transform[:3, 3] += p0
    verts = [np.dot(transform, np.array([v[0], v[1], v[2], 1.0])) for v in verts]
    verts = [np.array([v[0], v[1], v[2]]) / v[3] for v in verts]

    return verts, indices
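
# --- Editor's note: hedged usage sketch, not part of the original file. ---
# Dumping a red wireframe around a unit cube; the corner array is an
# assumption for illustration (write_bbox below only uses the axis-aligned
# min/max of the corners it is given):
#
#   corners = np.array([[x, y, z] for x in (0, 1) for y in (0, 1)
#                       for z in (0, 1)], dtype=np.float32)
#   write_bbox(corners, color=(255, 0, 0), output_file="cube_bbox.ply")
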

def write_bbox(corners, color, output_file):
    """
    corners: (N, 3) array of box corner points; only their axis-aligned
        min/max are used to build the wireframe
    color: (r, g, b), each in 0-255
    output_file: string, path of the output PLY file
    """

    def get_bbox_edges(bbox_min, bbox_max):
        def get_bbox_verts(bbox_min, bbox_max):
            verts = [
                np.array([bbox_min[0], bbox_min[1], bbox_min[2]]),
                np.array([bbox_max[0], bbox_min[1], bbox_min[2]]),
                np.array([bbox_max[0], bbox_max[1], bbox_min[2]]),
                np.array([bbox_min[0], bbox_max[1], bbox_min[2]]),

                np.array([bbox_min[0], bbox_min[1], bbox_max[2]]),
                np.array([bbox_max[0], bbox_min[1], bbox_max[2]]),
                np.array([bbox_max[0], bbox_max[1], bbox_max[2]]),
                np.array([bbox_min[0], bbox_max[1], bbox_max[2]])
            ]
            return verts

        box_verts = get_bbox_verts(bbox_min, bbox_max)
        edges = [
            (box_verts[0], box_verts[1]),
            (box_verts[1], box_verts[2]),
            (box_verts[2], box_verts[3]),
            (box_verts[3], box_verts[0]),

            (box_verts[4], box_verts[5]),
            (box_verts[5], box_verts[6]),
            (box_verts[6], box_verts[7]),
            (box_verts[7], box_verts[4]),

            (box_verts[0], box_verts[4]),
            (box_verts[1], box_verts[5]),
            (box_verts[2], box_verts[6]),
            (box_verts[3], box_verts[7])
        ]
        return edges

    radius = 0.03
    offset = [0, 0, 0]
    verts = []
    indices = []
    colors = []

    box_min = np.min(corners, axis=0)
    box_max = np.max(corners, axis=0)
    edges = get_bbox_edges(box_min, box_max)
    # one cylinder per box edge
    for k in range(len(edges)):
        cyl_verts, cyl_ind = create_cylinder_mesh(radius, edges[k][0], edges[k][1])
        cur_num_verts = len(verts)
        cyl_color = [[c / 255 for c in color] for _ in cyl_verts]
        cyl_verts = [x + offset for x in cyl_verts]
        cyl_ind = [x + cur_num_verts for x in cyl_ind]
        verts.extend(cyl_verts)
        indices.extend(cyl_ind)
        colors.extend(cyl_color)

    write_ply(verts, colors, indices, output_file)
    return


def write_path(points, color, output_file):
    """Write a polyline as a chain of cylinders to a PLY file."""
    radius = 0.03
    offset = [0, 0, 0]
    verts = []
    indices = []
    colors = []

    for start, end in zip(points[:-1], points[1:]):
        cyl_verts, cyl_ind = create_cylinder_mesh(radius, start, end)
        cur_num_verts = len(verts)
        cyl_color = [[c / 255 for c in color] for _ in cyl_verts]
        cyl_verts = [x + offset for x in cyl_verts]
        cyl_ind = [x + cur_num_verts for x in cyl_ind]
        verts.extend(cyl_verts)
        indices.extend(cyl_ind)
        colors.extend(cyl_color)

    write_ply(verts, colors, indices, output_file)
    return
--------------------------------------------------------------------------------
/pointnet2/_ext_src/src/sampling_gpu.cu:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates.

#include <stdio.h>
#include <stdlib.h>

#include "cuda_utils.h"

// input: points(b, c, n) idx(b, m)
// output: out(b, c, m)
__global__ void gather_points_kernel(int b, int c, int n, int m,
                                     const float *__restrict__ points,
                                     const int *__restrict__ idx,
                                     float *__restrict__ out) {
  for (int i = blockIdx.x; i < b; i += gridDim.x) {
    for (int l = blockIdx.y; l < c; l += gridDim.y) {
      for (int j = threadIdx.x; j < m; j += blockDim.x) {
        int a = idx[i * m + j];
        out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
      }
    }
  }
}

void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
                                  const float *points, const int *idx,
                                  float *out) {
  gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                         at::cuda::getCurrentCUDAStream()>>>(b, c, n, npoints,
                                                             points, idx, out);

  CUDA_CHECK_ERRORS();
}

// input: grad_out(b, c, m) idx(b, m)
// output: grad_points(b, c, n)
__global__ void gather_points_grad_kernel(int b, int c, int n, int m,
                                          const float *__restrict__ grad_out,
                                          const int *__restrict__ idx,
                                          float *__restrict__ grad_points) {
  for (int i = blockIdx.x; i < b; i += gridDim.x) {
    for (int l = blockIdx.y; l < c; l += gridDim.y) {
      for (int j = threadIdx.x; j < m; j += blockDim.x) {
        int a = idx[i * m + j];
        atomicAdd(grad_points + (i * c + l) * n + a,
                  grad_out[(i * c + l) * m + j]);
      }
    }
  }
}

void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                       const float *grad_out, const int *idx,
                                       float *grad_points) {
  gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                              at::cuda::getCurrentCUDAStream()>>>(
      b, c, n, npoints, grad_out, idx, grad_points);

  CUDA_CHECK_ERRORS();
}

__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
  const float v1 = dists[idx1], v2 = dists[idx2];
  const int i1 = dists_i[idx1], i2 = dists_i[idx2];
  dists[idx1] = max(v1, v2);
  dists_i[idx1] = v2 > v1 ? i2 : i1;
}

// Input dataset: (b, n, 3), tmp: (b, n)
// Output idxs: (b, m)
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  if (m <= 0) return;
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  int batch_index = blockIdx.x;
  dataset += batch_index * n * 3;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    float x1 = dataset[old * 3 + 0];
    float y1 = dataset[old * 3 + 1];
    float z1 = dataset[old * 3 + 2];
    // each thread scans a strided slice of the points, tracking the one
    // farthest (by min distance to the already-selected set) from the seed
    for (int k = tid; k < n; k += stride) {
      float x2, y2, z2;
      x2 = dataset[k * 3 + 0];
      y2 = dataset[k * 3 + 1];
      z2 = dataset[k * 3 + 2];
      float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
      if (mag <= 1e-3) continue;  // skip zero-padded points near the origin

      float d =
          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);

      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
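    // --- Editor's note (explanatory, not in the original): what follows is
    // a standard shared-memory tree reduction. At each halving step the
    // first half of the threads folds the second half's (distance, index)
    // pairs into its own via __update(), so after log2(block_size) steps
    // dists[0] / dists_i[0] hold this round's farthest point. ---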
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

    if (block_size >= 512) {
      if (tid < 256) {
        __update(dists, dists_i, tid, tid + 256);
      }
      __syncthreads();
    }
    if (block_size >= 256) {
      if (tid < 128) {
        __update(dists, dists_i, tid, tid + 128);
      }
      __syncthreads();
    }
    if (block_size >= 128) {
      if (tid < 64) {
        __update(dists, dists_i, tid, tid + 64);
      }
      __syncthreads();
    }
    if (block_size >= 64) {
      if (tid < 32) {
        __update(dists, dists_i, tid, tid + 32);
      }
      __syncthreads();
    }
    if (block_size >= 32) {
      if (tid < 16) {
        __update(dists, dists_i, tid, tid + 16);
      }
      __syncthreads();
    }
    if (block_size >= 16) {
      if (tid < 8) {
        __update(dists, dists_i, tid, tid + 8);
      }
      __syncthreads();
    }
    if (block_size >= 8) {
      if (tid < 4) {
        __update(dists, dists_i, tid, tid + 4);
      }
      __syncthreads();
    }
    if (block_size >= 4) {
      if (tid < 2) {
        __update(dists, dists_i, tid, tid + 2);
      }
      __syncthreads();
    }
    if (block_size >= 2) {
      if (tid < 1) {
        __update(dists, dists_i, tid, tid + 1);
      }
      __syncthreads();
    }

    old = dists_i[0];
    if (tid == 0) idxs[j] = old;
  }
}

void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
                                            const float *dataset, float *temp,
                                            int *idxs) {
  unsigned int n_threads = opt_n_threads(n);

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  switch (n_threads) {
    case 512:
      furthest_point_sampling_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 256:
      furthest_point_sampling_kernel<256>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 128:
      furthest_point_sampling_kernel<128>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 64:
      furthest_point_sampling_kernel<64>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 32:
      furthest_point_sampling_kernel<32>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 16:
      furthest_point_sampling_kernel<16>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 8:
      furthest_point_sampling_kernel<8>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 4:
      furthest_point_sampling_kernel<4>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 2:
      furthest_point_sampling_kernel<2>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 1:
      furthest_point_sampling_kernel<1>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    default:
      furthest_point_sampling_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
  }

  CUDA_CHECK_ERRORS();
}
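
// --- Editor's note (hedged, not in the original): this wrapper reaches
// Python through the compiled _ext module built by pointnet2/setup.py. In
// the standard pointnet2 layout, the binding in src/sampling.cpp allocates
// the (b, m) index tensor and the (b, n) temp buffer pre-filled with 1e10
// before dispatching here, so temp[k] starts out as "infinity" for the
// min(d, temp[k]) update above. ---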
--------------------------------------------------------------------------------
/models/modules/common.py:
--------------------------------------------------------------------------------
# Copyright (c) V-DETR authors. All Rights Reserved.
import sys

if sys.version_info[:2] >= (3, 8):
    from collections.abc import Sequence
else:
    from collections import Sequence

from enum import Enum

import torch.nn as nn
import MinkowskiEngine as ME


class NormType(Enum):
    BATCH_NORM = 0
    INSTANCE_NORM = 1
    INSTANCE_BATCH_NORM = 2


def get_norm(norm_type, n_channels, D, bn_momentum=0.1):
    if norm_type == NormType.BATCH_NORM:
        return ME.MinkowskiBatchNorm(n_channels, momentum=bn_momentum)
    elif norm_type == NormType.INSTANCE_NORM:
        return ME.MinkowskiInstanceNorm(n_channels)
    elif norm_type == NormType.INSTANCE_BATCH_NORM:
        return nn.Sequential(
            ME.MinkowskiInstanceNorm(n_channels),
            ME.MinkowskiBatchNorm(n_channels, momentum=bn_momentum),
        )
    else:
        raise ValueError(f"Norm type: {norm_type} not supported")
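
# --- Editor's note: hedged usage sketch, not part of the original file. ---
# The factory above maps the enum to MinkowskiEngine norm layers, e.g.:
#
#   bn = get_norm(NormType.BATCH_NORM, n_channels=64, D=3, bn_momentum=0.02)
#   # -> ME.MinkowskiBatchNorm(64, momentum=0.02); D is accepted for API
#   #    symmetry with the conv factories below but unused here
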
77 | """ 78 | return int_to_region_type[region_type] 79 | 80 | 81 | def convert_conv_type(conv_type, kernel_size, D): 82 | assert isinstance(conv_type, ConvType), "conv_type must be of ConvType" 83 | region_type = conv_to_region_type[conv_type] 84 | axis_types = None 85 | if conv_type == ConvType.SPATIAL_HYPERCUBE: 86 | # No temporal convolution 87 | if isinstance(kernel_size, Sequence): 88 | kernel_size = kernel_size[:3] 89 | else: 90 | kernel_size = [ 91 | kernel_size, 92 | ] * 3 93 | if D == 4: 94 | kernel_size.append(1) 95 | elif conv_type == ConvType.SPATIO_TEMPORAL_HYPERCUBE: 96 | # conv_type conversion already handled 97 | assert D == 4 98 | elif conv_type == ConvType.HYPERCUBE: 99 | # conv_type conversion already handled 100 | pass 101 | elif conv_type == ConvType.SPATIAL_HYPERCROSS: 102 | if isinstance(kernel_size, Sequence): 103 | kernel_size = kernel_size[:3] 104 | else: 105 | kernel_size = [ 106 | kernel_size, 107 | ] * 3 108 | if D == 4: 109 | kernel_size.append(1) 110 | elif conv_type == ConvType.HYPERCROSS: 111 | # conv_type conversion already handled 112 | pass 113 | elif conv_type == ConvType.SPATIO_TEMPORAL_HYPERCROSS: 114 | # conv_type conversion already handled 115 | assert D == 4 116 | elif conv_type == ConvType.SPATIAL_HYPERCUBE_TEMPORAL_HYPERCROSS: 117 | # Define the CUBIC conv kernel for spatial dims and CROSS conv for temp dim 118 | axis_types = [ 119 | ME.RegionType.HYPER_CUBE, 120 | ] * 3 121 | if D == 4: 122 | axis_types.append(ME.RegionType.HYPER_CROSS) 123 | return region_type, axis_types, kernel_size 124 | 125 | 126 | def conv( 127 | in_planes, 128 | out_planes, 129 | kernel_size, 130 | stride=1, 131 | dilation=1, 132 | bias=False, 133 | conv_type=ConvType.HYPERCUBE, 134 | D=-1, 135 | ): 136 | assert D > 0, "Dimension must be a positive integer" 137 | region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D) 138 | kernel_generator = ME.KernelGenerator( 139 | kernel_size, 140 | stride, 141 | dilation, 142 | region_type=region_type, 143 | axis_types=None, # axis_types JONAS 144 | dimension=D, 145 | ) 146 | 147 | return ME.MinkowskiConvolution( 148 | in_channels=in_planes, 149 | out_channels=out_planes, 150 | kernel_size=kernel_size, 151 | stride=stride, 152 | dilation=dilation, 153 | bias=bias, 154 | kernel_generator=kernel_generator, 155 | dimension=D, 156 | ) 157 | 158 | 159 | def conv_tr( 160 | in_planes, 161 | out_planes, 162 | kernel_size, 163 | upsample_stride=1, 164 | dilation=1, 165 | bias=False, 166 | conv_type=ConvType.HYPERCUBE, 167 | D=-1, 168 | ): 169 | assert D > 0, "Dimension must be a positive integer" 170 | region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D) 171 | kernel_generator = ME.KernelGenerator( 172 | kernel_size, 173 | upsample_stride, 174 | dilation, 175 | region_type=region_type, 176 | axis_types=axis_types, 177 | dimension=D, 178 | ) 179 | 180 | return ME.MinkowskiConvolutionTranspose( 181 | in_channels=in_planes, 182 | out_channels=out_planes, 183 | kernel_size=kernel_size, 184 | stride=upsample_stride, 185 | dilation=dilation, 186 | bias=bias, 187 | kernel_generator=kernel_generator, 188 | dimension=D, 189 | ) 190 | 191 | 192 | def avg_pool( 193 | kernel_size, 194 | stride=1, 195 | dilation=1, 196 | conv_type=ConvType.HYPERCUBE, 197 | in_coords_key=None, 198 | D=-1, 199 | ): 200 | assert D > 0, "Dimension must be a positive integer" 201 | region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D) 202 | kernel_generator = ME.KernelGenerator( 203 | 

def avg_pool(
    kernel_size,
    stride=1,
    dilation=1,
    conv_type=ConvType.HYPERCUBE,
    in_coords_key=None,
    D=-1,
):
    assert D > 0, "Dimension must be a positive integer"
    region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D)
    kernel_generator = ME.KernelGenerator(
        kernel_size,
        stride,
        dilation,
        region_type=region_type,
        axis_types=axis_types,
        dimension=D,
    )

    return ME.MinkowskiAvgPooling(
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        kernel_generator=kernel_generator,
        dimension=D,
    )


def avg_unpool(kernel_size, stride=1, dilation=1, conv_type=ConvType.HYPERCUBE, D=-1):
    assert D > 0, "Dimension must be a positive integer"
    region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D)
    kernel_generator = ME.KernelGenerator(
        kernel_size,
        stride,
        dilation,
        region_type=region_type,
        axis_types=axis_types,
        dimension=D,
    )

    return ME.MinkowskiAvgUnpooling(
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        kernel_generator=kernel_generator,
        dimension=D,
    )


def sum_pool(kernel_size, stride=1, dilation=1, conv_type=ConvType.HYPERCUBE, D=-1):
    assert D > 0, "Dimension must be a positive integer"
    region_type, axis_types, kernel_size = convert_conv_type(conv_type, kernel_size, D)
    kernel_generator = ME.KernelGenerator(
        kernel_size,
        stride,
        dilation,
        region_type=region_type,
        axis_types=axis_types,
        dimension=D,
    )

    return ME.MinkowskiSumPooling(
        kernel_size=kernel_size,
        stride=stride,
        dilation=dilation,
        kernel_generator=kernel_generator,
        dimension=D,
    )
--------------------------------------------------------------------------------