├── utils ├── __init__.py ├── merge_csv.py ├── gen_args.py └── visualize.py ├── third_party └── nndistance │ ├── modules │ ├── __init__.py │ └── nnd.py │ ├── functions │ ├── __init__.py │ └── nnd.py │ ├── README.md │ ├── src │ ├── my_lib.cpp │ ├── my_lib_cuda.cpp │ ├── cpu_ops.cpp │ └── nnd_cuda.cu │ ├── build_cpu.py │ ├── build.py │ ├── test.py │ └── LICENSE ├── assets ├── pccai_coding.png ├── pccai_training.png └── mpeg_test_seq.txt ├── pccai ├── __init__.py ├── codecs │ ├── __init__.py │ ├── utils.py │ ├── pcc_codec.py │ └── octree_partition_codec.py ├── optim │ ├── __init__.py │ ├── pcc_loss.py │ ├── cd_sparse.py │ ├── utils.py │ └── cd_canonical.py ├── utils │ ├── __init__.py │ ├── logger.py │ ├── misc.py │ ├── pc_metric.py │ ├── convert_image.py │ ├── syntax.py │ ├── option_handler.py │ └── convert_octree.py ├── pipelines │ ├── __init__.py │ ├── bench.py │ └── test.py ├── models │ ├── __init__.py │ ├── modules │ │ ├── get_modules.py │ │ ├── pointnet.py │ │ └── mlpdecoder.py │ ├── pcc_models.py │ ├── architectures │ │ ├── sparsecnncomp.py │ │ └── mlpcomp.py │ └── utils.py └── dataloaders │ ├── __init__.py │ ├── point_cloud_dataset.py │ ├── lidar_base_loader.py │ ├── lidar_loader.py │ ├── shapenet_part_loader.py │ └── modelnet_loader.py ├── scripts ├── config_args.sh ├── visualize.sh ├── examples │ ├── test_ford.sh │ ├── test_mdlnet.sh │ ├── test_ford_hetero.sh │ ├── train_ford.sh │ ├── train_mdlnet.sh │ ├── test_mdlnet_hetero.sh │ ├── train_ford_hetero.sh │ ├── train_mdlnet_hetero.sh │ └── bench_ford_hetero.sh └── run.sh ├── config ├── optim_config │ ├── optim_cd_sparse.yaml │ └── optim_cd_canonical.yaml ├── net_config │ ├── sparsecnn_compression.yaml │ └── mlp_compression.yaml ├── codec_config │ └── ford.yaml └── data_config │ ├── modelnet_simple.yaml │ ├── modelnet_voxel.yaml │ ├── ford_simple.yaml │ ├── shapenet_part.yaml │ ├── ford_voxel.yaml │ ├── ford_spherical.yaml │ ├── kitti_simple.yaml │ ├── modelnet_octree.yaml │ ├── kitti_voxel.yaml │ ├── 
kitti_spherical.yaml │ ├── ford_octree.yaml │ └── kitti_octree.yaml ├── install_torch-1.7.0+cu-10.1.sh ├── install_torch-1.8.1+cu-11.2.sh ├── experiments ├── test.py ├── train.py └── bench.py ├── LICENSE ├── .gitignore └── README.md /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /third_party/nndistance/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /third_party/nndistance/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /third_party/nndistance/README.md: -------------------------------------------------------------------------------- 1 | python build.py install 2 | -------------------------------------------------------------------------------- /assets/pccai_coding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InterDigitalInc/PccAI/HEAD/assets/pccai_coding.png -------------------------------------------------------------------------------- /assets/pccai_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InterDigitalInc/PccAI/HEAD/assets/pccai_training.png -------------------------------------------------------------------------------- /pccai/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | -------------------------------------------------------------------------------- /pccai/codecs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | -------------------------------------------------------------------------------- /pccai/optim/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | -------------------------------------------------------------------------------- /pccai/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | -------------------------------------------------------------------------------- /pccai/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | -------------------------------------------------------------------------------- /pccai/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | from .pcc_models import * -------------------------------------------------------------------------------- /pccai/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | from .point_cloud_dataset import * 8 | -------------------------------------------------------------------------------- /third_party/nndistance/modules/nnd.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from functions.nnd import NNDFunction 3 | 4 | class NNDModule(Module): 5 | def forward(self, input1, input2): 6 | return NNDFunction().apply(input1, input2) 7 | -------------------------------------------------------------------------------- /third_party/nndistance/src/my_lib.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "cpu_ops.cpp" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 6 | m.def("nnd_forward", &nnd_forward, "nnd_forward"); 7 | m.def("nnd_backward", &nnd_backward, "nnd_backward"); 8 | } 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /third_party/nndistance/build_cpu.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension,CppExtension 3 | 4 | 5 | setup(name='my_lib_cuda', 6 | ext_modules=[CppExtension('my_lib_cuda', ['src/my_lib.cpp'])], 7 | cmdclass={'build_ext': BuildExtension}) 8 | 9 | 10 | #if __name__ == '__main__': 11 | # ffi.build() 12 | -------------------------------------------------------------------------------- /scripts/config_args.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2010-2022, InterDigital 4 | # All rights reserved. 5 | 6 | # See LICENSE under the root folder. 
7 | 8 | 9 | if [ $# -eq 2 ]; then 10 | export CUDA_VISIBLE_DEVICES=$2 11 | echo export CUDA_VISIBLE_DEVICES=$2 12 | fi 13 | 14 | source './scripts/tmp/'$1 15 | echo $1 16 | echo python ${RUN_ARGUMENTS} 17 | python ${RUN_ARGUMENTS} 18 | -------------------------------------------------------------------------------- /config/optim_config/optim_cd_sparse.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Optimization configuration 8 | 9 | --- 10 | n_epoch: 50 11 | main_args: 12 | lr: 0.0008 13 | opt_args: [0.9, 0.999, 0] 14 | schedule_args: ['step', 10, 0.5] 15 | loss_args: 16 | loss: cd_sparse 17 | alpha: 5 18 | beta: 1 19 | clip_max_norm: -1 -------------------------------------------------------------------------------- /config/net_config/sparsecnn_compression.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Network configuration, a template just for reference 8 | 9 | --- 10 | architecture: sparsecnn_compression 11 | modules: 12 | entropy_bottleneck: 8 13 | # encoder: 14 | # model: scnn_down 15 | # dims: [] 16 | # decoder: 17 | # model: scnn_up 18 | # dims: [] 19 | # etc. 20 | -------------------------------------------------------------------------------- /scripts/visualize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2010-2022, InterDigital 4 | # All rights reserved. 5 | 6 | # See LICENSE under the root folder. 7 | 8 | 9 | # Rendering settings 10 | FILE="./datasets/ford/Ford_01_q_1mm/Ford_01_vox1mm-0100.ply" 11 | RADIUS=-1 12 | RADIUS_ORIGIN=-1 13 | VIEW_FILE=. 
14 | 15 | # Begin rendering 16 | python ./utils/visualize.py \ 17 | --file_name $FILE \ 18 | --output_file . \ 19 | --view_file $VIEW_FILE \ 20 | --radius $RADIUS \ 21 | --radius_origin $RADIUS_ORIGIN \ 22 | --window_name $FILE 23 | -------------------------------------------------------------------------------- /third_party/nndistance/build.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension,CppExtension,CUDAExtension 3 | 4 | 5 | # setup(name='my_lib', 6 | # ext_modules=[CppExtension('my_lib', ['src/my_lib.cpp'])], 7 | # cmdclass={'build_ext': BuildExtension}) 8 | 9 | setup(name='my_lib_cuda', 10 | ext_modules=[CUDAExtension('my_lib_cuda',['src/my_lib_cuda.cpp', 'src/nnd_cuda.cu'] 11 | )], 12 | cmdclass={'build_ext': BuildExtension} 13 | ) 14 | 15 | 16 | #if __name__ == '__main__': 17 | # ffi.build() 18 | -------------------------------------------------------------------------------- /config/optim_config/optim_cd_canonical.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Optimization configuration 8 | 9 | --- 10 | n_epoch: 100 11 | main_args: 12 | lr: 0.0008 13 | opt_args: [0.9, 0.999, 0] 14 | schedule_args: ['step', 20, 0.5] 15 | aux_args: 16 | lr: 0.005 17 | opt_args: [0.9, 0.999, 0] 18 | schedule_args: ['step', 20, 0.5] 19 | loss_args: 20 | loss: cd_canonical 21 | xyz_loss_type: cd_l1max 22 | xyz_subset_weight: 1 23 | alpha: 1 24 | beta: 1 25 | clip_max_norm: -1 26 | -------------------------------------------------------------------------------- /config/net_config/mlp_compression.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 
3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Network configuration 8 | 9 | --- 10 | architecture: mlp_compression 11 | modules: 12 | entropy_bottleneck: 64 13 | cw_gen: 14 | model: pointnet 15 | mlp_dims: [3, 64, 64, 64, 128, 1024] 16 | fc_dims: [1024, 512, 64] 17 | pc_gen: 18 | model: mlpdecoder 19 | dims: [64, 128, 256, 512, 1024] 20 | num_points: 1024 21 | # overall actual mlp dim is [64, 128, 256, 512, 1024, 3072] 22 | # In your custom architecture, you can introduce more modules here 23 | -------------------------------------------------------------------------------- /assets/mpeg_test_seq.txt: -------------------------------------------------------------------------------- 1 | queen_0200 2 | soldier_vox10_0690 3 | facade_00064_vox11 4 | dancer_vox11_00000001 5 | thaidancer_viewdep_vox12 6 | soldier_viewdep_vox12 7 | boxer_viewdep_vox12 8 | facade_00009_vox12 9 | house_without_roof_00057_vox12 10 | landscape_00014_vox14 11 | facade_00064_vox14 12 | arco_valentino_dense_vox12 13 | staue_klimt_vox12 14 | shiva_00035_vox12 15 | egyptian_mask_vox12 16 | ulb_unicorn_vox13 17 | ulb_unicorn_hires_vox15 18 | stanford_area_2_vox16 19 | stanford_area_4_vox16 20 | citytunnel_q1mm 21 | overpass_q1mm 22 | tollbooth_q1mm 23 | ford_02_q1mm 24 | ford_03_q1mm 25 | qnxadas-junction-approach 26 | qnxadas-junction-exit 27 | qnxadas-motorway-join 28 | qnxadas-navigating-bends 29 | -------------------------------------------------------------------------------- /scripts/examples/test_ford.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/test.py" 8 | 9 | # Main configurations 10 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 11 | TEST_DATA_CONFIG="${HOME_DIR}/config/data_config/ford_simple.yaml test_cfg" 12 | CHECKPOINT="${HOME_DIR}/results/train_ford/epoch_newest.pth" 13 | CHECKPOINT_NET_CONFIG="True" 14 | GEN_BITSTREAM="False" # generate actual bit-stream if True, needs CompressAI 15 | 16 | # Logging settings 17 | PRINT_FREQ="10" 18 | PC_WRITE_FREQ="20" 19 | TF_SUMMARY="False" 20 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 21 | -------------------------------------------------------------------------------- /scripts/examples/test_mdlnet.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/test.py" 8 | 9 | # Main configurations 10 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 11 | TEST_DATA_CONFIG="${HOME_DIR}/config/data_config/modelnet_simple.yaml test_cfg" 12 | CHECKPOINT="${HOME_DIR}/results/train_mdlnet/epoch_newest.pth" 13 | CHECKPOINT_NET_CONFIG="True" 14 | GEN_BITSTREAM="False" # generate actual bit-stream if True, needs CompressAI 15 | 16 | # Logging settings 17 | PRINT_FREQ="10" 18 | PC_WRITE_FREQ="20" 19 | TF_SUMMARY="False" 20 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 21 | -------------------------------------------------------------------------------- /config/codec_config/ford.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configure the encoder/decoder for actual compression/decompression 8 | 9 | # Codec class 10 | codec: 'octree_partition' 11 | 12 | # Native translation and scaling of the data 13 | translate: [131072, 131072, 131072] 14 | scale: 1 15 | max_num_points: 1500000 # maximum number of points that can be handled 16 | 17 | # Options about octree partitioning 18 | octree_cfg: 19 | bbox_min: [0, 0, 0] 20 | bbox_max: [262143, 262143, 262143] 21 | point_min: 64 22 | point_max: 2025 # keep splitting if points in a block larger than this number 23 | level_min: 3 24 | level_max: 10 -------------------------------------------------------------------------------- /scripts/examples/test_ford_hetero.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/test.py" 8 | 9 | # Main configurations 10 | HETERO="True" 11 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 12 | TEST_DATA_CONFIG="${HOME_DIR}/config/data_config/ford_octree.yaml test_cfg" 13 | CHECKPOINT="${HOME_DIR}/results/train_ford_hetero/epoch_newest.pth" 14 | CHECKPOINT_NET_CONFIG="True" 15 | GEN_BITSTREAM="False" # generate actual bit-stream if True, needs CompressAI 16 | 17 | # Logging settings 18 | PRINT_FREQ="10" 19 | PC_WRITE_FREQ="20" 20 | TF_SUMMARY="False" 21 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 22 | -------------------------------------------------------------------------------- /pccai/codecs/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Utilities related to point cloud codec 8 | 9 | import torch 10 | import numpy as np 11 | 12 | # Import all the codecs to be used 13 | from pccai.codecs.octree_partition_codec import OctreePartitionCodec 14 | 15 | 16 | # List all the codecs in the following dictionary 17 | codec_classes = { 18 | 'octree_partition': OctreePartitionCodec, 19 | } 20 | 21 | def get_codec_class(codec_name): 22 | codec = codec_classes.get(codec_name.lower(), None) 23 | assert codec is not None, f'codec class "{codec_name}" not found, valid codec classes are: {list(codec_classes.keys())}' 24 | return codec 25 | -------------------------------------------------------------------------------- /scripts/examples/train_ford.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/train.py" 8 | 9 | # Main configurations 10 | NET_CONFIG="${HOME_DIR}/config/net_config/mlp_compression.yaml" 11 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 12 | TRAIN_DATA_CONFIG="${HOME_DIR}/config/data_config/ford_simple.yaml train_cfg" 13 | VAL_DATA_CONFIG="${HOME_DIR}/config/data_config/ford_simple.yaml val_cfg" 14 | 15 | # Logging settings 16 | PRINT_FREQ="20" 17 | PC_WRITE_FREQ="-1" 18 | TF_SUMMARY="True" 19 | SAVE_CHECKPOINT_FREQ="1" 20 | SAVE_CHECKPOINT_MAX="10" 21 | VAL_FREQ="5" 22 | VAL_PRINT_FREQ="20" 23 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 24 | -------------------------------------------------------------------------------- /scripts/examples/train_mdlnet.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/train.py" 8 | 9 | # Main configurations 10 | NET_CONFIG="${HOME_DIR}/config/net_config/mlp_compression.yaml" 11 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 12 | TRAIN_DATA_CONFIG="${HOME_DIR}/config/data_config/modelnet_simple.yaml train_cfg" 13 | VAL_DATA_CONFIG="${HOME_DIR}/config/data_config/modelnet_simple.yaml val_cfg" 14 | 15 | # Logging settings 16 | PRINT_FREQ="20" 17 | PC_WRITE_FREQ="-1" 18 | TF_SUMMARY="True" 19 | SAVE_CHECKPOINT_FREQ="1" 20 | SAVE_CHECKPOINT_MAX="10" 21 | VAL_FREQ="5" 22 | VAL_PRINT_FREQ="20" 23 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 24 | -------------------------------------------------------------------------------- /scripts/examples/test_mdlnet_hetero.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/test.py" 8 | 9 | # Main configurations 10 | NET_CONFIG="${HOME_DIR}/config/net_config/mlp_compression.yaml" 11 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 12 | TEST_DATA_CONFIG="${HOME_DIR}/config/data_config/modelnet_octree.yaml test_cfg" 13 | CHECKPOINT="${HOME_DIR}/results/train_mdlnet_hetero/epoch_newest.pth" 14 | CHECKPOINT_NET_CONFIG="False" 15 | GEN_BITSTREAM="False" # generate actual bit-stream if True, needs CompressAI 16 | 17 | # Logging settings 18 | PRINT_FREQ="10" 19 | PC_WRITE_FREQ="20" 20 | TF_SUMMARY="False" 21 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 22 | -------------------------------------------------------------------------------- /scripts/examples/train_ford_hetero.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/train.py" 8 | 9 | # Main configurations 10 | HETERO="True" 11 | NET_CONFIG="${HOME_DIR}/config/net_config/mlp_compression.yaml" 12 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 13 | TRAIN_DATA_CONFIG="${HOME_DIR}/config/data_config/ford_octree.yaml train_cfg" 14 | VAL_DATA_CONFIG="${HOME_DIR}/config/data_config/ford_octree.yaml val_cfg" 15 | 16 | # Logging settings 17 | PRINT_FREQ="20" 18 | PC_WRITE_FREQ="-1" 19 | TF_SUMMARY="True" 20 | SAVE_CHECKPOINT_FREQ="1" 21 | SAVE_CHECKPOINT_MAX="10" 22 | VAL_FREQ="5" 23 | VAL_PRINT_FREQ="20" 24 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 25 | -------------------------------------------------------------------------------- /scripts/examples/train_mdlnet_hetero.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/train.py" 8 | 9 | # Main configurations 10 | HETERO="True" 11 | NET_CONFIG="${HOME_DIR}/config/net_config/mlp_compression.yaml" 12 | OPTIM_CONFIG="${HOME_DIR}/config/optim_config/optim_cd_canonical.yaml" 13 | TRAIN_DATA_CONFIG="${HOME_DIR}/config/data_config/modelnet_octree.yaml train_cfg" 14 | VAL_DATA_CONFIG="${HOME_DIR}/config/data_config/modelnet_octree.yaml val_cfg" 15 | 16 | # Logging settings 17 | PRINT_FREQ="20" 18 | PC_WRITE_FREQ="-1" 19 | TF_SUMMARY="True" 20 | SAVE_CHECKPOINT_FREQ="1" 21 | SAVE_CHECKPOINT_MAX="10" 22 | VAL_FREQ="5" 23 | VAL_PRINT_FREQ="20" 24 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt -------------------------------------------------------------------------------- /pccai/codecs/pcc_codec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | import numpy as np 8 | 9 | class PccCodecBase: 10 | """A base class of PCC codec. User needs to implement the compress() and decompress() method.""" 11 | 12 | def __init__(self, codec_config, pccnet, syntax): 13 | self.translate = codec_config['translate'] 14 | self.scale = codec_config['scale'] 15 | self.hetero = syntax.hetero 16 | self.phase = syntax.phase 17 | self.pccnet = pccnet 18 | 19 | 20 | def compress(self, points, tag): 21 | """Compression method.""" 22 | 23 | raise NotImplementedError() 24 | 25 | 26 | def decompress(self, file_name): 27 | """Decompression method.""" 28 | 29 | raise NotImplementedError() -------------------------------------------------------------------------------- /config/data_config/modelnet_simple.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Configure the ModelNet dataset to load raw 3D points 8 | 9 | # Common options of the dataset 10 | dataset: modelnet_simple 11 | num_points: 2048 12 | 13 | # Options under individual configurations 14 | train_cfg: 15 | batch_size: 32 16 | shuffle: True 17 | num_workers: 1 18 | augmentation: True 19 | split: train 20 | # class_choice: Bag 21 | val_cfg: 22 | batch_size: 8 23 | shuffle: False 24 | num_workers: 1 25 | augmentation: False 26 | split: val 27 | # class_choice: Bag 28 | test_cfg: 29 | # class_choice: Airplane 30 | batch_size: 1 31 | num_points: null 32 | shuffle: False 33 | num_workers: 1 34 | augmentation: False 35 | split: test -------------------------------------------------------------------------------- /config/data_config/modelnet_voxel.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configure the ModelNet dataset for loading as sparse voxels 8 | 9 | # Common options of the dataset 10 | dataset: modelnet_simple 11 | num_points: 25000 12 | coord_min: 0 13 | coord_max: 255 14 | voxelize: True 15 | sparse_collate: True 16 | centralize: True 17 | 18 | # Options under individual configurations 19 | train_cfg: 20 | batch_size: 8 21 | shuffle: True 22 | num_workers: 4 23 | augmentation: True 24 | split: train 25 | val_cfg: 26 | batch_size: 8 27 | shuffle: False 28 | num_workers: 1 29 | augmentation: False 30 | split: val 31 | test_cfg: 32 | batch_size: 1 33 | num_points: null 34 | shuffle: False 35 | num_workers: 1 36 | augmentation: False 37 | split: test -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2010-2022, InterDigital 4 | # All rights reserved. 5 | 6 | # See LICENSE under the root folder. 7 | 8 | 9 | SPEC=$1 10 | LAUNCHER=$2 11 | USE_GPU=$3 12 | 13 | TMP_ARGS=`python ./utils/gen_args.py ${SPEC}` 14 | if [[ ${LAUNCHER} == "d" ]]; then 15 | echo "Launch the job directly." 16 | ./scripts/config_args.sh ${TMP_ARGS} ${USE_GPU} 2>&1 & 17 | elif [[ ${LAUNCHER} == "f" ]]; then 18 | echo "Launch the job directly in foreground." 19 | ./scripts/config_args.sh ${TMP_ARGS} ${USE_GPU} 2>&1 20 | elif [[ ${LAUNCHER} == "s" ]]; then 21 | echo "Launch the job with slurm." 22 | source './scripts/tmp/'${TMP_ARGS} 23 | # Please modify according to your needs 24 | sbatch --job-name=${EXP_NAME} -n 1 -D ${HOME_DIR} --gres=gpu:1 ./scripts/config_args.sh ${TMP_ARGS} 0 25 | else 26 | echo "No launcher is specified." 
27 | fi 28 | -------------------------------------------------------------------------------- /scripts/examples/bench_ford_hetero.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | PY_NAME="${HOME_DIR}/experiments/bench.py" 8 | 9 | # Main configurations 10 | HETERO="True" 11 | CHECKPOINTS="${HOME_DIR}/results/train_ford_hetero/epoch_newest.pth" 12 | CHECKPOINT_NET_CONFIG="True" 13 | CODEC_CONFIG="${HOME_DIR}/config/codec_config/ford.yaml" 14 | INPUT="${HOME_DIR}/datasets/ford/Ford_02_q_1mm ${HOME_DIR}/datasets/ford/Ford_03_q_1mm" 15 | COMPUTE_D2="True" 16 | MPEG_REPORT="mpeg_report.csv" # generate the CSV file for MPEG reporting 17 | MPEG_REPORT_SEQUENCE="True" # view the input point clouds as sequences 18 | WRITE_PREFIX="compress_" 19 | PEAK_VALUE="30000" 20 | BIT_DEPTH="18" 21 | 22 | # Logging settings 23 | PRINT_FREQ="1" 24 | PC_WRITE_FREQ="-1" 25 | TF_SUMMARY="False" 26 | REMOVE_COMPRESSED_FILES="True" 27 | LOG_FILE=$(date); LOG_FILE=log_${LOG_FILE//' '/$'_'}.txt 28 | -------------------------------------------------------------------------------- /config/data_config/ford_simple.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configure the Ford dataset to load raw 3D points 8 | 9 | # Common options of the dataset 10 | dataset: ford_simple 11 | num_points: 80000 12 | seed: 0 13 | return_intensity: False 14 | 15 | # Options on the splitting scheme 16 | splitting: 17 | train: 18 | - 1 19 | val: 20 | - 2 21 | - 3 22 | test: 23 | - 2 24 | - 3 25 | 26 | # Options under individual configurations 27 | train_cfg: 28 | batch_size: 4 29 | shuffle: True 30 | num_workers: 4 31 | augmentation: True 32 | split: train 33 | val_cfg: 34 | batch_size: 4 35 | shuffle: False 36 | num_workers: 4 37 | augmentation: True 38 | split: val 39 | test_cfg: 40 | batch_size: 4 41 | shuffle: False 42 | num_workers: 4 43 | augmentation: False 44 | split: test -------------------------------------------------------------------------------- /config/data_config/shapenet_part.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configuration for the ShapeNet-Part dataset 8 | 9 | # Common options of the dataset 10 | dataset: shapenet_part 11 | num_points: 2048 12 | classification: False 13 | normalize: True 14 | lazy_loading: False 15 | 16 | # Options under individual configurations 17 | train_cfg: 18 | batch_size: 32 19 | shuffle: True 20 | num_workers: 8 21 | augmentation: True 22 | split: train 23 | # class_choice: Bag 24 | val_cfg: 25 | batch_size: 8 26 | shuffle: False 27 | num_workers: 1 28 | augmentation: False 29 | split: val 30 | # class_choice: Bag 31 | test_cfg: 32 | # class_choice: Airplane 33 | # Ensure reproducibility with batch_size 1 and full (unsampled) point clouds 34 | batch_size: 1 35 | num_points: null 36 | shuffle: False 37 | num_workers: 1 38 | augmentation: False 39 | split: test -------------------------------------------------------------------------------- /pccai/models/modules/get_modules.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Import all the modules to be used here 8 | from pccai.models.modules.pointnet import PointNet, PointNetHetero 9 | from pccai.models.modules.mlpdecoder import MlpDecoder, MlpDecoderHetero 10 | 11 | 12 | def get_module_class(module_name, hetero=False): 13 | """Retrieve the module classes from the module name.""" 14 | 15 | # List all the modules and their string name in this dictionary 16 | module_dict = { 17 | 'pointnet': [PointNet, PointNetHetero], # pointnet 18 | 'mlpdecoder': [MlpDecoder, MlpDecoderHetero], # mlpdecoder 19 | } 20 | 21 | module = module_dict.get(module_name.lower(), None) 22 | assert module is not None, f'module {module_name} was not found, valid modules are: {list(module_dict.keys())}' 23 | try: 24 | module = module[hetero] 25 | except IndexError as e: 26 | raise Exception(f'module {module_name} is not implemented for hetero={hetero}') 27 | 28 | return module -------------------------------------------------------------------------------- /config/data_config/ford_voxel.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configure the Ford dataset for loading as sparse voxels 8 | 9 | # Common options of the dataset 10 | dataset: ford_simple 11 | num_points: 150000 # for point clouds in folder 0 - 10, at least have 82602 points, at most have 129392 points 12 | translate: [131072, 131072, 131072] 13 | scale: 1 14 | voxelize: True 15 | sparse_collate: True 16 | return_intensity: False 17 | 18 | # Options on the splitting scheme 19 | splitting: 20 | train: 21 | - 1 22 | test: 23 | - 2 24 | - 3 25 | 26 | # Options under individual configurations 27 | train_cfg: 28 | batch_size: 2 29 | shuffle: True 30 | num_workers: 4 31 | augmentation: True 32 | split: train 33 | val_cfg: 34 | batch_size: 4 35 | shuffle: False 36 | num_workers: 4 37 | augmentation: True 38 | split: val 39 | test_cfg: 40 | batch_size: 4 41 | shuffle: False 42 | num_workers: 4 43 | augmentation: False 44 | split: test -------------------------------------------------------------------------------- /third_party/nndistance/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | from modules.nnd import NNDModule 6 | 7 | dist = NNDModule() 8 | 9 | p1 = torch.rand(10,1000,3) 10 | p2 = torch.rand(10,1500,3) 11 | points1 = Variable(p1,requires_grad = True) 12 | points2 = Variable(p2) 13 | points1=points1.cuda() 14 | points2=points2.cuda() 15 | dist1, dist2, idx1, idx2 = dist(points1, points2) 16 | print(dist1, dist2, idx1, idx2) 17 | loss = torch.sum(dist1) 18 | print(loss) 19 | loss.backward() 20 | print(points1.grad, points2.grad) 21 | 22 | 23 | points1 = Variable(p1.cuda(), requires_grad = True) 24 | points2 = Variable(p2.cuda()) 25 | dist1, dist2, idx1, idx2 = dist(points1, points2) 26 | print(dist1, dist2, idx1, idx2) 27 | loss = torch.sum(dist1) 28 | print(loss) 29 | loss.backward() 30 | print(points1.grad, points2.grad) 31 | 32 | # Test indexing 33 | nn2 = torch.gather(points1, 1, 
idx2.unsqueeze(-1).expand([-1,-1,points1.shape[2]]).cuda()) 34 | print(nn2) 35 | loss = torch.sum(nn2) 36 | print(loss) 37 | loss.backward() 38 | print(points1.grad, points2.grad) 39 | -------------------------------------------------------------------------------- /config/data_config/ford_spherical.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Configure the Ford dataset as 2D images by spherical projection 8 | 9 | dataset: ford_spherical 10 | 11 | # Options on spherical coordinate conversion 12 | spherical_cfg: 13 | height: 128 14 | width: 2048 15 | v_fov: [-28, 3.0] 16 | h_fov: [-180, 180] 17 | origin_shift: [-0.1, 0.0, -0.4] 18 | # origin_shift: [0.0, 0.0, 0.0] 19 | 20 | # Options on the splitting scheme 21 | splitting: 22 | train: 23 | - 1 24 | val: 25 | - 2 26 | - 3 27 | test: 28 | - 2 29 | - 3 30 | 31 | # Options under individual configurations 32 | train_cfg: 33 | batch_size: 4 34 | shuffle: True 35 | num_workers: 4 36 | augmentation: True 37 | split: train 38 | val_cfg: 39 | batch_size: 4 40 | shuffle: False 41 | num_workers: 4 42 | augmentation: True 43 | split: val 44 | test_cfg: 45 | batch_size: 4 46 | shuffle: False 47 | num_workers: 4 48 | augmentation: False 49 | split: test -------------------------------------------------------------------------------- /config/data_config/kitti_simple.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configure the KITTI dataset to load raw 3D points 8 | 9 | # Common options of the dataset 10 | dataset: kitti_simple 11 | num_points: 80000 # for point clouds in folder 0 - 10, at least have 82602 points, at most have 129392 points 12 | seed: 0 13 | 14 | # Options on the splitting scheme 15 | splitting: 16 | train: 17 | - 0 18 | - 1 19 | - 2 20 | - 3 21 | - 4 22 | - 5 23 | val: 24 | - 6 25 | - 7 26 | test: 27 | - 11 28 | - 12 29 | - 13 30 | 31 | # Options under individual configurations 32 | train_cfg: 33 | batch_size: 8 34 | shuffle: True 35 | num_workers: 4 36 | augmentation: True 37 | split: train 38 | val_cfg: 39 | batch_size: 4 40 | shuffle: False 41 | num_workers: 2 42 | augmentation: False 43 | split: val 44 | test_cfg: 45 | batch_size: 4 46 | shuffle: False 47 | num_workers: 2 48 | augmentation: False 49 | split: test -------------------------------------------------------------------------------- /pccai/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
# A logger for outputting all the information to both the display and a specified file

import logging
import sys
import os

log = None


def create_logger(exp_folder, file_name, log_file_only):
    """Configure the root logger and expose it as the module-level ``log``.

    Args:
        exp_folder: Folder that the log file path is joined onto.
        file_name: Log file name relative to ``exp_folder``; an empty string
            disables file logging.
        log_file_only: If true, do not echo messages to stdout.
    """

    handlers = [] if log_file_only else [logging.StreamHandler(sys.stdout)]
    if file_name != '':
        log_path = os.path.join(exp_folder, file_name)
        log_dir = os.path.dirname(log_path)
        if log_dir:  # os.makedirs('') raises when the log lives in the working directory
            os.makedirs(log_dir, exist_ok=True)
        handlers.append(logging.FileHandler(log_path, mode='w'))

    # Remove all existing handlers, otherwise basicConfig() would be a no-op
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', handlers=handlers)
    global log
    log = logging.getLogger()


def destroy_logger():
    """Close and detach every handler of the logger made by create_logger().

    Does nothing if no logger has been created yet.
    """

    if log is None:
        return
    for handler in log.handlers[:]:
        handler.close()
        log.removeHandler(handler)
5 | 6 | 7 | # Configure the ModelNet dataset as the octree representation 8 | 9 | # Common options of the dataset 10 | dataset: modelnet_octree 11 | rw_octree: False 12 | rw_partition_scheme: default 13 | coord_min: 0 14 | coord_max: 1023 15 | num_points: 150000 16 | 17 | # Options about octree partitioning 18 | octree_cfg: 19 | bbox_min: [0, 0, 0] 20 | bbox_max: [1023, 1023, 1023] 21 | point_min: 256 # marked as skip if points in a block less than this number 22 | point_max: 4096 # keep splitting if points in a block larger than this number 23 | level_min: 1 24 | level_max: 6 25 | 26 | # Options under individual configurations 27 | train_cfg: 28 | batch_size: 4 29 | shuffle: True 30 | num_workers: 4 31 | augmentation: True 32 | split: train 33 | val_cfg: 34 | batch_size: 4 35 | shuffle: False 36 | num_workers: 4 37 | augmentation: True 38 | split: test 39 | test_cfg: 40 | batch_size: 4 41 | shuffle: False 42 | num_workers: 4 43 | augmentation: False 44 | split: test -------------------------------------------------------------------------------- /install_torch-1.7.0+cu-10.1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # PccAI installation example 3 | # Run "echo y | conda create -n pccai python=3.6 && conda activate pccai && ./install_torch-1.7.0+cu-10.1.sh" 4 | 5 | # 1. Basic installation for PccAI 6 | echo y | conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cudatoolkit=10.1 -c pytorch 7 | pip install tensorboard==2.9.0 8 | pip install plyfile==0.7.4 9 | pip install --no-index torch-scatter==2.0.7 -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html 10 | pip install --no-index torch-sparse==0.6.9 -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html 11 | pip install torch-geometric==2.0.3 12 | 13 | # 2. 
Additional installation for the examples 14 | 15 | # Optional: nndistance for fast Chamfer Distance computation 16 | cd third_party/nndistance 17 | export PATH="/usr/local/cuda-10.1/bin:$PATH" 18 | export LD_LIBRARY_PATH="/usr/local/cuda-10.1/lib64:$LD_LIBRARY_PATH" 19 | python build.py install 20 | cd ../.. 21 | 22 | # Optional: CompressAI for entropy modeling and coding, necessary for the benchmarking example "bench_ford_hetero.sh" 23 | pip install compressai==1.1.1 24 | 25 | # Optional: Open3D for visualization 26 | pip install open3d==0.15.2 27 | -------------------------------------------------------------------------------- /config/data_config/kitti_voxel.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Configure the KITTI dataset to load raw 3D points 8 | 9 | # Common options of the dataset 10 | dataset: kitti_simple 11 | num_points: 130000 # for point clouds in folder 0 - 10, at least have 82602 points, at most have 129392 points 12 | seed: 0 13 | quantize_resolution: 4095 14 | voxelize: True 15 | sparse_collate: True 16 | return_intensity: False 17 | 18 | # Options on the splitting scheme 19 | splitting: 20 | train: 21 | - 0 22 | - 1 23 | - 2 24 | - 3 25 | - 4 26 | - 5 27 | - 6 28 | - 7 29 | test: 30 | - 8 31 | - 9 32 | - 10 33 | 34 | # Options under individual configurations 35 | train_cfg: 36 | batch_size: 4 37 | shuffle: True 38 | num_workers: 4 39 | augmentation: True 40 | split: train 41 | val_cfg: 42 | batch_size: 1 43 | shuffle: False 44 | num_workers: 2 45 | augmentation: False 46 | split: val 47 | test_cfg: 48 | batch_size: 1 49 | shuffle: False 50 | num_workers: 2 51 | augmentation: False 52 | split: test -------------------------------------------------------------------------------- /install_torch-1.8.1+cu-11.2.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # PccAI installation example 3 | # Run "echo y | conda create -n pccai python=3.8 && conda activate pccai && ./install_torch-1.8.1+cu-11.2.sh" 4 | 5 | # 1. Basic installation for PccAI 6 | pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html 7 | pip install tensorboard==2.8.0 8 | pip install plyfile==0.7.4 9 | pip install --no-index torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.1+cu111.html 10 | pip install --no-index torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.1+cu111.html 11 | pip install torch-geometric==2.0.3 12 | 13 | # 2. Additional installation for the examples 14 | 15 | # Optional: nndistance for fast Chamfer Distance computation 16 | cd third_party/nndistance 17 | export PATH="/usr/local/cuda-10.1/bin:$PATH" 18 | export LD_LIBRARY_PATH="/usr/local/cuda-10.1/lib64:$LD_LIBRARY_PATH" 19 | python build.py install 20 | cd ../.. 21 | 22 | # Optional: CompressAI for entropy modeling and coding, necessary for the benchmarking example "bench_ford_hetero.sh" 23 | pip install compressai==1.1.1 24 | 25 | # Optional: Open3D for visualization 26 | pip install open3d==0.14.1 27 | -------------------------------------------------------------------------------- /config/data_config/kitti_spherical.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Configure the KITTI dataset as 2D images by spherical projection 8 | 9 | dataset: kitti_spherical 10 | 11 | # Options on spherical coordinate conversion 12 | spherical_cfg: 13 | height: 128 14 | width: 2048 15 | v_fov: [-26.5, 3.0] 16 | h_fov: [-180, 180] 17 | origin_shift: [0.0, -0.1, -0.17] 18 | # origin_shift: [0.0, 0.0, 0.0] 19 | 20 | # Options on the splitting scheme 21 | splitting: 22 | train: 23 | - 0 24 | - 1 25 | - 2 26 | - 3 27 | - 4 28 | - 5 29 | val: 30 | - 6 31 | - 7 32 | test: 33 | - 11 34 | - 12 35 | - 13 36 | 37 | # Options under individual configurations 38 | train_cfg: 39 | batch_size: 8 40 | shuffle: True 41 | num_workers: 4 42 | augmentation: True 43 | split: train 44 | val_cfg: 45 | batch_size: 4 46 | shuffle: False 47 | num_workers: 2 48 | augmentation: False 49 | split: val 50 | test_cfg: 51 | batch_size: 4 52 | shuffle: False 53 | num_workers: 2 54 | augmentation: False 55 | split: test -------------------------------------------------------------------------------- /pccai/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | import numpy as np 8 | import pccai.utils.logger as logger 9 | from plyfile import PlyData, PlyElement 10 | 11 | 12 | def pc_write(pc, file_name): 13 | pc_np = pc.T.cpu().numpy() 14 | vertex = list(zip(pc_np[0], pc_np[1], pc_np[2])) 15 | vertex = np.array(vertex, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'f4')]) 16 | elements = PlyElement.describe(vertex, "vertex") 17 | PlyData([elements]).write(file_name) 18 | return 19 | 20 | 21 | def pc_read(filename): 22 | ply_raw = PlyData.read(filename)['vertex'].data 23 | pc = np.vstack((ply_raw['x'], ply_raw['y'], ply_raw['z'])).transpose() 24 | return np.ascontiguousarray(pc) 25 | 26 | 27 | def pt_to_np(tensor): 28 | """Convert PyTorch tensor to NumPy array.""" 29 | 30 | return tensor.contiguous().cpu().detach().numpy() 31 | 32 | 33 | def load_state_dict_with_fallback(obj, dict): 34 | """Load a checkpoint with fall back.""" 35 | 36 | try: 37 | obj.load_state_dict(dict) 38 | except RuntimeError as e: 39 | logger.log.exception(e) 40 | logger.log.info(f'Strict load_state_dict has failed. 
Attempting in non strict mode.') 41 | obj.load_state_dict(dict, strict=False) -------------------------------------------------------------------------------- /third_party/nndistance/LICENSE: -------------------------------------------------------------------------------- 1 | 3D Point Capsule Networks 2 | 3 | Copyright (c) 2019, Chair for Computer Aided Medical Procedures & Augmented Reality, Technical University of Munich 4 | 5 | The MIT License (MIT) 6 | 7 | Copyright (c) 2019 Yongheng Zhao 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. -------------------------------------------------------------------------------- /pccai/optim/pcc_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
import math
import torch
import sys
import os

class PccLossBase:
    """A base class of rate-distortion loss computation for point cloud compression."""

    def __init__(self, loss_args, syntax):
        """Store the loss weights and the syntax attributes used by subclasses.

        Args:
            loss_args: Mapping providing at least the keys 'alpha' and 'beta'.
            syntax: Object providing the attributes ``hetero`` and ``phase``.
        """
        self.alpha = loss_args['alpha']
        self.beta = loss_args['beta']
        self.hetero = syntax.hetero
        self.phase = syntax.phase


    @staticmethod
    def bpp_loss(loss_out, likelihoods, count):
        """Compute the rate loss with the likelihoods.

        For every non-None entry ``k`` of ``likelihoods`` the rate
        ``-sum(log2(v)) / count`` is stored in ``loss_out['bpp_loss_<k>']``,
        and the sum over all entries is stored in ``loss_out['bpp_loss']``.
        All stored values are tensors of shape (1,).

        Args:
            loss_out: Dictionary that receives the results (modified in place).
            likelihoods: Mapping from name to likelihood tensor or None.
            count: Normalization factor dividing the total number of bits.
        """

        total = None
        for k, v in likelihoods.items():
            if v is not None:
                loss = torch.log(v).sum() / (-math.log(2) * count)
                total = loss if total is None else total + loss
                loss_out[f'bpp_loss_{k}'] = loss.unsqueeze(0)
        if total is None:
            # Nothing was entropy coded: report a zero rate instead of
            # failing on the missing tensor.
            total = torch.zeros(())
        loss_out['bpp_loss'] = total.unsqueeze(0)


    def xyz_loss(self, **kwargs):
        """Needs to implement the xyz_loss"""

        raise NotImplementedError()


    def loss(self, **kwargs):
        """Needs to implement the overall loss. Can be R-D loss for lossy compression, or rate-only loss for lossless compression."""

        raise NotImplementedError()
5 | 6 | 7 | # Common options of the dataset 8 | dataset: ford_octree 9 | rw_octree: False 10 | rw_partition_scheme: default 11 | translate: [131072, 131072, 131072] 12 | scale: 1 13 | 14 | # Options about octree partitioning 15 | octree_cfg: 16 | bbox_min: [0, 0, 0] 17 | bbox_max: [262143, 262143, 262143] 18 | point_min: 64 # marked as skip if points in a block less than this number 19 | point_max: 2025 # keep splitting if points in a block larger than this number 20 | level_min: 3 21 | level_max: 10 22 | 23 | # Options on the splitting scheme 24 | splitting: 25 | train: 26 | - 1 27 | val: 28 | - 2 29 | - 3 30 | test: 31 | - 2 32 | - 3 33 | 34 | # Options under individual configurations 35 | train_cfg: 36 | batch_size: 4 37 | shuffle: True 38 | num_workers: 8 39 | augmentation: True 40 | max_num_points: 150000 41 | shuffle_blocks: True 42 | split: train 43 | val_cfg: 44 | batch_size: 4 45 | shuffle: False 46 | num_workers: 4 47 | augmentation: True 48 | max_num_points: 150000 49 | split: val 50 | test_cfg: 51 | batch_size: 4 52 | shuffle: False 53 | num_workers: 4 54 | augmentation: False 55 | max_num_points: 150000 56 | split: train 57 | # split: test 58 | -------------------------------------------------------------------------------- /experiments/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
# Test a trained point cloud compression model

import multiprocessing
multiprocessing.set_start_method('spawn', True)

import random
import os
import torch
import sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/..')

# Load different utilities from PccAI
from pccai.utils.option_handler import TestOptionHandler
import pccai.utils.logger as logger
from pccai.pipelines.test import *


if __name__ == "__main__":

    # Parse the options for testing
    option_handler = TestOptionHandler()
    opt = option_handler.parse_options()

    # Create a folder to save the results and the log
    # (exist_ok avoids the race between checking and creating the folder)
    os.makedirs(opt.exp_folder, exist_ok=True)

    # Initialize a global logger then print out all the options
    logger.create_logger(opt.exp_folder, opt.log_file, opt.log_file_only)
    try:
        option_handler.print_options(opt)
        opt = load_test_config(opt)

        # Go with the actual testing
        if opt.seed is not None:
            torch.manual_seed(opt.seed)
            random.seed(opt.seed)
        avg_loss = test_pccnet(opt)
        logger.log.info('Testing session %s finished.\n' % opt.exp_name)
    finally:
        # Always flush and close the log file, even if testing fails
        logger.destroy_logger()
5 | 6 | 7 | # Configure the KITTI dataset as the octree representation 8 | 9 | # Common options of the dataset 10 | dataset: kitti_octree 11 | rw_octree: False 12 | rw_partition_scheme: default 13 | translate: [0, 0, 0] 14 | scale: 1 15 | 16 | # Options about octree partitioning 17 | octree_cfg: 18 | bbox_min: [-96, -96, -96] 19 | bbox_max: [96, 96, 96] 20 | point_min: 64 # marked as skip if points in a block less than this number 21 | point_max: 2048 # keep splitting if points in a block larger than this number 22 | level_min: 3 23 | level_max: 10 24 | 25 | # Options on the splitting scheme 26 | splitting: 27 | train: 28 | - 0 29 | - 1 30 | - 2 31 | - 3 32 | - 4 33 | - 5 34 | val: 35 | - 6 36 | - 7 37 | test: 38 | - 11 39 | - 12 40 | - 13 41 | 42 | # Options under individual configurations 43 | train_cfg: 44 | batch_size: 4 45 | shuffle: True 46 | num_workers: 16 47 | augmentation: True 48 | max_num_points: 150000 49 | shuffle_blocks: True 50 | split: train 51 | val_cfg: 52 | batch_size: 4 53 | shuffle: False 54 | num_workers: 2 55 | augmentation: False 56 | max_num_points: 150000 57 | split: val 58 | test_cfg: 59 | batch_size: 4 60 | shuffle: False 61 | num_workers: 2 62 | augmentation: False 63 | max_num_points: 150000 64 | split: test 65 | 66 | -------------------------------------------------------------------------------- /pccai/utils/pc_metric.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
import subprocess
import os
import random

from pccai.utils.misc import pc_write
base_path = os.path.split(__file__)[0]

def compute_metrics(gt_file, pc_rec, res, normal=False):
    """Compute D1 and/or D2 with pc_error tool from MPEG

    The reconstructed point cloud is written to a temporary PLY file, the
    pc_error executable under third_party/ is run on it, and the PSNR values
    are parsed from its standard output.

    Args:
        gt_file: Path of the ground-truth point cloud file.
        pc_rec: Reconstructed point cloud, passed as-is to pc_write().
        res: Value given to pc_error through ``--resolution``.
        normal: If true, also pass ``gt_file`` as the normal source (``-n``)
            and parse the point-to-plane (D2) PSNR.

    Returns:
        A dict with keys "d1_psnr" and "d2_psnr"; a value is None when the
        corresponding line was not found in the tool output.
    """

    tmp_file_name = os.path.join('./tmp/', 'metric_' + str(hex(int(random.random() * 1e15))) + '.ply')
    rec_file = os.path.join(base_path, '../..', tmp_file_name)
    pc_error_path = os.path.join(base_path, '../..', 'third_party/pc_error')

    # The temporary folder may not exist on a fresh checkout
    os.makedirs(os.path.dirname(rec_file), exist_ok=True)
    pc_write(pc_rec, rec_file)

    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters are handled safely.
    cmd = [pc_error_path, '-a', gt_file, '-b', rec_file, '--hausdorff=1', '--resolution=' + str(res)]
    if normal:
        cmd += ['-n', gt_file]
    try:
        output = subprocess.run(cmd, stdout=subprocess.PIPE).stdout
    except OSError:
        # The tool is missing or not executable: report no metrics, as the
        # shell-based invocation did.
        output = b''
    finally:
        # Always clean up the temporary file, even if the tool failed
        os.remove(rec_file)

    d1_key = 'mseF,PSNR (p2point):'
    d2_key = 'mseF,PSNR (p2plane):'
    d1_psnr, d2_psnr = None, None
    for line in output.decode(encoding='utf-8').splitlines():
        idx = line.find(d1_key)
        if idx >= 0:  # find() returns -1 when absent; 0 is a valid match
            d1_psnr = float(line[idx + len(d1_key):])
        if normal:
            idx = line.find(d2_key)
            if idx >= 0:
                d2_psnr = float(line[idx + len(d2_key):])
    return {"d1_psnr": d1_psnr, "d2_psnr": d2_psnr}
'''
Merge CSV files for MPEG reporting purpose.
Usage: python ./utils/merge_csv.py --input_files file_1.csv file_2.csv --output_file file_3.csv
'''

import argparse
import csv
import os


def main(opt):
    """Merge the input CSV files into one sorted CSV file.

    Args:
        opt: Options object with ``input_files`` (list of CSV paths) and
            ``output_file`` (path of the merged CSV to write).
    """

    # Read the input CSV files and sort the entries
    log_dict_all = []
    for csv_file in opt.input_files:
        # newline='' lets the csv module handle line endings itself
        with open(csv_file, 'r', newline='') as f:
            log_dict_all.extend(dict(item) for item in csv.DictReader(f))
    log_dict_all.sort(key=lambda x: (x['sequence'], int(x['numBitsGeoEncT'])))  # perform sorting with two keys

    # Write the merged CSV file
    mpeg_report_header = ['sequence', 'numOutputPointsT', 'numBitsGeoEncT', 'd1T', 'd2T', 'encTimeT', 'decTimeT']
    with open(opt.output_file, 'w', newline='') as f:
        # Columns that are not part of the MPEG report are dropped rather
        # than aborting the merge.
        writer = csv.DictWriter(f, fieldnames=mpeg_report_header, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(log_dict_all)


def add_options(parser):
    """Register the command line options of this tool on ``parser`` and return it."""

    parser.add_argument('--input_files', type=str, nargs='+', required=True, help='File names of the input CSV files.')
    parser.add_argument('--output_file', type=str, required=True, help='File name of the output CSV file.')

    return parser


if __name__ == "__main__":

    # Initialize parser with basic options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser = add_options(parser)
    opt, _ = parser.parse_known_args()
    main(opt)
import torch.nn as nn
from pccai.optim.utils import get_loss_class

# Import all the architectures to be used
from pccai.models.architectures.mlpcomp import MlpCompression
from pccai.models.architectures.sparsecnncomp import SparseCnnCompression

# List all the architectures in the following dictionary
# For a custom architecture, it is recommended to implement a compress() and a decompress() functions that can be called by the codec.
architectures = {
    'mlp_compression': MlpCompression,
    'sparsecnn_compression': SparseCnnCompression,
}


def get_architecture_class(architecture_name):
    """Return the architecture class registered under the (case-insensitive) name.

    Raises:
        AssertionError: If no architecture is registered under that name.
    """
    architecture = architectures.get(architecture_name.lower(), None)
    assert architecture is not None, f'architecture "{architecture_name}" not found, valid architectures are: {list(architectures.keys())}'
    return architecture


class PccModelWithLoss(nn.Module):
    """A wrapper class for point cloud compression model and its associated loss function."""

    def __init__(self, net_config, syntax, loss_args = None):
        """Build the compression model and, optionally, its loss.

        Args:
            net_config: Mapping with the keys 'architecture' (name of the
                architecture) and 'modules' (passed to the architecture).
            syntax: Syntax object forwarded to the architecture and the loss.
            loss_args: Mapping with at least the key 'loss' (name of the loss
                class), or None to build the model without a loss.
        """
        super(PccModelWithLoss, self).__init__()

        # Get the architecture and initialize it
        architecture_class = get_architecture_class(net_config['architecture'])
        self.pcc_model = architecture_class(net_config['modules'], syntax)

        # Get the loss class and initialize it. The attribute must always
        # exist because forward() checks it against None.
        self.loss = None
        if loss_args is not None:
            loss_class = get_loss_class(loss_args['loss'])
            self.loss = loss_class(loss_args, syntax)

    def forward(self, data):
        """Run the model on ``data``; add the 'loss' entry when a loss is configured."""
        out = self.pcc_model(data)
        if self.loss is not None: out['loss'] = self.loss.loss(data, out)

        return out
# functions/nnd.py
import torch
from torch.autograd import Function
import my_lib_cuda as my_lib


class NNDFunction(Function):
    """Autograd function wrapping the nearest-neighbor distance extension."""

    @staticmethod
    def forward(ctx, xyz1, xyz2):
        """Run the forward nearest-neighbor kernel between two point sets.

        Args:
            xyz1: Tensor whose size unpacks as (batchsize, n, _).
            xyz2: Tensor whose size unpacks as (_, m, _).

        Returns:
            dist1, dist2: Tensors of size (batchsize, n) and (batchsize, m)
                filled by the extension.
            idx1, idx2: Index tensors of the same sizes, converted to long.
        """
        device = xyz1.device
        batchsize, n, _ = xyz1.size()
        _, m, _ = xyz2.size()

        # Allocate the outputs directly on the device of the input instead of
        # on the host followed by .cuda(), which targets the current CUDA
        # device and costs an extra copy.
        dist1 = torch.zeros(batchsize, n, device=device)
        dist2 = torch.zeros(batchsize, m, device=device)

        idx1 = torch.zeros(batchsize, n, dtype=torch.int32, device=device)
        idx2 = torch.zeros(batchsize, m, dtype=torch.int32, device=device)

        if not xyz1.is_cuda:
            my_lib.nnd_forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
        else:
            my_lib.nnd_forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2)

        # The int32 indices are saved for the backward kernels; callers get
        # long indices usable with torch.gather().
        ctx.save_for_backward(xyz1, xyz2, dist1, dist2, idx1, idx2)
        idx1 = idx1.to(device=device, dtype=torch.long)
        idx2 = idx2.to(device=device, dtype=torch.long)
        return dist1, dist2, idx1, idx2

    @staticmethod
    def backward(ctx, graddist1, graddist2, gradidx1, gradidx2):
        """Run the backward kernel; gradients w.r.t. the indices are ignored."""

        xyz1, xyz2, dist1, dist2, idx1, idx2 = ctx.saved_tensors
        graddist1 = graddist1.contiguous()
        graddist2 = graddist2.contiguous()

        # Gradient buffers live on the same device as the incoming gradients
        gradxyz1 = torch.zeros(xyz1.size(), device=graddist1.device)
        gradxyz2 = torch.zeros(xyz2.size(), device=graddist1.device)

        if not graddist1.is_cuda:
            my_lib.nnd_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2)
        else:
            my_lib.nnd_backward_cuda(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2)
        return gradxyz1, gradxyz2
# A simple tool to generate temporary scripts which holds the options

import sys
import os
import random

cur_dir = os.path.dirname(os.path.abspath(__file__))
home_dir=os.path.abspath(os.path.join(cur_dir, '..'))

def main(args_file=None, base_dir=None):
    """Generate a temporary shell script holding the options of an experiment.

    Every line of the argument script that starts with a letter is copied to
    the temporary script (except the EXP_NAME and RUN_ARGUMENTS assignments),
    and a RUN_ARGUMENTS line passing each assigned option as
    ``--<name> ${<NAME>}`` is appended.

    Args:
        args_file: Path of the argument script; defaults to ``sys.argv[1]``.
        base_dir: Root folder under which ``scripts/tmp`` is created;
            defaults to the repository root ``home_dir``.

    Returns:
        The file name (not the full path) of the generated script.
    """

    if args_file is None:
        args_file = sys.argv[1]
    if base_dir is None:
        base_dir = home_dir

    # Create a folder if not exist
    tmp_script_folder = 'tmp'
    tmp_script_path = os.path.join(base_dir, 'scripts', tmp_script_folder)
    os.makedirs(tmp_script_path, exist_ok=True)

    # Create the new argument script
    tmp_file_name = 'tmp_' + str(hex(int(random.random() * 1e15))) + '.sh'
    exp_name = os.path.basename(args_file).split('.')[0]
    addline = 'RUN_ARGUMENTS="${PY_NAME} --exp_name ${EXP_NAME} '

    # Context managers make sure both files are closed (and the output is
    # flushed) before the file name is handed back to the caller.
    with open(os.path.join(tmp_script_path, tmp_file_name), 'w') as tmp_file, open(args_file) as f:
        tmp_file.write('HOME_DIR="' + base_dir + '"\n')
        tmp_file.write('EXP_NAME="' + exp_name + '"\n')

        # add the arguments one-by-one
        for line in f:
            line = line.lstrip()
            if len(line) == 0 or not line[0].isalpha():
                continue
            idx = line.find('=')
            if idx < 0:
                # Not an assignment: keep the line but do not derive an option
                # from it (slicing with -1 would chop the name).
                tmp_file.write(line)
                continue
            opt_name = line[0:idx].upper()
            if opt_name != "PY_NAME" and opt_name != "EXP_NAME":
                addline += "--" + opt_name.lower() + " ${" + opt_name + "} "
            if opt_name != 'RUN_ARGUMENTS' and opt_name != "EXP_NAME":
                tmp_file.write(line)

        # Drop the trailing space and close the quote; the leading newline
        # also terminates a last input line that had no line break.
        tmp_file.write("\n" + addline[:-1] + '"')

    return tmp_file_name


if __name__ == "__main__":

    tmp_file_name = main()
    print(tmp_file_name)
    sys.exit(0)
This software may be subject to InterDigital and other third 3 | party and contributor rights, including patent rights, and no such rights are 4 | granted under this license. 5 | 6 | Copyright (c) 2010-2022, InterDigital 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted (subject to the limitations in the disclaimer 11 | below) provided that the following conditions are met: 12 | 13 | * Redistributions of source code must retain the above copyright notice, 14 | this list of conditions and the following disclaimer. 15 | 16 | * Redistributions in binary form must reproduce the above copyright notice, 17 | this list of conditions and the following disclaimer in the documentation 18 | and/or other materials provided with the distribution. 19 | 20 | * Neither the name of InterDigital nor the names of the Project where this 21 | contribution had been made may be used to endorse or promote products derived 22 | from this software without specific prior written permission. 23 | 24 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS 25 | LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 27 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 29 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 31 | GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 | OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Make an attempt to load MinkowskiEngine; without it forward() skips the sparse tensor example
try:
    import MinkowskiEngine as ME
    found_ME = True
except ModuleNotFoundError:
    found_ME = False


class SparseCnnCompression(nn.Module):
    """A template compression architecture based on sparse convolutions.

    This example shows how PccAI works with MinkowskiEngine and the sparse_collate() function in
    point_cloud_dataset.py to operate on sparse 3D tensors. It is just an incomplete template for
    reference: forward(), compress() and decompress() are placeholders that return None.
    """

    def __init__(self, net_config, syntax):
        """Store the configuration; the actual sub-modules still have to be created.

        Args:
            net_config: network configuration, must contain the key 'entropy_bottleneck'.
            syntax: syntax object describing the internal data layout (only stored here).
        """
        # The class used to have no base class while passing arguments to super().__init__(),
        # which raises TypeError on construction. nn.Module takes no constructor arguments.
        # NOTE(review): if a CompressAI CompressionModel base is intended (as in mlpcomp.py),
        # swap the base class and forward the entropy bottleneck size to it.
        super().__init__()
        self.entropy_bottleneck_channels = net_config['entropy_bottleneck']
        self.syntax = syntax

        # TODO: initialize necessary modules with get_module_class()

    def forward(self, coords):
        """Build a sparse tensor from the collated coordinates (template, returns None).

        Args:
            coords: 2D tensor whose column 0 is -1 for rows to be dropped; for the remaining
                rows the cumulative sum of column 0 is used as the batch index.
                Assumes this is the output of sparse_collate() -- TODO confirm.
        """

        # Construct coordinates from sparse tensor; boolean indexing returns a copy, so the
        # in-place edits below do not touch the caller's tensor
        coords = coords[coords[:, 0] != -1]
        if coords.shape[0] == 0:
            return None # nothing left after removing the padding rows
        coords[0][0] = 0
        coords[:, 0] = torch.cumsum(coords[:, 0], 0)
        device = coords.device

        # An example to build a sparse tensor x with the MinkowskiEngine
        if found_ME:
            x = ME.SparseTensor(
                features=torch.ones(coords.shape[0], 1, device=device, dtype=torch.float32),
                coordinates=coords,
                device=device)

            # TODO: Perform processing to the sparse tensor x

        return None

    def compress(self, x):
        """Performs actual compression with learned statistics of the entropy bottleneck, consumes one point cloud at a time."""
        return None

    def decompress(self, strings, shape, meta_data=None):
        """Performs actual decompression with learned statistics of the entropy bottleneck, consumes one point cloud at a time."""
        return None
def cart2spherical(input_xyz):
    """Conversion from Cartesian coordinates to spherical coordinates.

    Args:
        input_xyz: array indexed as [:, 0], [:, 1], [:, 2] for the x, y, z of every point.

    Returns:
        Array with one (r, alpha, epsilon) row per point: the range, the azimuth
        arctan2(y, x) and the elevation arcsin(z / r). A point at the origin gets a zero
        elevation instead of NaN.
    """

    r = np.sqrt(input_xyz[:, 0] ** 2 + input_xyz[:, 1] ** 2 + input_xyz[:, 2] ** 2)
    alpha = np.arctan2(input_xyz[:, 1], input_xyz[:, 0]) # corresponding to width
    # Divide only where the range is non-zero, and clip so that rounding errors cannot push
    # the ratio outside the domain of arcsin
    sin_epsilon = np.divide(input_xyz[:, 2], r, out=np.zeros_like(r), where=r > 0)
    epsilon = np.arcsin(np.clip(sin_epsilon, -1.0, 1.0)) # corresponding to height
    return np.stack((r, alpha, epsilon), axis=1)


def spherical2cart(input_spherical):
    """Conversion from spherical coordinates to Cartesian coordinates.

    Args:
        input_spherical: array with one (r, alpha, epsilon) row per point, as produced by
            cart2spherical().

    Returns:
        Array with one (x, y, z) row per point.
    """

    r, alpha, epsilon = input_spherical[:, 0], input_spherical[:, 1], input_spherical[:, 2]
    x = r * np.cos(alpha) * np.cos(epsilon)
    y = r * np.sin(alpha) * np.cos(epsilon)
    z = r * np.sin(epsilon)
    return np.stack((x, y, z), axis=1)


def pc2img(h_fov, v_fov, width, height, inf, data):
    """Convert a point cloud to a 2D range image.

    Args:
        h_fov: (min, max) azimuth mapped onto the image columns.
        v_fov: (min, max) elevation mapped onto the image rows.
        width: number of image columns.
        height: number of image rows.
        inf: value written to the pixels that receive no point.
        data: point cloud, one (x, y, z) row per point.

    Returns:
        A float32 image of shape (height, width) holding the range of the points; when
        several points fall on the same pixel only one of them is kept.
    """

    data_spherical = cart2spherical(data)

    # Project the point cloud onto an image!
    x = (data_spherical[:, 1] - h_fov[0]) / (h_fov[1] - h_fov[0])
    y = (data_spherical[:, 2] - v_fov[0]) / (v_fov[1] - v_fov[0])
    x = np.round(x * (width - 1)).astype(np.int32)
    y = np.round(y * (height - 1)).astype(np.int32)

    # exclude the pixels that are out of the selected FOV
    mask = ~((x < 0) | (x >= width) | (y < 0) | (y >= height))
    x, y = x[mask], y[mask]
    depth = data_spherical[:, 0][mask]
    data_img = np.ones((height, width), dtype=np.float32) * inf
    data_img[y, x] = depth

    return data_img


def img2pc(h_fov, v_fov, width, height, inf, data):
    """Convert a 2D range image back to the point cloud.

    Args:
        h_fov: (min, max) azimuth mapped onto the image columns.
        v_fov: (min, max) elevation mapped onto the image rows.
        width: number of image columns.
        height: number of image rows.
        inf: value marking the empty pixels; pixels with a range >= inf - 1 are dropped.
        data: range image of shape (height, width).

    Returns:
        Array with one (x, y, z) row per non-empty pixel.
    """

    # max(..., 1) keeps a one-pixel wide/high image from dividing by zero; such an image maps
    # to the lower bound of the FOV, which is also where pc2img() puts every point
    alpha = (np.arange(width) / max(width - 1, 1)) * (h_fov[1] - h_fov[0]) + h_fov[0]
    epsilon = (np.arange(height) / max(height - 1, 1)) * (v_fov[1] - v_fov[0]) + v_fov[0]
    alpha, epsilon = np.meshgrid(alpha, epsilon)
    data_pc = np.stack((data, alpha, epsilon), axis=2)
    data_pc = data_pc.reshape(-1, 3)
    data_pc = data_pc[data_pc[:, 0] < inf - 1, :]
    data_pc = spherical2cart(data_pc)

    return data_pc
def _find_free_port(address, port):
    """Return the first port number >= `port` on `address` that refuses a TCP connection."""

    while True:
        # A socket whose connect_ex() succeeded stays connected and cannot be used for another
        # probe, so every candidate port gets a fresh socket which is closed right away
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            if probe.connect_ex((address, port)) != 0:
                return port # nothing is listening there: found a port
        port += 1


def setup(rank, world_size, master_address, master_port):
    """Setup the DDP processes if necessary, each process will be allocated to one GPU.

    Args:
        rank: rank of the calling process.
        world_size: total number of processes in the group.
        master_address: address of the master node.
        master_port: first port to try, the next ones are probed until one is not in use.
    """

    # Look for an available port first
    # NOTE(review): every rank probes on its own, so the ranks are presumably expected to
    # agree on the same port -- confirm that this holds once rank 0 has bound it.
    master_port = _find_free_port(master_address, master_port)

    # initialize the process group
    os.environ['MASTER_PORT'] = str(master_port)
    os.environ['MASTER_ADDR'] = master_address
    dist.init_process_group("gloo", rank=rank, world_size=world_size)


def cleanup():
    """Destroy the process group."""

    dist.destroy_process_group()


def train_main(device, opt):
    """Main training wrapper.

    Args:
        device: index of the device to train on, also used as the rank in DDP mode.
        opt: parsed training options.
    """

    # Initialize a global logger then print out all the options
    logger.create_logger(opt.exp_folder, opt.log_file, opt.log_file_only)
    option_handler = TrainOptionHandler()
    option_handler.print_options(opt)
    opt = load_train_config(opt)
    opt.device = device
    opt.device_count = torch.cuda.device_count()
    if opt.ddp:
        setup(device, opt.device_count, opt.master_address, opt.master_port)

    # Go with the actual training
    if opt.seed is not None:
        torch.manual_seed(opt.seed)
        random.seed(opt.seed)
    train_pccnet(opt)
    logger.log.info('Training session %s finished.\n' % opt.exp_name)
    logger.destroy_logger()
    if opt.ddp:
        cleanup()


if __name__ == "__main__":

    # Parse the options and perform training
    option_handler = TrainOptionHandler()
    opt = option_handler.parse_options()

    # Create a folder to save the models and the log
    os.makedirs(opt.exp_folder, exist_ok=True)
    if opt.ddp:
        mp.spawn(train_main, args=(opt,), nprocs=torch.cuda.device_count(), join=True)
    else:
        train_main(0, opt)
62 | *.manifest 63 | *.spec 64 | 65 | # Installer logs 66 | pip-log.txt 67 | pip-delete-this-directory.txt 68 | 69 | # Unit test / coverage reports 70 | htmlcov/ 71 | .tox/ 72 | .nox/ 73 | .coverage 74 | .coverage.* 75 | .cache 76 | nosetests.xml 77 | coverage.xml 78 | *.cover 79 | .hypothesis/ 80 | .pytest_cache/ 81 | 82 | # Translations 83 | *.mo 84 | *.pot 85 | 86 | # Django stuff: 87 | *.log 88 | local_settings.py 89 | db.sqlite3 90 | 91 | # Flask stuff: 92 | instance/ 93 | .webassets-cache 94 | 95 | # Scrapy stuff: 96 | .scrapy 97 | 98 | # Sphinx documentation 99 | docs/_build/ 100 | 101 | # PyBuilder 102 | target/ 103 | 104 | # Jupyter Notebook 105 | .ipynb_checkpoints 106 | 107 | # IPython 108 | profile_default/ 109 | ipython_config.py 110 | 111 | # pyenv 112 | .python-version 113 | 114 | # pipenv 115 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in 116 | # version control. 117 | # However, in case of collaboration, if having platform-specific dependencies 118 | # or dependencies 119 | # having no cross-platform support, pipenv may install dependencies that don’t 120 | # work, or not 121 | # install all needed dependencies. 122 | #Pipfile.lock 123 | 124 | # celery beat schedule file 125 | celerybeat-schedule 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | -------------------------------------------------------------------------------- /pccai/utils/syntax.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 
def gen_syntax_gt(hetero):
    """Return the syntax of the ground-truth data in heterogeneous mode, None otherwise.

    Single-valued entries map to a column index, 'xyz' and 'block_center' map to an inclusive
    [first, last] column range, '__len__' is the total number of columns.
    """

    if not hetero:
        return None
    return {
        '__len__': 10,
        'xyz': [0, 2],
        'block_pntcnt': 3,
        'block_center': [4, 6],
        'block_scale': 7,
        'block_start': 9,
    }


class SyntaxGenerator():
    """Generate the syntax for internal data and module status communications."""

    def __init__(self, opt):
        """Build all the syntaxes from the options `hetero`, `phase` and `net_config`."""
        self.hetero = opt.hetero
        self.phase = opt.phase
        self.generate_syntax_gt()
        self.generate_syntax_rec()
        self.generate_syntax_cw(opt.net_config)

    def generate_syntax_gt(self, **kwargs):
        """xyz have to be arranged at the beginning, the rest can be swapped
        Data syntax: x, y, z, block_pntcnt, block_center, block_scale, block_start
        index:       0, 1, 2, 3,            4 ~ 6,        7,           9
        Column 8 is not assigned by this syntax.
        """
        self.syntax_gt = gen_syntax_gt(self.hetero)

    def generate_syntax_rec(self, **kwargs):
        """xyz have to be arranged at the beginning, the rest can be swapped
        Rec. syntax: x, y, z, block_start, block_center, block_scale, pc_start
        index:       0, 1, 2, 3,           4 ~ 6,        7,           8
        """
        if self.hetero:
            self.syntax_rec = {
                '__len__': 10,
                'xyz': [0, 2],
                'block_start': 3,
                'block_center': [4, 6],
                'block_scale': 7,
                'pc_start': 8,
            }
        else:
            self.syntax_rec = None

    def generate_syntax_cw(self, net_config, **kwargs):
        # A raw docstring, the backslashes of the diagram are not valid escape sequences
        r"""Codewords have to be arranged at the beginning, the rest can be swapped
        Code  syntax: codeword, block_pntcnt, block_center, block_scale, pc_start
               index: 0 ~ 511,  512,          513 ~ 515,    516,         517
                                \-------------------- --------------------/
                                                     \/
                                                 meta_data
        The indices above are for a codeword length of 512, the length is read from
        net_config['modules']['entropy_bottleneck'].
        """
        if self.hetero:
            len_cw = net_config['modules']['entropy_bottleneck']
            self.syntax_cw = {
                '__len__': len_cw + 7,
                '__meta_idx__': len_cw,
                'cw': [0, len_cw - 1],
                'block_pntcnt': len_cw,
                'block_center': [len_cw + 1, len_cw + 3],
                'block_scale': len_cw + 4,
                'pc_start': len_cw + 5,
            }
        else:
            self.syntax_cw = None


def syn_slc(syntax, attr):
    """Create a slice from a syntax and a key

    The key has to refer to an inclusive [first, last] range, e.g., 'xyz'.
    """

    first, last = syntax[attr][0], syntax[attr][1]
    return slice(first, last + 1)
def nndistance_simple(rec, data):
    """
    A simple nearest neighbor search, not very efficient, just for reference

    Args:
        rec: tensor indexed as (batch, point, coordinate).
        data: tensor indexed as (batch, point, coordinate).

    Returns:
        (data_dist, rec_dist, data_idx, rec_idx): for every point of `data` the squared
        distance to and the index of its nearest point in `rec`, and vice versa.
    """
    rec_sq = torch.sum(rec * rec, dim=2, keepdim=True)
    data_sq = torch.sum(data * data, dim=2, keepdim=True)
    cross = torch.matmul(data, rec.permute(0, 2, 1))
    # |d - r|^2 = |d|^2 - 2 <d, r> + |r|^2, arranged as (batch, data point, rec point)
    dist = data_sq - 2 * cross + rec_sq.permute(0, 2, 1)
    data_dist, data_idx = torch.min(dist, dim=2)
    rec_dist, rec_idx = torch.min(dist, dim=1)
    return data_dist, rec_dist, data_idx, rec_idx


try:
    # If you want to use the efficient NN search for computing CD loss, compile the nndistance()
    # function under the third_party folder according to the instructions in its README.md
    sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../third_party/nndistance'))
    from modules.nnd import NNDModule
    nndistance = NNDModule()
except ModuleNotFoundError:
    # Without the compiled nndistance(), the nearest neighbor search falls back to nndistance_simple()
    nndistance = nndistance_simple


class ChamferDistSparse(PccLossBase):
    """Chamfer distance loss for sparse voxels."""

    def __init__(self, loss_args, syntax):
        super().__init__(loss_args, syntax)

    def xyz_loss(self, loss_out, net_in, net_out):
        """Compute the xyz-loss and write it to loss_out['xyz_loss'].

        Args:
            loss_out: dictionary receiving the loss.
            net_in: network input, not used here.
            net_out: network output, 'x_hat' and 'gt' are 2D tensors whose column 0 is the
                index of the point cloud in the batch and whose other columns are the
                coordinates.
        """

        x_hat = net_out['x_hat']
        gt = net_out['gt']
        # The batch size is read from the last row of x_hat
        # NOTE(review): this assumes that the rows are sorted by their batch index -- confirm
        batch_size = x_hat[-1][0].round().int().item() + 1
        dist = torch.zeros(batch_size, device=x_hat.device)
        for i in range(batch_size):
            dist_out, dist_x, _, _ = nndistance(
                x_hat[x_hat[:, 0].round().int()==i, 1:].unsqueeze(0).contiguous(),
                gt[gt[:, 0] == i, 1:].unsqueeze(0).float().contiguous()
            )
            # Symmetric in the two directions, so the order of the two distances does not matter
            dist[i] = torch.max(torch.mean(dist_out), torch.mean(dist_x))
        loss = torch.mean(dist)
        loss_out['xyz_loss'] = loss.unsqueeze(0) # write the 'xyz_loss' as return

    def loss(self, net_in, net_out):
        """Overall R-D loss computation.

        Returns:
            A dictionary with the entries 'bpp_loss', 'xyz_loss' and 'loss'.
        """

        loss_out = {}

        # Rate loss
        if 'likelihoods' in net_out and len(net_out['likelihoods']) > 0:
            self.bpp_loss(loss_out, net_out['likelihoods'], net_out['gt'].shape[0])
        else:
            # Create the zero rate on the device of x_hat; .cuda() would move it to the
            # default GPU, which need not be the one holding x_hat
            loss_out['bpp_loss'] = torch.zeros((1,), device=net_out['x_hat'].device)

        # Distortion loss
        self.xyz_loss(loss_out, net_in, net_out)

        # R-D loss = alpha * D + beta * R
        loss_out["loss"] = self.alpha * loss_out['xyz_loss'] + self.beta * loss_out["bpp_loss"] # R-D loss
        return loss_out
# https://github.com/pytorch/pytorch/issues/5059
# Fix numpy random seed issue with multi worker DataLoader
# Multi worker based on process forking duplicates the same numpy random seed across all workers
# Note that this issue is absent with pytorch random operations
def wif(id):
    """Worker init function: seed numpy differently in every DataLoader worker.

    Args:
        id: index of the worker, given by the DataLoader.
    """
    process_seed = torch.initial_seed()
    # Back out the base_seed so we can use all the bits.
    base_seed = process_seed - id
    ss = np.random.SeedSequence([id, base_seed])
    # More than 128 bits (4 32-bit words) would be overkill.
    np.random.seed(ss.generate_state(4))


def get_point_cloud_dataset(dataset_name):
    """List all the data sets in this function for class retrieval.

    The exact names are checked first, then the name is searched (case-insensitively) for the
    keywords 'simple', 'spherical' and 'octree', in this order, to select a LiDAR loader.

    Returns:
        The data set class, or None if the name is not recognized.
    """

    name = dataset_name.lower()
    exact_names = {
        'shapenet_part': ShapeNetPart,
        'modelnet_simple': ModelNetSimple,
        'modelnet_octree': ModelNetOctree,
    }
    if name in exact_names:
        return exact_names[name]
    for keyword, lidar_class in (('simple', LidarSimple), ('spherical', LidarSpherical), ('octree', LidarOctree)):
        if keyword in name:
            return lidar_class
    return None


def sparse_collate(list_data):
    """A collate function tailored for generating sparse voxels of MinkowskiEngine.

    Args:
        list_data: list of numpy arrays, stacked vertically into one array.

    Returns:
        The stacked array as a torch tensor.
    """

    return torch.from_numpy(np.vstack(list_data))


def point_cloud_dataloader(data_config, syntax=None, ddp=False):
    """A wrapper for point cloud datasets.

    Args:
        data_config: pair (configuration, split name); the configuration holds the 'dataset'
            name, the optional 'sparse_collate' flag and, under the split name, the loader
            settings 'batch_size', 'shuffle' and 'num_workers'.
        syntax: syntax object handed over to the data set.
        ddp: whether to build the loader for distributed data parallel, in which case the
            batch size and the workers are divided among the GPUs.

    Returns:
        (dataset, dataloader)

    Raises:
        ValueError: if the data set name is not recognized.
    """

    dataset_class = get_point_cloud_dataset(data_config[0]['dataset'])
    if dataset_class is None:
        raise ValueError('unknown point cloud dataset "%s"' % data_config[0]['dataset'])
    point_cloud_dataset = dataset_class(data_config[0], data_config[1], syntax=syntax)
    collate_fn = sparse_collate if data_config[0].get('sparse_collate', False) else None
    dl_conf = data_config[0][data_config[1]]

    if ddp: # for distributed data parallel
        num_procs = max(torch.cuda.device_count(), 1) # avoid a division by zero without GPU
        batch_size = int(dl_conf['batch_size'] / num_procs)
        num_workers = int(dl_conf['num_workers'] / num_procs)
        sampler = torch.utils.data.distributed.DistributedSampler(point_cloud_dataset, shuffle=dl_conf['shuffle'])
        # persistent_workers has to follow the number of workers actually used by this
        # process: the DataLoader rejects persistent workers when there is no worker
        loader = DataLoader(point_cloud_dataset, batch_size=batch_size,
            num_workers=num_workers, persistent_workers=num_workers > 0,
            worker_init_fn=wif, sampler=sampler, pin_memory=False, drop_last=False, collate_fn=collate_fn)
    else:
        num_workers = dl_conf['num_workers']
        loader = DataLoader(point_cloud_dataset, batch_size=dl_conf['batch_size'], shuffle=dl_conf['shuffle'],
            num_workers=num_workers, persistent_workers=num_workers > 0,
            worker_init_fn=wif, pin_memory=False, drop_last=False, collate_fn=collate_fn)
    return point_cloud_dataset, loader
# List all the loss classes in the following dictionary
loss_classes = {
    'cd_canonical': ChamferDistCanonical,
    'cd_sparse': ChamferDistSparse
}

def get_loss_class(loss_name):
    """Retrieve a loss class from its (case-insensitive) name."""
    loss = loss_classes.get(loss_name.lower(), None)
    assert loss is not None, f'loss class "{loss_name}" not found, valid loss classes are: {list(loss_classes.keys())}'
    return loss


def _make_adam(params, args):
    """Create an Adam optimizer from 'lr' and 'opt_args' = [beta1, beta2, weight_decay]."""
    opt_args = args['opt_args']
    return optim.Adam(params, lr=args['lr'], betas=(opt_args[0], opt_args[1]), weight_decay=opt_args[2])


def _make_scheduler(optimizer, sche_args):
    """Create the scheduler named by sche_args[0] ('exp', 'step' or 'multistep'), None otherwise."""
    scheme = sche_args[0].lower()
    if scheme == 'exp':
        return optim.lr_scheduler.ExponentialLR(optimizer, gamma=sche_args[1])
    if scheme == 'step':
        return optim.lr_scheduler.StepLR(optimizer, step_size=sche_args[1], gamma=sche_args[2])
    if scheme == 'multistep':
        return optim.lr_scheduler.MultiStepLR(optimizer, milestones=sche_args[1:-1], gamma=sche_args[-1])
    return None # 'fix' scheme


def configure_optimization(pccnet, optim_config):
    """Configure the optimizers and the schedulers for training.

    The trainable parameters whose name ends with ".quantiles" go to the auxiliary optimizer,
    all the other trainable parameters go to the main optimizer. Parameters that do not
    require gradients are left out.

    Args:
        pccnet: the network to be optimized.
        optim_config: dictionary with the entry 'main_args' and, if the network has auxiliary
            parameters, 'aux_args'; each of them holds 'lr', 'opt_args' (beta1, beta2 and
            weight decay) and 'schedule_args' (name of the scheme followed by its arguments).

    Returns:
        (optimizer, scheduler, aux_optimizer, aux_scheduler), the schedulers are None for the
        'fix' scheme, both auxiliary objects are None without auxiliary parameters.
    """

    # Separate parameters for the main optimizer and the auxiliary optimizer
    trainable = {n: p for n, p in pccnet.named_parameters() if p.requires_grad}
    parameters = set(n for n in trainable if not n.endswith(".quantiles"))
    aux_parameters = set(n for n in trainable if n.endswith(".quantiles"))

    # Make sure we don't have an intersection of parameters, and that every trainable
    # parameter is handled; frozen parameters are not counted, or they would make the check fail
    assert len(parameters & aux_parameters) == 0
    assert len(parameters | aux_parameters) == len(trainable)

    # We only support the Adam optimizer to make things less complicated
    optimizer = _make_adam([trainable[n] for n in sorted(parameters)], optim_config['main_args'])
    scheduler = _make_scheduler(optimizer, optim_config['main_args']['schedule_args'])

    # For the auxiliary parameters
    if len(aux_parameters) > 0:
        aux_optimizer = _make_adam([trainable[n] for n in sorted(aux_parameters)], optim_config['aux_args'])
        aux_scheduler = _make_scheduler(aux_optimizer, optim_config['aux_args']['schedule_args'])
    else:
        aux_optimizer = aux_scheduler = None

    return optimizer, scheduler, aux_optimizer, aux_scheduler
# Make an attempt to load CompressAI
try:
    from compressai.models.priors import CompressionModel
    found_compressai = True
except ModuleNotFoundError:
    found_compressai = False
    CompressionModel = nn.Module


class MlpCompression(CompressionModel):
    """A simple compression architecture with MLP Decoder.

    The encoder output is split into a codeword (the first `entropy_bottleneck` columns) and
    the remaining meta data columns. Only the codeword goes through the entropy bottleneck.
    """

    def __init__(self, net_config, syntax):
        """Build the encoder and the decoder named in net_config['cw_gen'] / ['pc_gen']."""

        num_channels = net_config['entropy_bottleneck']
        if found_compressai:
            super().__init__(num_channels, False)
        else:
            super().__init__()

        encoder_config = net_config['cw_gen']
        encoder_class = get_module_class(encoder_config['model'], syntax.hetero)
        self.encoder = encoder_class(encoder_config, syntax=syntax)

        decoder_config = net_config['pc_gen']
        decoder_class = get_module_class(decoder_config.get('model', 'mlpdecoder'), syntax.hetero) # "mlpdecoder" is the default
        self.decoder = decoder_class(decoder_config, syntax=syntax)

        self.syntax = syntax
        self.compression = net_config.get('compression', True) and found_compressai
        self.entropy_bottleneck_channels = num_channels

    def forward(self, x):
        """Encode then decode x; returns 'x_hat' and, with compression on, the 'likelihoods'."""

        num_channels = self.entropy_bottleneck_channels
        y = self.encoder(x)
        codeword = y[:, :num_channels] # the metadata is removed

        if not self.compression:
            y_hat = codeword
        else:
            y_hat, y_likelihoods = self.entropy_bottleneck(codeword.unsqueeze(-1).unsqueeze(-1))
            y_hat = y_hat.squeeze(-1).squeeze(-1)
            y_likelihoods = y_likelihoods.squeeze(-1).squeeze(-1)

        # the metadata is also given to the decoder when hetero is on
        decoder_input = torch.hstack((y_hat, y[:, num_channels:]))
        output = {"x_hat": self.decoder(decoder_input)}
        if self.compression:
            output["likelihoods"] = {"y": y_likelihoods}
        return output

    def compress(self, x):
        """Performs actual compression with learned statistics of the entropy bottleneck, consumes one point cloud at a time."""

        assert found_compressai
        y = self.encoder(x)
        codeword = y[:, :self.entropy_bottleneck.channels].unsqueeze(-1).unsqueeze(-1)
        y_strings = self.entropy_bottleneck.compress(codeword)
        if self.syntax.hetero:
            meta_data = y[:, self.entropy_bottleneck.channels:]
        else:
            meta_data = None

        # The codeword has a "width" and a "height" of one; the meta data is returned as well
        compressed = {"strings": [y_strings], "shape": torch.Size([1, 1])}
        return compressed, meta_data

    def decompress(self, strings, shape, meta_data=None):
        """Performs actual decompression with learned statistics of the entropy bottleneck, consumes one point cloud at a time."""

        assert found_compressai and isinstance(strings, list) and len(strings) == 1
        y_hat = self.entropy_bottleneck.decompress(strings[0], shape).squeeze(-1).squeeze(-1) # get back the codeword

        if not self.syntax.hetero:
            return self.decoder(y_hat).squeeze(0), None

        # Heterogeneous mode: append the meta data to the codeword if there is any
        if meta_data is not None:
            y_hat = torch.hstack((y_hat, meta_data.squeeze(-1).squeeze(-1)))
        x_hat = self.decoder(y_hat)
        xyz_first = self.syntax.syntax_rec['xyz'][0]
        xyz_last = self.syntax.syntax_rec['xyz'][1]
        new_meta_data = x_hat[:, xyz_last + 1:] # the columns after xyz are the new meta data
        return x_hat[:, xyz_first : xyz_last + 1], new_meta_data
class PointNet(nn.Module):
    """The vanilla PointNet model in homogeneous batching mode.

    A point-wise MLP is followed by a global max pooling and by another MLP ("FC") applied to
    the pooled feature.

    Args:
        net_config: dictionary with the following entries
            mlp_dims: Dimension of the MLP
            fc_dims: Dimension of the FC after max pooling
            mlp_dolastrelu: whether do the last ReLu after the MLP (False if absent)
            fc_dolastrelu: whether do the last ReLu after the FC (False if absent)
    """

    def __init__(self, net_config, **kwargs):
        super(PointNet, self).__init__()
        self.pointwise_mlp = PointwiseMLP(net_config['mlp_dims'], net_config.get('mlp_dolastrelu', False)) # learnable
        self.fc = PointwiseMLP(net_config['fc_dims'], net_config.get('fc_dolastrelu', False)) # learnable

        # self.pointnet = PointNet(net_config['mlp_dims'], net_config['fc_dims'], net_config['mlp_dolastrelu'])
        # The pooling window covers all the points and keeps the last MLP dimension untouched
        self.global_pool = GlobalPool(nn.AdaptiveMaxPool2d((1, net_config['mlp_dims'][-1])))

    def forward(self, data):
        """Point-wise MLP, then global max pooling, then the FC layers."""
        return self.fc(self.global_pool(self.pointwise_mlp(data)))


class PointNetHetero(nn.Module):
    """PointNet in heterogeneous batching mode.

    Every point carries the meta data of its block in the columns given by syntax_gt. One
    codeword is computed per block and returned together with the meta data of the block,
    arranged according to syntax_cw.
    """

    def __init__(self, net_config, **kwargs):
        """Requires the keyword argument `syntax`, providing syntax_gt and syntax_cw."""
        super(PointNetHetero, self).__init__()
        self.pointwise_mlp = PointwiseMLP(net_config['mlp_dims'], net_config.get('mlp_dolastrelu', False)) # learnable
        self.fc = PointwiseMLP(net_config['fc_dims'], False) # learnable
        self.ext_cw = net_config.get('ext_cw', False) # also pool with min and mean if set

        # Get the syntax
        self.syntax_gt = kwargs['syntax'].syntax_gt
        self.syntax_cw = kwargs['syntax'].syntax_cw

    def forward(self, data):
        """Compute one codeword per block.

        Args:
            data: 3D tensor indexed as (point cloud, point, column), the columns follow
                syntax_gt.

        Returns:
            2D tensor with one row per block start, the columns follow syntax_cw.
        """
        device = data.device

        batch_size, pnt_cnt, dims = data.shape[0], data.shape[1], data.shape[2]
        data = data.view(-1, dims) # all the points of the batch in one list
        block_idx = torch.cumsum(data[:, self.syntax_gt['block_start']] > 0, dim=0) - 1 # compute the block index with cumsum()
        block_idx = block_idx[data[:, self.syntax_gt['block_pntcnt']] > 0] # remove the padding and the skip points
        # Index of the point cloud of every point, kept only for the first point of each block
        pc_start = torch.arange(0, batch_size, dtype=torch.long, device=device).repeat_interleave(pnt_cnt)
        pc_start = pc_start[data[:, self.syntax_gt['block_start']] > 0] # remove the "non-start" points
        # The difference is non-zero where a block belongs to another point cloud than the
        # block before, the first block is marked with a one
        # NOTE(review): a float tensor is concatenated with a long tensor here, this
        # presumably relies on the type promotion of torch.cat() -- confirm
        pc_start = torch.cat((torch.ones(1, device=device), pc_start[1:] - pc_start[0: -1]))
        data = data[data[:, self.syntax_gt['block_pntcnt']] > 0, :] # remove the padding and the skip points

        # Normalize the point cloud: translation and scaling
        xyz_slc = slice(self.syntax_gt['xyz'][0], self.syntax_gt['xyz'][1] + 1)
        data[:, xyz_slc] -= data[:, self.syntax_gt['block_center'][0] : self.syntax_gt['block_center'][1] + 1]
        data[:, xyz_slc] *= data[:, self.syntax_gt['block_scale']].unsqueeze(-1)

        pnts_3d = data[:, xyz_slc]
        point_feature = self.pointwise_mlp(pnts_3d) # in this case, use the xyz coordinates as feature
        # Pool the point features of every block
        if self.ext_cw:
            cw_inp1 = scatter_max(point_feature, block_idx.long(), dim=0)[0]
            cw_inp2 = scatter_min(point_feature, block_idx.long(), dim=0)[0]
            cw_inp3 = scatter_mean(point_feature, block_idx.long(), dim=0)
            cw_inp = torch.cat([cw_inp1, cw_inp2, cw_inp3], dim=1)
        else:
            cw_inp = scatter_max(point_feature, block_idx.long(), dim=0)[0]
        block_feature = self.fc(cw_inp)
        mask = data[:, self.syntax_gt['block_start']] > 0 # the first point of every block, among the points kept

        # Return the codeword with the meta data
        out = torch.zeros(torch.sum(mask), self.syntax_cw['__len__'], device=device)
        out[:, self.syntax_cw['cw'][0] : self.syntax_cw['cw'][1] + 1] = block_feature
        out[:, self.syntax_cw['block_pntcnt']] = data[mask, self.syntax_gt['block_pntcnt']]
        out[:, self.syntax_cw['block_center'][0] : self.syntax_cw['block_center'][1] + 1] = data[mask, self.syntax_gt['block_center'][0] : self.syntax_gt['block_center'][1] + 1]
        out[:, self.syntax_cw['block_scale']] = data[mask, self.syntax_gt['block_scale']]
        out[:, self.syntax_cw['pc_start']] = pc_start
        return out
def get_and_init_FC_layer(din, dout, init_bias='zeros'):
    """Create a fully-connected layer with Xavier-uniform weights.

    Args:
        din: number of input features.
        dout: number of output features.
        init_bias: 'zeros' to zero the bias, 'uniform' to draw it with
            nn.init.uniform_.

    Returns:
        The initialized nn.Linear layer.

    Raises:
        ValueError: if `init_bias` is neither 'zeros' nor 'uniform'.
    """
    # Validate first so that a bad option fails before any layer is built.
    # (Raising a plain string is itself a TypeError in Python 3.)
    if init_bias not in ('uniform', 'zeros'):
        raise ValueError('Unknown init ' + str(init_bias))

    li = nn.Linear(din, dout)
    # Weights use the gain recommended for ReLU activations
    nn.init.xavier_uniform_(li.weight.data, gain=nn.init.calculate_gain('relu'))
    if init_bias == 'uniform':
        nn.init.uniform_(li.bias)
    else:
        li.bias.data.fill_(0.)
    return li
class GlobalPool(nn.Module):
    """Apply a 2D pooling layer over the point axis: BxNxK -> BxK.

    The wrapped layer is kept under the attribute name `Pool`.
    """

    def __init__(self, pool_layer):
        super().__init__()
        self.Pool = pool_layer

    def forward(self, X):
        # Insert a singleton channel axis so the 2D pooling layer sees Bx1xNxK
        pooled = self.Pool(X.unsqueeze(-3))
        # Drop the two axes at position -2 in turn, leaving BxK
        # (assumes the pooling layer reduces the N axis to size 1)
        return pooled.squeeze(-2).squeeze(-2)
125 | Nxdin ->Nxd1->Nxd2->...-> Nxdout 126 | """ 127 | def __init__(self, MLP_dims, FC_dims, MLP_doLastRelu): 128 | assert(MLP_dims[-1]==FC_dims[0]) 129 | layers = [ 130 | PointNetGlobalMax(MLP_dims, doLastRelu=MLP_doLastRelu),#BxK 131 | ] 132 | layers.extend(get_MLP_layers(FC_dims, False)) 133 | super(PointNet, self).__init__(*layers) -------------------------------------------------------------------------------- /third_party/nndistance/src/cpu_ops.cpp: -------------------------------------------------------------------------------- 1 | void nnsearch(int b,int n,int m,const float * xyz1,const float * xyz2,float * dist,int * idx){ 2 | for (int i=0;i(); 49 | float *xyz2_data = xyz2.data(); 50 | float *dist1_data = dist1.data(); 51 | float *dist2_data = dist2.data(); 52 | int *idx1_data = idx1.data(); 53 | int *idx2_data = idx2.data(); 54 | nnsearch(batchsize, n, m, xyz1_data, xyz2_data, dist1_data, idx1_data); 55 | nnsearch(batchsize, m, n, xyz2_data, xyz1_data, dist2_data, idx2_data); 56 | 57 | return 1; 58 | } 59 | 60 | 61 | 62 | int nnd_backward( 63 | at::Tensor xyz1, 64 | at::Tensor xyz2, 65 | at::Tensor gradxyz1, 66 | at::Tensor gradxyz2, 67 | at::Tensor graddist1, 68 | at::Tensor graddist2, 69 | at::Tensor idx1, 70 | at::Tensor idx2) 71 | { 72 | int b = xyz1.size(0); 73 | int n = xyz1.size(1); 74 | int m = xyz2.size(1); 75 | 76 | /* 77 | auto gradxyz1 = at::zeros_like(xyz1); 78 | auto gradxyz2 = at::zeros_like(xyz2); 79 | */ 80 | float *xyz1_data = xyz1.data(); 81 | float *xyz2_data = xyz2.data(); 82 | float *gradxyz1_data = gradxyz1.data(); 83 | float *gradxyz2_data = gradxyz2.data(); 84 | float *graddist1_data = graddist1.data(); 85 | float *graddist2_data = graddist2.data(); 86 | int *idx1_data = idx1.data(); 87 | int *idx2_data = idx2.data(); 88 | 89 | for (int i=0;i 0: # Render the point cloud with ball decorator 64 | pc_elem = sphere_pc_generator(opt) 65 | 66 | # Aggregate the generated spheres 67 | geo = pc_elem[0] 68 | for i in range(1, len(pc_elem)): 
def add_options(parser):
    """Register the point cloud visualizer's command-line options.

    Args:
        parser: the argparse parser to extend.

    Returns:
        The same parser, with all options added.
    """
    # (flag, keyword arguments for add_argument), in registration order
    option_table = (
        ('--file_name', dict(type=str, required=True,
                             help='File name of the point cloud.')),
        ('--output_file', dict(type=str, default='.',
                               help='Output file name for the rendered image.')),
        ('--view_file', dict(type=str, default='.',
                             help='View point file for rendering.')),
        ('--radius', dict(type=float, default=-1,
                          help='Radius of the rendered points. If > 0, render each point as a ball.')),
        ('--color', dict(type=float, nargs='+', default=[0.2, 0.2, 0.2],
                         help='Specify the color of the rendered point cloud if ball decorator is used.')),
        ('--radius_origin', dict(type=float, default=-1,
                                 help='Radius of the origin points. If < 0, do not add origin.')),
        ('--window_name', dict(type=str, default='Point Cloud',
                               help='Window name.')),
        ('--window_height', dict(type=int, default=1200,
                                 help='Window height.')),
        ('--window_width', dict(type=int, default=1600,
                                help='Window width.')),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser
def decompress(self, file_name):
    """Read a gzip-compressed bitstream and reconstruct the point cloud.

    Args:
        file_name: a sequence whose first entry is the path of the bitstream.

    Returns:
        A tuple (pc_rec, stat_dict): the denormalized reconstruction cast to
        integer type, and a dictionary holding the decoding time in seconds
        under 'dec_time'.
    """
    bitstream_path = file_name[0]
    with gzip.open(bitstream_path, 'rb') as stream:
        pc_strs, octree_strs, block_pntcnt = load_pc_stream(stream)

    # File reading is deliberately excluded from the timed section
    tic = time.monotonic()
    meta_data = self.pccnet.decoder.prepare_meta_data(
        octree_strs, block_pntcnt, self.pc_organizer
    )

    # Decode the blocks, then undo the scaling and the translation
    pc_rec, _ = self.pccnet.decompress([pc_strs], self.cw_shape, meta_data)
    shift = torch.tensor(self.translate, device=pc_rec.device)
    pc_rec = (pc_rec / self.scale - shift).long()
    toc = time.monotonic()

    # Extra statistics are returned through this dictionary
    stat_dict = {'dec_time': round(toc - tic, 3)}
    return pc_rec, stat_dict
def array_to_bytes(x, dtype):
    """Serialize a sequence of numbers as raw bytes of the given dtype.

    The range check is done on the values *before* they are cast: checking
    after the cast can never fail, because an out-of-range value has already
    been wrapped into the target range by then.

    Args:
        x: array-like of numbers.
        dtype: target NumPy dtype (integer or floating point).

    Returns:
        The bytes of `x` converted to `dtype`.

    Raises:
        OverflowError: if any value lies outside the representable range of
            `dtype`.
    """
    values = np.asarray(x)
    if np.issubdtype(dtype, np.floating):
        type_info = np.finfo(dtype)
        upper, lower = float(type_info.max), float(type_info.min)
    else:
        type_info = np.iinfo(dtype)
        upper, lower = int(type_info.max), int(type_info.min)

    if values.size > 0:
        # Compare as plain Python numbers so the check does not depend on
        # NumPy's promotion rules between the source and the target dtype
        largest, smallest = values.max().item(), values.min().item()
        if largest > upper:
            raise OverflowError(f'Overflow {values} {type_info}')
        if smallest < lower:
            raise OverflowError(f'Underflow {values} {type_info}')
    return values.astype(dtype).tobytes()
class MlpDecoderHetero(nn.Module):
    """MLP decoder for heterogeneous batching.

    Each input row is one codeword plus its meta data, laid out according to
    `syntax_cw`. Every codeword is decoded into `num_points` points, and the
    output rows are laid out according to `syntax_rec`.
    """

    def __init__(self, net_config, **kwargs):
        super(MlpDecoderHetero, self).__init__()
        self.num_points = net_config['num_points']
        dims = net_config['dims']
        self.mlp = PointwiseMLP(dims + [3 * self.num_points], doLastRelu=False) # the MLP layers

        # Grab the syntax
        self.syntax_cw = kwargs['syntax'].syntax_cw
        self.syntax_rec = kwargs['syntax'].syntax_rec

    def forward(self, cw):
        """Decode codewords into denormalized points with per-point meta data.

        Args:
            cw: 2D tensor with one row per block, following `syntax_cw`.

        Returns:
            A tensor on the same device as `cw` with `num_points` rows per
            block and `syntax_rec['__len__']` columns.
        """
        device = cw.device
        cw_slc = slice(self.syntax_cw['cw'][0], self.syntax_cw['cw'][1] + 1)
        pc_block = self.mlp(cw[:, cw_slc]) # apply MLP layers directly
        pc_block = pc_block.view(cw.shape[0] * self.num_points, -1)

        block_npts = torch.ones(cw.shape[0], dtype=torch.long, device=device) * self.num_points
        # For each point, the index of its codeword/block
        cw_idx = torch.arange(block_npts.shape[0], device=device).repeat_interleave(block_npts)
        # Mark a point with 1 if it is the first point of a block
        block_start = torch.cat((torch.ones(1, device=device), cw_idx[1:] - cw_idx[:-1])).float()

        # Per-point copies of the block center and scale used for denormalization
        center = cw[:, self.syntax_cw['block_center'][0]: self.syntax_cw['block_center'][1] + 1].repeat_interleave(block_npts, 0)
        scale = cw[:, self.syntax_cw['block_scale']: self.syntax_cw['block_scale'] + 1].repeat_interleave(block_npts, 0)

        # From pc_start in cw (blocks), build pc_start for points
        pc_start = torch.zeros(cw.shape[0], device=device).repeat_interleave(block_npts)
        # Starting point index for each block
        block_idx = torch.cat((torch.zeros(1, device=device, dtype=torch.long), torch.cumsum(block_npts, 0)[:-1]), 0)
        # Each block carries a binary marker (1 if it is the first block of its
        # point cloud); copy that marker onto the first point of the block
        pc_start[block_idx] = cw[:, self.syntax_cw['pc_start']: self.syntax_cw['pc_start'] + 1].squeeze(-1)

        # Denormalization: scaling and translation
        pc_block = pc_block / scale # scaling
        pc_block = pc_block + center # translation

        # Assemble the output on the input's device, like every other tensor
        # built here, so that the decoder also runs without CUDA
        out = torch.zeros(pc_block.shape[0], self.syntax_rec['__len__'], device=device)
        out[:, self.syntax_rec['xyz'][0] : self.syntax_rec['xyz'][1] + 1] = pc_block
        out[:, self.syntax_rec['block_start']] = block_start
        out[:, self.syntax_rec['block_center'][0] : self.syntax_rec['block_center'][1] + 1] = center
        out[:, self.syntax_rec['block_scale']] = scale[:, 0]
        out[:, self.syntax_rec['pc_start']] = pc_start
        return out


    def prepare_meta_data(self, binstrs, block_pntcnt, octree_organizer):
        """Convert the binary strings of an octree to a set of scales and centers of the leaf nodes.
        Next, arranges them as the meta data array according to the syntax for decoding.

        Args:
            binstrs: octree strings, passed to `octree_organizer.departition_octree`.
            block_pntcnt: point count of each leaf block, indexed like the leaf blocks.
            octree_organizer: provides `departition_octree` and `get_normalizer`.

        Returns:
            A CUDA tensor with one row per kept block and two trailing
            singleton axes appended.
        """

        leaf_blocks = octree_organizer.departition_octree(binstrs, block_pntcnt) # departition the octree strings to blocks
        meta_idx = self.syntax_cw['__meta_idx__'] # column offset of the meta data inside a codeword row
        meta_data = np.zeros((len(leaf_blocks), self.syntax_cw['__len__'] - meta_idx), dtype=np.float32)
        cur = 0

        # Assemble the meta data; the first row is flagged as the start of the point cloud
        meta_data[0, self.syntax_cw['pc_start'] - meta_idx] = 1
        for idx, block in enumerate(leaf_blocks):
            if block['binstr'] >= 0: # only keep the blocks with transform mode
                center, scale = octree_organizer.get_normalizer(block['bbox_min'], block['bbox_max'])
                meta_data[cur, self.syntax_cw['block_pntcnt'] - meta_idx] = block_pntcnt[idx]
                meta_data[cur, self.syntax_cw['block_scale'] - meta_idx] = scale
                meta_data[cur, self.syntax_cw['block_center'][0] - meta_idx :
                    self.syntax_cw['block_center'][1] - meta_idx + 1] = center
                cur += 1

        # Only returns the useful part
        # NOTE(review): the device is hard-coded to CUDA here — confirm whether CPU decoding should be supported
        return torch.as_tensor(meta_data[:cur, :], device=torch.device('cuda')).unsqueeze(-1).unsqueeze(-1)
def absoluteFilePaths(directory):
    """Yield the absolute path of every file found under `directory`, recursively.

    Files are yielded in the order os.walk reports them.
    """
    for root, _, names in os.walk(directory):
        yield from (os.path.abspath(os.path.join(root, name)) for name in names)
class KITTIBase(data.Dataset):
    """A base SemanticKITTI dataset.

    Collects every file under `<dataset_path>/<folder>/velodyne` for the
    folders of the selected split and serves their xyz coordinates.
    """

    def __init__(self, data_config, sele_config, **kwargs):
        """Gather the file list of the split selected by `sele_config`.

        Args:
            data_config: dataset configuration dictionary.
            sele_config: key of the sub-configuration that holds the 'split' name.
        """

        base_dir = os.path.dirname(os.path.abspath(__file__))
        dataset_path = os.path.abspath(os.path.join(base_dir, '../../datasets/kitti/')) # the default dataset path
        # Expose the dataset root, as FordBase does, for code that reads
        # `dataset_path` from the base dataset object
        self.dataset_path = dataset_path

        # Other specific options
        self.translate = data_config.get('translate', [0, 0, 0])
        self.scale = data_config.get('scale', 1)
        # Quantization is only considered when the module-level flag is set
        self.quantize_resolution = data_config.get('quantize_resolution', None) if found_quantize else None
        self.split = data_config[sele_config]['split']
        splitting = data_config['splitting'][self.split]

        self.im_idx = []
        for i_folder in splitting:
            # Folder names are zero-padded to two digits
            self.im_idx += absoluteFilePaths(os.path.join(dataset_path, str(i_folder).zfill(2), 'velodyne'))
        self.im_idx.sort()


    def __len__(self):
        """Returns the total number of samples"""
        return len(self.im_idx)


    def __getitem__(self, index):
        """Load one scan and return its coordinates as {'pc': array}."""
        # Each record is four float32 values; only the first three are used
        raw_data = np.fromfile(self.im_idx[index], dtype=np.float32).reshape((-1, 4))
        if self.quantize_resolution is not None:
            # NOTE(review): `quantize_resolution` is not defined in the visible part of this module — confirm its origin
            pc = quantize_resolution(raw_data[:, :3], self.quantize_resolution)
        else:
            pc = (raw_data[:, :3] + np.array(self.translate)) * self.scale
        return {'pc': pc}


    def get_pc_idx(self, index):
        """Return the file path of the sample at `index`."""
        return self.im_idx[index]
def get_base_lidar_dataset(data_config, sele_config):
    """Instantiate the base LiDAR dataset named by `data_config['dataset']`.

    The name is matched case-insensitively by substring, in the order
    'ford', 'kitti', 'qnxadas'.

    Args:
        data_config: dataset configuration dictionary; must contain 'dataset'.
        sele_config: key of the sub-configuration to use, forwarded to the dataset.

    Returns:
        The constructed base dataset.

    Raises:
        ValueError: if the dataset name matches none of the known datasets.
    """
    name = data_config['dataset'].lower()
    if 'ford' in name:
        loader_class = FordBase
    elif 'kitti' in name:
        loader_class = KITTIBase
    elif 'qnxadas' in name:
        loader_class = QnxadasBase
    else:
        # Fail with a clear message instead of trying to call None
        raise ValueError(
            "Unknown LiDAR dataset '%s': expected a name containing "
            "'ford', 'kitti' or 'qnxadas'" % data_config['dataset']
        )
    return loader_class(data_config, sele_config)
class LidarSpherical(data.Dataset):
    """Projects each LiDAR point cloud to a 2D image through `pc2img`, using spherical field-of-view settings."""

    def __init__(self, data_config, sele_config, **kwargs):

        self.point_cloud_dataset = get_base_lidar_dataset(data_config, sele_config)

        # All the options about the spherical projection live under 'spherical_cfg'
        cfg = data_config['spherical_cfg']
        self.width = cfg.get('width', 1024)
        self.height = cfg.get('height', 128)
        v_fov_deg = cfg.get('v_fov', [-28, 3.0])
        h_fov_deg = cfg.get('h_fov', [-180, 180])
        self.origin_shift = cfg.get('origin_shift', [0, 0, 0])

        # Fields of view are configured in degrees and stored in radians
        self.v_fov = np.array(v_fov_deg) / 180 * np.pi
        self.h_fov = np.array(h_fov_deg) / 180 * np.pi
        self.num_points = self.width * self.height
        self.inf = 1e6

    def __len__(self):
        return len(self.point_cloud_dataset)

    def __getitem__(self, index):
        # Take out the point cloud coordinates only
        points = self.point_cloud_dataset[index]['pc']
        # Shift the origin, one coordinate axis at a time
        for axis in range(3):
            points[:, axis] += self.origin_shift[axis]
        return pc2img(self.h_fov, self.v_fov, self.width, self.height, self.inf, points)
OctreeOrganizer( 100 | data_config['octree_cfg'], 101 | data_config[sele_config].get('max_num_points', 150000), 102 | kwargs['syntax'].syntax_gt, 103 | self.rw_octree, 104 | data_config[sele_config].get('shuffle_blocks', False), 105 | ) 106 | 107 | def __len__(self): 108 | return len(self.point_cloud_dataset) 109 | 110 | def __getitem__(self, index): 111 | 112 | if self.rw_octree: 113 | file_name = os.path.relpath(self.point_cloud_dataset.get_pc_idx(index), self.point_cloud_dataset.dataset_path) 114 | file_name = os.path.join(self.point_cloud_dataset.dataset_path, self.octree_cache_folder, self.rw_partition_scheme, file_name) 115 | file_name = os.path.splitext(file_name)[0] + '.pkl' 116 | else: file_name = None 117 | 118 | pc = self.point_cloud_dataset[index]['pc'] 119 | # perform octree partitioning and organize the data 120 | pc_formatted, _, _, _, _ = self.octree_organizer.organize_data(pc, file_name=file_name) 121 | 122 | return pc_formatted 123 | -------------------------------------------------------------------------------- /pccai/dataloaders/shapenet_part_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
def pc_normalize(pc):
    """Center a point set on its centroid and scale it into the unit sphere.

    Args:
        pc: NxC array of points.

    Returns:
        A new NxC array; the input is not modified. An empty input is
        returned as is, and a point set whose points all coincide is only
        centered (it cannot be scaled).
    """
    if pc.shape[0] == 0:
        return pc
    centered = pc - np.mean(pc, axis=0)
    # Distance of the farthest point from the centroid
    radius = np.max(np.sqrt(np.sum(centered**2, axis=1)))
    if radius > 0:
        centered = centered / radius
    # radius == 0: dividing would turn every coordinate into NaN
    return centered
augmentation = data_config[sele_config].get('augmentation', False) 63 | # Should perform augmentation in __getitem__() if needed 64 | self.num_points = num_points 65 | self.catfile = os.path.join(dataset_path, 'synsetoffset2category.txt') 66 | self.cat = {} 67 | self.classification = classification 68 | self.normalize = normalize 69 | 70 | with open(self.catfile, 'r') as f: 71 | for line in f: 72 | ls = line.strip().split() 73 | self.cat[ls[0]] = ls[1] 74 | if not class_choice is None: 75 | self.cat = {k: v for k, v in self.cat.items() if k in class_choice} 76 | logger.log.info(self.cat) 77 | self.meta = {} 78 | with open(os.path.join(dataset_path, 'train_test_split', 'shuffled_train_file_list.json'), 'r') as f: 79 | train_ids = set([str(d.split('/')[2]) for d in json.load(f)]) 80 | with open(os.path.join(dataset_path, 'train_test_split', 'shuffled_val_file_list.json'), 'r') as f: 81 | val_ids = set([str(d.split('/')[2]) for d in json.load(f)]) 82 | with open(os.path.join(dataset_path, 'train_test_split', 'shuffled_test_file_list.json'), 'r') as f: 83 | test_ids = set([str(d.split('/')[2]) for d in json.load(f)]) 84 | 85 | for item in self.cat: 86 | self.meta[item] = [] 87 | dir_point = os.path.join(dataset_path, self.cat[item], 'points') 88 | dir_seg = os.path.join(dataset_path, self.cat[item], 'points_label') 89 | fns = sorted(os.listdir(dir_point)) 90 | if split == 'trainval': 91 | fns = [fn for fn in fns if ((fn[0:-4] in train_ids) or (fn[0:-4] in val_ids))] 92 | elif split == 'train': 93 | fns = [fn for fn in fns if fn[0:-4] in train_ids] 94 | elif split == 'val': 95 | fns = [fn for fn in fns if fn[0:-4] in val_ids] 96 | elif split == 'test': 97 | fns = [fn for fn in fns if fn[0:-4] in test_ids] 98 | else: 99 | logger.log.info('Unknown split: %s. Exiting..' 
% (split)) 100 | exit(0) 101 | 102 | for fn in fns: 103 | token = (os.path.splitext(os.path.basename(fn))[0]) 104 | self.meta[item].append((os.path.join(dir_point, token + '.pts'), os.path.join(dir_seg, token + '.seg'),self.cat[item], token)) 105 | self.datapath = [] 106 | for item in self.cat: 107 | for fn in self.meta[item]: 108 | self.datapath.append((item, fn[0], fn[1], fn[2], fn[3])) 109 | 110 | self.classes = dict(zip(sorted(self.cat), range(len(self.cat)))) 111 | logger.log.info(self.classes) 112 | self.num_seg_classes = 0 113 | if not self.classification: 114 | for i in range(len(self.datapath)//50): 115 | l = len(np.unique(np.loadtxt(self.datapath[i][2]).astype(np.uint8))) 116 | if l > self.num_seg_classes: 117 | self.num_seg_classes = l 118 | 119 | load_pc_part = partial(load_pc, datapath=self.datapath, classes=self.classes, normalize=self.normalize) 120 | 121 | self.cache = np.empty(len(self.datapath), dtype=object) 122 | if not data_config.get('lazy_loading', False): 123 | # Precaching 124 | with multiprocessing.Pool() as p: 125 | self.cache = np.array(list(tqdm(p.imap(load_pc_part, np.arange(len(self.datapath)), 32), total=len(self.datapath))), dtype=object) 126 | 127 | def __getitem__(self, index): 128 | value = self.cache[index] 129 | if value is None: 130 | value = self.cache[index] = load_pc(index, self.datapath, self.classes, self.normalize) 131 | point_set, seg, cls, foldername, filename = value 132 | 133 | if self.num_points is not None: 134 | choice = np.random.choice(len(seg), self.num_points, replace=True) 135 | # resample 136 | point_set = point_set[choice, :] 137 | 138 | # To Pytorch 139 | point_set = torch.from_numpy(point_set) 140 | return point_set 141 | 142 | 143 | def __len__(self): 144 | return len(self.datapath) 145 | -------------------------------------------------------------------------------- /third_party/nndistance/src/nnd_cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 
//#include "nnd_cuda.h" 3 | #include 4 | #include 5 | 6 | 7 | 8 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 9 | const int batch=512; 10 | __shared__ float buf[batch*3]; 11 | for (int i=blockIdx.x;ibest){ 123 | result[(i*n+j)]=best; 124 | result_i[(i*n+j)]=best_i; 125 | } 126 | } 127 | __syncthreads(); 128 | } 129 | } 130 | } 131 | 132 | int NmDistanceKernelLauncher( 133 | at::Tensor xyz1, 134 | at::Tensor xyz2, 135 | at::Tensor dist1, 136 | at::Tensor dist2, 137 | at::Tensor idx1, 138 | at::Tensor idx2) 139 | { 140 | int b = xyz1.size(0); 141 | int n = xyz1.size(1); 142 | int m = xyz2.size(1); 143 | 144 | float *xyz1_data = xyz1.data(); 145 | float *xyz2_data = xyz2.data(); 146 | float *dist1_data = dist1.data(); 147 | float *dist2_data = dist2.data(); 148 | int *idx1_data = idx1.data(); 149 | int *idx2_data = idx2.data(); 150 | 151 | 152 | NmDistanceKernel<<>>(b,n,xyz1_data,m,xyz2_data,dist1_data,idx1_data); 153 | NmDistanceKernel<<>>(b,m,xyz2_data,n,xyz1_data,dist2_data,idx2_data); 154 | 155 | cudaError_t err = cudaGetLastError(); 156 | if (err != cudaSuccess) { 157 | printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err)); 158 | //THError("aborting"); 159 | return 0; 160 | } 161 | return 1; 162 | } 163 | 164 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 165 | for (int i=blockIdx.x;i(); 203 | float *xyz2_data = xyz2.data(); 204 | float *gradxyz1_data = gradxyz1.data(); 205 | float *gradxyz2_data = gradxyz2.data(); 206 | float *graddist1_data = graddist1.data(); 207 | float *graddist2_data = graddist2.data(); 208 | int *idx1_data = idx1.data(); 209 | int *idx2_data = idx2.data(); 210 | 211 | NmDistanceGradKernel<<>>(b,n,xyz1_data,m,xyz2_data,graddist1_data,idx1_data,gradxyz1_data,gradxyz2_data); 212 | 
NmDistanceGradKernel<<>>(b,m,xyz2_data,n,xyz1_data,graddist2_data,idx2_data,gradxyz2_data,gradxyz1_data); 213 | 214 | cudaError_t err = cudaGetLastError(); 215 | if (err != cudaSuccess) { 216 | printf("error in nnd get grad: %s\n", cudaGetErrorString(err)); 217 | //THError("aborting"); 218 | return 0; 219 | } 220 | return 1; 221 | 222 | } 223 | -------------------------------------------------------------------------------- /pccai/dataloaders/modelnet_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # A ModelNet data loader 8 | 9 | import os 10 | import os.path 11 | import numpy as np 12 | import pickle 13 | 14 | import torch.utils.data as data 15 | from torch_geometric.transforms.sample_points import SamplePoints 16 | from torch_geometric.datasets.modelnet import ModelNet 17 | from pccai.utils.convert_octree import OctreeOrganizer 18 | import pccai.utils.logger as logger 19 | 20 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 21 | dataset_path_default=os.path.abspath(os.path.join(BASE_DIR, '../../datasets/modelnet/')) # the default dataset path 22 | 23 | 24 | def gen_rotate(): 25 | rot = np.eye(3, dtype='float32') 26 | rot[0,0] *= np.random.randint(0,2) * 2 - 1 27 | rot = np.dot(rot, np.linalg.qr(np.random.randn(3, 3))[0]) 28 | return rot 29 | 30 | 31 | class ModelNetBase(data.Dataset): 32 | """A base ModelNet data loader.""" 33 | 34 | def __init__(self, data_config, sele_config, **kwargs): 35 | if 'coord_min' in data_config or 'coord_max' in data_config: 36 | self.coord_minmax = [data_config.get('coord_min', 0), data_config.get('coord_max', 1023)] 37 | else: 38 | self.coord_minmax = None 39 | self.centralize = data_config.get('centralize', True) 40 | self.voxelize = data_config.get('voxelize', False) 41 | self.sparse_collate = data_config.get('sparse_collate', False) 42 | 
self.augmentation = data_config[sele_config].get('augmentation', False) 43 | self.split = data_config[sele_config]['split'].lower() 44 | self.num_points = data_config['num_points'] 45 | sampler = SamplePoints(num=self.num_points, remove_faces=True, include_normals=False) 46 | self.point_cloud_dataset = ModelNet(root=dataset_path_default, name='40', 47 | train=True if self.split == 'train' else False, transform=sampler) 48 | 49 | 50 | def __len__(self): 51 | return len(self.point_cloud_dataset) 52 | 53 | 54 | def pc_preprocess(self, pc): 55 | """Perform different types of pre-processings to the ModelNet point clouds.""" 56 | 57 | if self.centralize: 58 | centroid = np.mean(pc, axis=0) 59 | pc = pc - centroid 60 | 61 | if self.augmentation: # random rotation 62 | pc = np.dot(pc, gen_rotate()) 63 | 64 | if self.coord_minmax is not None: 65 | pc_min, pc_max = np.min(pc), np.max(pc) 66 | pc = (pc - pc_min) / (pc_max - pc_min) * (self.coord_minmax[1] - self.coord_minmax[0]) + self.coord_minmax[0] 67 | 68 | if self.voxelize: 69 | pc = np.unique(np.round(pc).astype('int32'), axis=0) 70 | # This is to facilitate the sparse tensor construction with Minkowski Engine 71 | if self.sparse_collate: 72 | pc = np.hstack((np.zeros((pc.shape[0], 1), dtype='int32'), pc)) 73 | # pc = np.vstack((pc, np.ones((self.num_points - pc.shape[0], 4), dtype='int32') * -1)) 74 | pc[0][0] = 1 75 | return pc 76 | else: # if do not specify minmax, normalize the point cloud within a unit ball 77 | m = np.max(np.sqrt(np.sum(pc**2, axis=1))) 78 | pc = pc / m # scaling 79 | return pc.astype('float32') 80 | 81 | 82 | class ModelNetSimple(ModelNetBase): 83 | """A simple ModelNet data loader where point clouds are directly represented as 3D points.""" 84 | 85 | def __init__(self, data_config, sele_config, **kwargs): 86 | super().__init__(data_config, sele_config) 87 | 88 | # Use_cache specifies the pickle file to be read/written down, "" means no caching mechanism is used 89 | self.use_cache = 
data_config.get('use_cache', '') 90 | 91 | # By using the cache file, the data is no longer generated on the fly but the loading becomes much faster 92 | if self.use_cache != '': 93 | cache_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../datasets/', self.use_cache) 94 | if os.path.exists(cache_file): # the cache file already exist 95 | logger.log.info("Loading pre-processed ModelNet40 cache file...") 96 | with open(cache_file, 'rb') as f: 97 | self.cache = pickle.load(f) 98 | else: # the cache file is not there yet 99 | self.cache = [] 100 | logger.log.info("Sampling point clouds from raw ModelNet40 data...") 101 | for i in range(len(self.point_cloud_dataset)): 102 | # Be careful that here the data type is converted as uint8 to save space 103 | self.cache.append(self.pc_preprocess(self.point_cloud_dataset[i].pos.numpy()).astype(np.uint8)) 104 | with open(cache_file, 'wb') as f: 105 | pickle.dump(self.cache, f) 106 | logger.log.info("ModelNet40 data loaded...\n") 107 | 108 | def __getitem__(self, index): 109 | if self.use_cache: 110 | return self.cache[index].astype(np.int32) # data type convert back to int32 111 | else: 112 | return self.pc_preprocess(self.point_cloud_dataset[index].pos.numpy()) 113 | 114 | 115 | class ModelNetOctree(ModelNetBase): 116 | """ModelNet data loader with uniform sampling and octree partitioning.""" 117 | 118 | def __init__(self, data_config, sele_config, **kwargs): 119 | 120 | data_config['voxelize'] = True 121 | data_config['sparse_collate'] = False 122 | super().__init__(data_config, sele_config) 123 | 124 | self.rw_octree = data_config.get('rw_octree', False) 125 | if self.rw_octree: 126 | self.rw_partition_scheme = data_config.get('rw_partition_scheme', 'default') 127 | self.octree_cache_folder = 'octree_cache' 128 | 129 | # Create an octree formatter to organize octrees into arrays 130 | self.octree_organizer = OctreeOrganizer( 131 | data_config['octree_cfg'], 132 | 
data_config[sele_config].get('max_num_points', data_config['num_points']), 133 | kwargs['syntax'].syntax_gt, 134 | self.rw_octree, 135 | data_config[sele_config].get('shuffle_blocks', False), 136 | ) 137 | 138 | def __len__(self): 139 | return len(self.point_cloud_dataset) 140 | 141 | def __getitem__(self, index): 142 | 143 | while True: 144 | if self.rw_octree: 145 | file_name = os.path.join(dataset_path_default, self.octree_cache_folder, self.rw_partition_scheme, str(index)) + '.pkl' 146 | else: file_name = None 147 | 148 | # perform octree partitioning and organize the data 149 | pc = self.pc_preprocess(self.point_cloud_dataset[index].pos.numpy()) 150 | pc_formatted, _, _, _, all_skip = self.octree_organizer.organize_data(pc, file_name=file_name) 151 | if all_skip: 152 | index += 1 153 | if index >= len(self.point_cloud_dataset): index = 0 154 | else: break 155 | 156 | return pc_formatted 157 | -------------------------------------------------------------------------------- /pccai/optim/cd_canonical.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Compute Chamfer Distance loss for raw point clouds (homogeneous or heterogeneous) 8 | 9 | import torch 10 | import sys 11 | import os 12 | 13 | from pccai.optim.pcc_loss import PccLossBase 14 | 15 | 16 | def nndistance_simple(rec, data): 17 | """ 18 | A simple nearest neighbor search, not very efficient, just for reference 19 | """ 20 | rec_sq = torch.sum(rec * rec, dim=2, keepdim=True) # (B,N,1) 21 | data_sq = torch.sum(data * data, dim=2, keepdim=True) # (B,M,1) 22 | cross = torch.matmul(data, rec.permute(0, 2, 1)) # (B,M,N) 23 | dist = data_sq - 2 * cross + rec_sq.permute(0, 2, 1) # (B,M,N) 24 | data_dist, data_idx = torch.min(dist, dim=2) 25 | rec_dist, rec_idx = torch.min(dist, dim=1) 26 | return data_dist, rec_dist, data_idx, rec_idx 27 | 28 | 29 | try: 30 | # If you want to use the efficient NN search for computing CD loss, compiled the nndistance() 31 | # function under the third_party folder according to instructions in Readme.md 32 | sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../third_party/nndistance')) 33 | from modules.nnd import NNDModule 34 | nndistance = NNDModule() 35 | except ModuleNotFoundError: 36 | # Without the compiled nndistance(), by default the nearest neighbor will be done using pytorch-geometric 37 | nndistance = nndistance_simple 38 | 39 | 40 | class ChamferDistCanonical(PccLossBase): 41 | """Chamfer distance loss for both homogeneous and heterogeneous batching.""" 42 | 43 | def __init__(self, loss_args, syntax): 44 | super().__init__(loss_args, syntax) 45 | self.xyz_loss_type = loss_args['xyz_loss_type'] 46 | self.xyz_subset_weight = loss_args.get('xyz_subset_weight', 1.0) # weight of the subset distance 47 | self.inf =1e12 48 | 49 | # Syntax of the ground-truth and the reconstruction 50 | self.syntax_gt = syntax.syntax_gt 51 | self.syntax_rec = syntax.syntax_rec 52 | 53 | 54 | def dist_hetero(self, data, rec, batch_size): 55 | """Compute birectional distances between two point clouds in 
heterogeneous mode.""" 56 | 57 | device = data.device 58 | batch_size = data.shape[0] 59 | 60 | # Get the number of points for each point cloud in the reconstruction 61 | pnt_cnt_batch = torch.ones(batch_size + 1, dtype=torch.int32, device=device) * rec.shape[0] 62 | pnt_cnt_batch[0 : -1] = torch.arange(rec.shape[0], device=device)[rec[:, self.syntax_rec['pc_start']] > 0] 63 | pnt_cnt_batch = pnt_cnt_batch[1:] - pnt_cnt_batch[0 : -1] 64 | max_pnts_batch = max(pnt_cnt_batch) # get the maximum number of points among all point clouds 65 | avail_idx = torch.cat([torch.arange(n, dtype=torch.long, device=device) + idx * max_pnts_batch 66 | for idx, n in enumerate(pnt_cnt_batch)]) # obtain the indices of the available points 67 | 68 | rec_homo = torch.ones((max_pnts_batch * batch_size, 3), device=device) * self.inf # set the pading to inf 69 | rec_homo[avail_idx, :] = rec[:, self.syntax_rec['xyz'][0] : self.syntax_rec['xyz'][1] + 1] 70 | rec_homo = rec_homo.view(batch_size, -1, 3) # build a homogeneous 3D tensor holding the reconstrcutions 71 | 72 | # Build a homogeneous 3D tensor holding the ground-truths 73 | data_homo = data[:, :, self.syntax_gt['xyz'][0] : self.syntax_gt['xyz'][1] + 1].clone() 74 | data_homo = data_homo.view(-1, 3) 75 | data = data.view(-1, self.syntax_gt['__len__']) 76 | data_homo[data[:, self.syntax_gt['block_pntcnt']] <= 0, :] = self.inf # set the padding to inf 77 | data_homo = data_homo.view(batch_size, -1, 3) 78 | 79 | # Compute the nearest neighbor distances, then retrieve the available distance values 80 | data_dist, rec_dist, _, _ = nndistance(data_homo.contiguous(), rec_homo.contiguous()) 81 | data_dist = data_dist.view(-1)[data[:, self.syntax_gt['block_pntcnt']] > 0] 82 | rec_dist = rec_dist.view(-1)[avail_idx] 83 | rep_times = data_homo.shape[1] 84 | return data_dist, rec_dist, rep_times 85 | 86 | 87 | def xyz_loss(self, loss_out, net_in, net_out): 88 | """Chamfer distance computation using nndistance().""" 89 | 90 | rec = net_out['x_hat'] 
91 | batch_size = net_in.shape[0] 92 | loss = 0 93 | if self.hetero: 94 | data_dist, rec_dist, rep_times = self.dist_hetero(net_in, rec, batch_size) # compute the Chamfer distance values 95 | if self.xyz_loss_type.find('l1') >= 0: # compute square root if l1-norm is used 96 | data_dist, rec_dist = data_dist ** 0.5, rec_dist ** 0.5 97 | rec_dist = rec_dist * self.xyz_subset_weight # weight the subset distance 98 | if self.xyz_loss_type.find('max') >= 0: # use max function for aggregation 99 | net_in = net_in.view(-1, self.syntax_gt['__len__']) 100 | memb_data = torch.arange(0, batch_size, dtype=torch.long, device=net_in.device).repeat_interleave(rep_times) 101 | memb_data = memb_data[(net_in[:, self.syntax_gt['block_pntcnt']] > 0).view(-1)] # membership of each point in the batch of gt_data 102 | memb_rec = torch.cumsum(rec[:, self.syntax_rec['pc_start']], dim=0).long() - 1 # membership of each point in the batch of rec 103 | losses = torch.stack([torch.max(torch.mean(data_dist[memb_data==idx]), torch.mean(rec_dist[memb_rec==idx])) 104 | for idx in range(batch_size)]) 105 | loss = torch.mean(losses) 106 | else: 107 | loss = torch.mean(data_dist + rec_dist) 108 | else: 109 | net_in = net_in.contiguous() 110 | rec = rec.contiguous() 111 | data_dist, rec_dist, _, _ = nndistance(net_in, rec) 112 | if self.xyz_loss_type.find('l1') >= 0: # compute square root if l1-norm is used 113 | data_dist, rec_dist = data_dist ** 0.5, rec_dist ** 0.5 114 | rec_dist = rec_dist * self.xyz_subset_weight # weight the subset distance 115 | data_dist, rec_dist = torch.mean(data_dist, 1), torch.mean(rec_dist, 1) 116 | if self.xyz_loss_type.find('max') >= 0: # use max function for aggregation 117 | loss = torch.mean(torch.max(data_dist, rec_dist)) 118 | else: loss = torch.mean(data_dist + rec_dist) 119 | 120 | loss_out['xyz_loss'] = loss.unsqueeze(0) # write the 'xyz_loss' as return 121 | 122 | 123 | def loss(self, net_in, net_out): 124 | """Overall R-D loss computation.""" 125 | 126 | 
loss_out = {} 127 | 128 | # Rate loss 129 | if 'likelihoods' in net_out and len(net_out['likelihoods']) > 0: 130 | count = torch.sum(net_in[:, :, self.syntax_gt['block_pntcnt']] > 0) if self.hetero else net_in.shape[0] * net_in.shape[1] 131 | self.bpp_loss(loss_out, net_out['likelihoods'], count) 132 | else: 133 | loss_out['bpp_loss'] = torch.zeros((1,)) 134 | if net_out['x_hat'].is_cuda: 135 | loss_out['bpp_loss'] = loss_out['bpp_loss'].cuda() 136 | 137 | # Distortion loss 138 | self.xyz_loss(loss_out, net_in, net_out) 139 | 140 | # R-D loss = alpha * D + beta * R 141 | loss_out["loss"] = self.alpha * loss_out['xyz_loss'] + self.beta * loss_out["bpp_loss"] # R-D loss 142 | 143 | return loss_out 144 | -------------------------------------------------------------------------------- /pccai/pipelines/bench.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Benchmarking PCC models 8 | 9 | import time 10 | import os 11 | import numpy as np 12 | import yaml 13 | import torch 14 | import glob 15 | 16 | # Load different utilities from PccAI 17 | from pccai.utils.syntax import SyntaxGenerator 18 | from pccai.utils.pc_metric import compute_metrics 19 | from pccai.utils.misc import pc_read, pc_write, load_state_dict_with_fallback 20 | from pccai.codecs.utils import get_codec_class 21 | from pccai.models.pcc_models import get_architecture_class 22 | import pccai.utils.logger as logger 23 | 24 | 25 | def create_pccnet(net_config, checkpoint, syntax, device): 26 | """Build the network model.""" 27 | 28 | # Construct the PCC model 29 | architecture_class = get_architecture_class(net_config['architecture']) 30 | pccnet = architecture_class(net_config['modules'], syntax) 31 | 32 | # Load the network weights 33 | state_dict = checkpoint['net_state_dict'].copy() 34 | for _ in range(len(state_dict)): 35 | k, v = state_dict.popitem(False) 36 | state_dict[k[len('.pcc_model'):]] = v 37 | load_state_dict_with_fallback(pccnet, state_dict) 38 | pccnet.to(device) 39 | pccnet.eval() 40 | logger.log.info("Model weights loaded.") 41 | return pccnet 42 | 43 | 44 | def benchmark_checkpoints(opt): 45 | """Benchmarking several networks with the same architecture.""" 46 | 47 | logger.log.info("%d GPU(s) will be used for benchmarking." % torch.cuda.device_count()) 48 | opt.phase = 'deploy' 49 | device = torch.device("cuda:0") 50 | log_dict_all = {} 51 | tmp_folder = './tmp' 52 | os.makedirs(tmp_folder, exist_ok=True) 53 | 54 | # Gather all the point cloud files to be tested 55 | pc_file_list=[] 56 | for item in opt.input: 57 | if item.lower()[-4:] == '.ply': 58 | pc_file_list.append(item) 59 | else: 60 | pc_file_list += list(glob.iglob(item + '/**/*.ply', recursive=True)) 61 | pc_file_list.sort() 62 | 63 | for filename_ckpt in opt.checkpoints: 64 | 65 | log_dict_ckpt = [] 66 | logger.log.info("Working on checkpoint %s." 
% filename_ckpt) 67 | checkpoint = torch.load(filename_ckpt) 68 | if opt.checkpoint_net_config == True: 69 | opt.net_config = checkpoint['net_config'] 70 | logger.log.info("Model config loaded from check point.") 71 | logger.log.info(opt.net_config) 72 | syntax = SyntaxGenerator(opt=opt) 73 | pccnet = create_pccnet(opt.net_config, checkpoint, syntax, device) 74 | 75 | # Start the benchmarking 76 | t = time.monotonic() 77 | for idx, pc_file in enumerate(pc_file_list): 78 | 79 | bit_depth = opt.bit_depth[0 if len(opt.bit_depth) == 1 else idx] # support testing several point clouds with different bit-depths, individual bit_depths need to be provided in this case 80 | codec = get_codec_class(opt.codec_config['codec'])(opt.codec_config, pccnet, bit_depth, syntax) # initialize the codec 81 | 82 | # Load the point cloud and initialize the log_dict 83 | pc_raw = pc_read(pc_file) 84 | log_dict = { 85 | 'pc_name': os.path.split(pc_file)[1], 86 | 'num_points': pc_raw.shape[0], 87 | } 88 | if opt.mpeg_report_sequence: 89 | log_dict['seq_name'] = os.path.basename(os.path.dirname(pc_file)) 90 | 91 | with torch.no_grad(): 92 | # Encode pc_raw with pccnet, obtain compressed_files 93 | compressed_files, stat_dict_enc = codec.compress(pc_raw, tag=os.path.join(tmp_folder, os.path.splitext(log_dict['pc_name'])[0] + '_' + opt.exp_name)) 94 | 95 | # Decode compressed_files with pccnet, obtain pc_rec 96 | if opt.skip_decode == False: 97 | pc_rec, stat_dict_dec = codec.decompress(compressed_files) 98 | 99 | # Update the log_dict and compute D1, D2 100 | log_dict['bit_total'] = np.sum([os.stat(f).st_size for f in compressed_files]) * 8 101 | log_dict['bpp'] = log_dict['bit_total'] / log_dict['num_points'] 102 | 103 | peak_value = opt.peak_value[0 if len(opt.peak_value) == 1 else idx] # support point clouds with different bit-depths, individual peak values need to be provided in this case 104 | if opt.skip_decode: 105 | log_dict['d1_psnr'] = -1 106 | log_dict['d2_psnr'] = -1 107 | 
log_dict['rec_num_points'] = -1 108 | else: 109 | log_dict.update(compute_metrics(pc_file, pc_rec, peak_value, opt.compute_d2)) 110 | log_dict['rec_num_points'] = pc_rec.shape[0] 111 | log_dict.update(stat_dict_enc) 112 | if opt.skip_decode == False: 113 | log_dict.update(stat_dict_dec) 114 | log_dict_ckpt.append(log_dict) 115 | if opt.remove_compressed_files: 116 | for f in compressed_files: os.remove(f) 117 | 118 | # Log current metrics if needed 119 | if opt.print_freq > 0 and idx % opt.print_freq == 0: 120 | message = ' id: %d/%d, ' % (idx + 1, len(pc_file_list)) 121 | for k, v in log_dict.items(): 122 | message += '%s: %s, ' % (k, str(v)) 123 | logger.log.info(message[:-2]) 124 | 125 | # Write down the point cloud if needed 126 | if opt.pc_write_freq > 0 and idx % opt.pc_write_freq == 0 and opt.skip_decode == False: 127 | filename_rec = os.path.join(opt.exp_folder, opt.write_prefix + os.path.splitext(log_dict['pc_name'])[0] + "_rec.ply") 128 | pc_write(pc_rec, filename_rec) 129 | 130 | elapse = time.monotonic() - t 131 | log_dict_all[filename_ckpt] = log_dict_ckpt 132 | 133 | # Compute the average metrics for this current checkpoint 134 | basic_metrics = [(log_dict['bpp'], log_dict['bit_total'], log_dict['num_points'], log_dict['d1_psnr'], 135 | log_dict['d2_psnr'] if opt.compute_d2 else -1) for log_dict in log_dict_ckpt] 136 | avg_bpp, avg_size, avg_num_points, avg_d1_psnr, avg_d2_psnr = np.mean(np.array(basic_metrics), axis=0).tolist() 137 | avg_metrics = {'bpp': avg_bpp, 'seq_bpp': avg_size / avg_num_points, 'd1_psnr': avg_d1_psnr} 138 | if avg_d2_psnr > 0: avg_metrics['d2_psnr'] = avg_d2_psnr 139 | 140 | # Log current metrics for the check point 141 | message = 'Compression metrics --- time: %f, ' % elapse 142 | for k, v in avg_metrics.items(): message += 'avg_%s: %f, ' % (k, v) 143 | logger.log.info(message[:-2] + '\n') 144 | 145 | return log_dict_all 146 | 147 | 148 | def load_benchmark_config(opt): 149 | """Load all the configuration files for 
benchmarking.""" 150 | 151 | # Load the codec configuration 152 | with open(opt.codec_config, 'r') as file: 153 | codec_config = yaml.load(file, Loader=yaml.FullLoader) 154 | if opt.slice is not None: 155 | codec_config['slice'] = opt.slice 156 | opt.codec_config = codec_config 157 | 158 | # Load the network configuration 159 | if opt.net_config != '': 160 | with open(opt.net_config, 'r') as file: 161 | net_config = yaml.load(file, Loader=yaml.FullLoader) 162 | opt.net_config = net_config 163 | 164 | return opt 165 | 166 | 167 | if __name__ == "__main__": 168 | 169 | logger.log.error('Not implemented.') -------------------------------------------------------------------------------- /experiments/bench.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 5 | 6 | 7 | # Benchmarking one or more models 8 | 9 | import multiprocessing 10 | multiprocessing.set_start_method('spawn', True) 11 | 12 | import random 13 | import os 14 | import torch 15 | import sys 16 | import csv 17 | sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/..') 18 | 19 | # Load different utilities from PccAI 20 | from pccai.utils.option_handler import BenchmarkOptionHandler 21 | import pccai.utils.logger as logger 22 | from pccai.pipelines.bench import * 23 | 24 | 25 | def aggregate_sequence_log(log_dict_all): 26 | ''' 27 | Aggregate the dictionaries belonging to the same point cloud seqence as one dictionary, will be used when 28 | benchmarking dynamic point cloud sequences 29 | ''' 30 | for ckpt in log_dict_all.keys(): 31 | log_dict_ckpt = log_dict_all[ckpt] 32 | log_dict_ckpt.sort(key=lambda x: x['seq_name']) 33 | cur_seq_name = '' 34 | log_dict_ckpt_aggregate=[] 35 | 36 | for idx, log_dict in enumerate(log_dict_ckpt): 37 | if log_dict['seq_name'].lower() != cur_seq_name: # encounter a new sequence 38 | cur_seq_name = 
log_dict['seq_name'].lower() 39 | log_dict_tmp = { # make a new dictionary, only include keys necessary for MPEG reporting 40 | 'pc_name': cur_seq_name, 41 | 'rec_num_points': log_dict['rec_num_points'], 42 | 'bit_total': log_dict['bit_total'], 43 | 'd1_psnr': log_dict['d1_psnr'], 44 | 'seq_cnt': 1 45 | } 46 | if 'd2_psnr' in log_dict: 47 | log_dict_tmp['d2_psnr'] = log_dict['d2_psnr'] 48 | if 'enc_time' in log_dict: 49 | log_dict_tmp['enc_time'] = float(log_dict['enc_time']) 50 | if 'dec_time' in log_dict: 51 | log_dict_tmp['dec_time'] = float(log_dict['dec_time']) 52 | log_dict_ckpt_aggregate.append(log_dict_tmp) 53 | else: # update the existing sequence 54 | log_dict_ckpt_aggregate[-1]['rec_num_points'] += log_dict['rec_num_points'] 55 | log_dict_ckpt_aggregate[-1]['bit_total'] += log_dict['bit_total'] 56 | log_dict_ckpt_aggregate[-1]['d1_psnr'] += log_dict['d1_psnr'] 57 | log_dict_ckpt_aggregate[-1]['seq_cnt'] += 1 58 | if 'd2_psnr' in log_dict: 59 | log_dict_ckpt_aggregate[-1]['d2_psnr'] += log_dict['d2_psnr'] 60 | if 'enc_time' in log_dict: 61 | log_dict_ckpt_aggregate[-1]['enc_time'] += float(log_dict['enc_time']) 62 | if 'dec_time' in log_dict: 63 | log_dict_ckpt_aggregate[-1]['dec_time'] += float(log_dict['dec_time']) 64 | 65 | # Take average for each sequence 66 | for idx, log_dict in enumerate(log_dict_ckpt_aggregate): 67 | log_dict['d1_psnr'] /= log_dict['seq_cnt'] 68 | if 'd2_psnr' in log_dict: 69 | log_dict['d2_psnr'] /= log_dict['seq_cnt'] 70 | if 'enc_time' in log_dict: 71 | log_dict['enc_time'] = str(log_dict['enc_time']) 72 | if 'dec_time' in log_dict: 73 | log_dict['dec_time'] = str(log_dict['dec_time']) 74 | 75 | log_dict_all[ckpt] = log_dict_ckpt_aggregate 76 | return None 77 | 78 | 79 | def flatten_ckpt_log(log_dict_all): 80 | ''' 81 | The original log_dict_all is a dictionary indexed by the ckpts, then log_dict_all[ckpt] is a list of several 82 | dictionaries, each correspoing to the results of a inference test. 
This function flatten log_dict_all, so 83 | the output log_dict_all_flat is a list of dicionaries, and sorted by the pc_name (1st key) and bit_total (2nd key) 84 | ''' 85 | log_dict_all_flat = [] 86 | for ckpt, log_dict_ckpt in log_dict_all.items(): 87 | for log_dict in log_dict_ckpt: 88 | log_dict['ckpt'] = ckpt 89 | log_dict_all_flat += log_dict_ckpt 90 | log_dict_all_flat.sort(key=lambda x: (x['pc_name'], int(x['bit_total']))) # perform sorting with two keys 91 | return log_dict_all_flat 92 | 93 | 94 | def gen_mpeg_report(log_dict_all, mpeg_report_path, compute_d2, mpeg_report_sequence): 95 | """Generate the MPEG reporting CSV file""" 96 | 97 | # Parse the MPEG reporting template 98 | mpeg_seqname_file = os.path.join(os.path.split(__file__)[0], '..', 'assets', 'mpeg_test_seq.txt') 99 | with open(mpeg_seqname_file) as f: 100 | lines = f.readlines() 101 | mpeg_sequence_name = [str[:-1] for str in lines] 102 | 103 | # Preprocessing to log_dict_all 104 | if mpeg_report_sequence: 105 | aggregate_sequence_log(log_dict_all) 106 | log_dict_all = flatten_ckpt_log(log_dict_all) 107 | 108 | # Write down CSV file for MPEG reporting 109 | mpeg_report_dict_list = [] 110 | for log_dict in log_dict_all: 111 | pc_name = os.path.splitext(log_dict['pc_name'])[0].lower() 112 | if pc_name[-2:] == '_n': 113 | pc_name = pc_name[:-2] 114 | if pc_name in mpeg_sequence_name: # found an MPEG sequence 115 | mpeg_report_dict = { 116 | 'sequence': pc_name, # sequence 117 | 'numOutputPointsT': log_dict['rec_num_points'], # numOutputPointsT 118 | 'numBitsGeoEncT': log_dict['bit_total'], # numBitsGeoEncT 119 | 'd1T': log_dict['d1_psnr'] # d1T, 120 | } 121 | if compute_d2: 122 | mpeg_report_dict['d2T'] = log_dict['d2_psnr'] # d2T 123 | 124 | # Encoding/decoding time 125 | if 'enc_time' in log_dict: 126 | mpeg_report_dict['encTimeT'] = log_dict['enc_time'] 127 | if 'dec_time' in log_dict: 128 | mpeg_report_dict['decTimeT'] = log_dict['dec_time'] 129 | 
mpeg_report_dict_list.append(mpeg_report_dict) 130 | 131 | # Write the CSV file according to the aggregated statistics 132 | mpeg_report_header = ['sequence', 'numOutputPointsT', 'numBitsGeoEncT', 'd1T', 'd2T', 'encTimeT', 'decTimeT'] 133 | with open(mpeg_report_path, 'w') as f: 134 | writer = csv.DictWriter(f, fieldnames=mpeg_report_header) 135 | writer.writeheader() 136 | writer.writerows(mpeg_report_dict_list) 137 | if len(mpeg_report_dict_list) > 0: 138 | logger.log.info('CSV file for MPEG reporting: %s' % mpeg_report_path) 139 | 140 | 141 | if __name__ == "__main__": 142 | 143 | # Parse the options and perform training 144 | option_handler = BenchmarkOptionHandler() 145 | opt = option_handler.parse_options() 146 | 147 | # Create a folder to save the models and the log 148 | if not os.path.exists(opt.exp_folder): 149 | os.makedirs(opt.exp_folder) 150 | 151 | # Initialize a global logger then print out all the options 152 | logger.create_logger(opt.exp_folder, opt.log_file, opt.log_file_only) 153 | option_handler.print_options(opt) 154 | opt = load_benchmark_config(opt) 155 | 156 | # Go with the actual training 157 | if opt.seed is not None: 158 | torch.manual_seed(opt.seed) 159 | random.seed(opt.seed) 160 | log_dict_all = benchmark_checkpoints(opt) 161 | 162 | # Create the MPEG reporting CSV file if needed 163 | if opt.mpeg_report is not None: 164 | gen_mpeg_report( 165 | log_dict_all=log_dict_all, 166 | mpeg_report_path=os.path.join(opt.exp_folder, opt.mpeg_report), 167 | compute_d2=opt.compute_d2, 168 | mpeg_report_sequence=opt.mpeg_report_sequence 169 | ) 170 | logger.log.info('Benchmarking session %s finished.\n' % opt.exp_name) 171 | logger.destroy_logger() 172 | 173 | -------------------------------------------------------------------------------- /pccai/pipelines/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 
# See LICENSE under the root folder.


# Test the point cloud compression model, this is to verify the loss on the datasets but not for actual compression

import time
import os
import yaml
import torch
import numpy as np

# Load different utilities from PccAI
from pccai.models import PccModelWithLoss
from pccai.dataloaders.point_cloud_dataset import point_cloud_dataloader
from pccai.utils.syntax import SyntaxGenerator
from pccai.utils.misc import pc_write, load_state_dict_with_fallback
import pccai.utils.logger as logger


def test_one_epoch(pccnet, dataset, dataloader, syntax, gen_bitstream, print_freq, pc_write_freq, pc_write_prefix, exp_folder=None):
    """Run the model over the data loader once and average the per-batch losses.

    Args:
        pccnet: model wrapper; calling it returns a dict whose 'loss' entry maps loss names to tensors.
            It also exposes `pcc_model.compress()` / `pcc_model.decompress()` and `loss.xyz_loss()`,
            `loss.alpha`, `loss.beta`, which are used when `gen_bitstream` is set.
        dataset: dataset object; only `dataset.num_points` is read (non-heterogeneous bpp computation).
        dataloader: iterable of batches, must provide `batch_size` and `len()`.
        syntax: syntax object; `hetero`, `syntax_gt` and `syntax_rec` are read.
        gen_bitstream: if true, also perform real compression/decompression and collect its statistics.
        print_freq: log every `print_freq` batches.
        pc_write_freq: write a reconstructed point cloud every `pc_write_freq` batches (<= 0 disables).
        pc_write_prefix: file name prefix of the written point clouds.
        exp_folder: output folder of the written point clouds; None means the current directory.

    Returns:
        (avg_loss, avg_real): dict of averaged losses, and dict of averaged real-compression statistics
        (None when `gen_bitstream` is false). Both are averaged over the number of full batches processed;
        when no full batch is available, `avg_loss` is empty and `avg_real` keeps its zero initial values.
    """

    # Perform testing of one epoch
    avg_loss = {}
    avg_real = {'xyz_loss': 0, 'bpp_loss': 0, 'loss': 0} if gen_bitstream else None
    len_data = len(dataloader)
    num_batches = 0 # number of full batches actually processed
    out_folder = exp_folder if exp_folder is not None else '' # os.path.join() cannot take None
    if syntax.hetero:
        syntax_gt = syntax.syntax_gt
        syntax_rec = syntax.syntax_rec

    for batch_id, points in enumerate(dataloader):

        # An incomplete batch ends the epoch and is not counted
        if points.shape[0] < dataloader.batch_size:
            break
        points = points.cuda()

        # Inference and compute loss
        with torch.no_grad():
            output = pccnet(points)
            loss = output['loss']
            for k, v in loss.items():
                loss[k] = torch.mean(v)

        # Log the results
        for k, v in loss.items():
            avg_loss[k] = avg_loss.get(k, 0.0) + v.item()
        if batch_id % print_freq == 0:
            message = ' batch count: %d/%d, ' % (batch_id, len_data)
            for k, v in loss.items():
                message += '%s: %f, ' % (k, v)
            logger.log.info(message[:-2])

        # Perform REAL compression, this part is useful under the heterogeneous mode
        if gen_bitstream:
            # Compress then decompress
            with torch.no_grad():
                cmp_out, meta_data = pccnet.pcc_model.compress(points) # compression
                rec_real, meta_data = pccnet.pcc_model.decompress(cmp_out['strings'], cmp_out['shape'], meta_data) # decompression
                if syntax.hetero:
                    rec_real = torch.hstack([rec_real, meta_data])
                elif len(rec_real.shape) == 2:
                    rec_real = rec_real.unsqueeze(0)

            # Compute loss and log the results
            num_bits = sum(len(string) * 8 for string in cmp_out['strings'][0])
            if syntax.hetero:
                num_points = torch.sum(points[:, :, syntax_gt['block_pntcnt']] > 0)
            else:
                num_points = dataloader.batch_size * dataset.num_points
            # bit per point for current batch, stored as a plain float so that no tensor is accumulated
            bpp_loss_batch = float(num_bits / num_points)
            xyz_loss_batch = {}
            pccnet.loss.xyz_loss(xyz_loss_batch, points, output) # distortion loss for current batch
            xyz_loss_batch = xyz_loss_batch['xyz_loss'].item()
            real_loss_batch = pccnet.loss.alpha * xyz_loss_batch + pccnet.loss.beta * bpp_loss_batch
            avg_real['bpp_loss'] += bpp_loss_batch
            avg_real['xyz_loss'] += xyz_loss_batch
            avg_real['loss'] += real_loss_batch
            if batch_id % print_freq == 0:
                logger.log.info(' real stat. ---- bpp_loss: %f, xyz_loss: %f, loss: %f' % (bpp_loss_batch, xyz_loss_batch, real_loss_batch))

            # Write down the point cloud if needed
            if pc_write_freq > 0 and batch_id % pc_write_freq == 0:
                filename_rec_real = os.path.join(out_folder, pc_write_prefix + str(batch_id) + "_rec_real.ply")
                if syntax.hetero:
                    # Keep the rows whose running sum of the 'pc_start' column equals 1, and the xyz columns only
                    pc_rec_real = rec_real[torch.cumsum(rec_real[:, syntax_rec['pc_start']], dim=0) == 1,
                        syntax_rec['xyz'][0] : syntax_rec['xyz'][1] + 1]
                    pc_write(pc_rec_real, filename_rec_real)
                else:
                    pc_write(rec_real[0], filename_rec_real)

        num_batches += 1

    if num_batches == 0:
        # Nothing to average: avoid dividing by zero
        logger.log.info('No full batch available, the averaged losses are not computed.')
        return avg_loss, avg_real

    # The average loss
    for k in avg_loss:
        avg_loss[k] = avg_loss[k] / num_batches

    # The average statistics if REAL compression has been performed
    if gen_bitstream:
        for k in avg_real:
            avg_real[k] = avg_real[k] / num_batches

    return avg_loss, avg_real


def test_pccnet(opt):
    """Test a point cloud compression network.

    This is not for actual point cloud compression but for the purpose of testing the trained networks.
    Loads the checkpoint given by `opt.checkpoint`, builds the model and the test data loader, runs
    `test_one_epoch()` and logs the averaged results.

    Args:
        opt: parsed options whose configuration entries were already loaded (see `load_test_config()`).
            `opt.phase` is set to 'test' and `opt.net_config` may be replaced by the one in the checkpoint.

    Returns:
        dict of averaged losses.
    """

    logger.log.info("%d GPU(s) will be used for testing." % torch.cuda.device_count())
    opt.phase = 'test'

    # Load an existing check point on the CPU first, so that it does not depend on the device it was
    # saved from; the whole model is moved to the GPU below
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
    if opt.checkpoint_net_config:
        opt.net_config = checkpoint['net_config']
        logger.log.info("Model config loaded from check point %s." % opt.checkpoint)
        logger.log.info(opt.net_config)
    syntax = SyntaxGenerator(opt)

    pccnet = PccModelWithLoss(opt.net_config, syntax, opt.optim_config['loss_args'])
    state_dict = checkpoint['net_state_dict']
    # Drop the first len('.pcc_model') characters of every key while keeping the key order.
    # NOTE(review): presumably every key starts with 'pcc_model.' (same length) -- confirm against the training pipeline.
    for _ in range(len(state_dict)):
        k, v = state_dict.popitem(False)
        state_dict[k[len('.pcc_model'):]] = v
    load_state_dict_with_fallback(pccnet.pcc_model, state_dict)
    logger.log.info("Model weights loaded from check point %s.\n" % opt.checkpoint)
    device = torch.device("cuda:0")
    pccnet.to(device)
    # NOTE(review): the original comment here read "to let the noise add to the codeword, should NOT set
    # it to evaluation mode", which contradicts this call -- confirm which behavior is intended.
    pccnet.eval()

    # Miscellaneous configurations
    test_dataset, test_dataloader = point_cloud_dataloader(opt.test_data_config, syntax) # configure the datasets

    # Start the testing process
    t = time.monotonic()
    avg_loss, avg_real = test_one_epoch(pccnet, test_dataset, test_dataloader, syntax,
        opt.gen_bitstream, opt.print_freq, opt.pc_write_freq, opt.pc_write_prefix, opt.exp_folder)
    elapse = time.monotonic() - t

    # Log the testing result
    message = 'Validation --- time: %f, ' % elapse
    for k, v in avg_loss.items():
        message += 'avg_%s: %f, ' % (k, v)
    logger.log.info(message[:-2])
    if opt.gen_bitstream:
        message = 'real stat --- '
        for k, v in avg_real.items():
            message += 'avg_%s: %f, ' % (k, v)
        logger.log.info(message[:-2])

    return avg_loss


def load_test_config(opt):
    """Load all the configuration files for testing.

    The paths stored in `opt.test_data_config[0]`, `opt.optim_config` and (if not empty) `opt.net_config`
    are replaced by the parsed YAML content. `opt.alpha` / `opt.beta`, when given, overwrite the values
    of the optimization configuration.

    Args:
        opt: parsed options.

    Returns:
        The same `opt` object, updated in place.
    """

    # NOTE: yaml.FullLoader is meant for trusted files only; do not feed configurations from untrusted sources.

    # Load the test data configuration (only the first entry is loaded)
    with open(opt.test_data_config[0], 'r') as file:
        test_data_config = yaml.load(file, Loader=yaml.FullLoader)
    opt.test_data_config[0] = test_data_config

    # Load the optimization configuration
    with open(opt.optim_config, 'r') as file:
        optim_config = yaml.load(file, Loader=yaml.FullLoader)
    if opt.alpha is not None:
        optim_config['loss_args']['alpha'] = opt.alpha
    else:
        logger.log.info('alpha from optim config: ' + str(optim_config['loss_args']['alpha']))
    if opt.beta is not None:
        optim_config['loss_args']['beta'] = opt.beta
    else:
        logger.log.info('beta from optim config: ' + str(optim_config['loss_args']['beta']))
    opt.optim_config = optim_config

    # Load the network configuration
    if opt.net_config != '':
        with open(opt.net_config, 'r') as file:
            net_config = yaml.load(file, Loader=yaml.FullLoader)
        opt.net_config = net_config

    return opt


if __name__ == "__main__":

    logger.log.error('Not implemented.')

# --------------------------------------------------------------------------------
# /pccai/utils/option_handler.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2010-2022, InterDigital
# All rights reserved.

# See LICENSE under the root folder.


# Handle all the input arguments during training, testing, benchmarking, etc.
8 | 9 | import pccai.utils.logger as logger 10 | import argparse 11 | import os 12 | 13 | def str2bool(val): 14 | if isinstance(val, bool): 15 | return val 16 | if val.lower() in ('true', 'yes', 't', 'y', '1'): 17 | return True 18 | elif val.lower() in ('false', 'no', 'f', 'n', '0'): 19 | return False 20 | else: 21 | raise argparse.ArgumentTypeError('Expect a Boolean value.') 22 | 23 | 24 | class BasicOptionHandler(): 25 | """Base option handler: builds the argument parser with the options shared by all phases (training, testing, and benchmarking); subclasses extend add_options().""" 26 | 27 | def __init__(self): 28 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 29 | parser = self.add_options(parser) 30 | self.parser = parser 31 | 32 | def add_options(self, parser): 33 | 34 | # What do you want to do 35 | parser.add_argument('--exp_name', type=str, default='experiment_name', help='Name of the experiment, result folder is created based on this name.') 36 | 37 | # What are your ingredients 38 | parser.add_argument('--net_config', type=str, default='', help='Network configuration in YAML.') 39 | parser.add_argument('--optim_config', type=str, default='', help='Optimization configuration in YAML.') 40 | parser.add_argument('--hetero', type=str2bool, nargs='?', const=True, default=False, help='Whether to use the heterogeneous batching mode.') 41 | parser.add_argument('--checkpoint', type=str, default='', help='Load an existing checkpoint.') 42 | 43 | # How do you cook 44 | parser.add_argument('--alpha', type=float, default=None, help='Weight for distortion in R-D optimization, can overwrite the one in the YAML config.') 45 | parser.add_argument('--beta', type=float, default=None, help='Weight for bit-rate in R-D optimization, can overwrite the one in the YAML config.') 46 | parser.add_argument('--seed', type=float, default=None, help='Set random seed for reproducibility') 47 | 48 | # Logging options 49 | parser.add_argument('--result_folder', type=str, default='results', help='Indicate the result folder.') 50 | 
parser.add_argument('--log_file', type=str, default='', help='Log file name.') 51 | parser.add_argument('--log_file_only', type=str2bool, nargs='?', const=True, default=False, help='Only prints to the log file if set True.') 52 | parser.add_argument('--print_freq', type=int, default=20, help='Frequency of displaying results.') 53 | parser.add_argument('--pc_write_freq', type=int, default=50, help='Frequency of writing down the point cloud, use tensorboard to write during training, write "ply" file proint cloud during testing.') 54 | parser.add_argument('--tf_summary', type=str2bool, nargs='?', const=True, default=False, help='Whether to use tensorboard for log.') 55 | return parser 56 | 57 | def parse_options(self): 58 | opt, _ = self.parser.parse_known_args() 59 | opt.exp_folder = os.path.join(opt.result_folder, opt.exp_name) 60 | return opt 61 | 62 | def print_options(self, opt): 63 | message = '' 64 | message += '\n----------------- Input Arguments ---------------\n' 65 | # For k, v in sorted(vars(opt).items()): 66 | for k, v in vars(opt).items(): 67 | comment = '' 68 | default = self.parser.get_default(k) 69 | if v != default: 70 | comment = '\t[default: %s]' % str(default) 71 | message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) 72 | message += '----------------- End -------------------' 73 | logger.log.info(message) 74 | 75 | 76 | class TrainOptionHandler(BasicOptionHandler): 77 | """A class that includes specific options for training.""" 78 | 79 | def add_options(self, parser): 80 | parser = BasicOptionHandler.add_options(self, parser) 81 | parser.add_argument('--train_data_config', type=str, nargs='+', required=True, help='Training data configuration in YAML.') 82 | parser.add_argument('--val_data_config', type=str, nargs='+', default='', help='Validataion data configuration in YAML.') 83 | parser.add_argument('--checkpoint_optim_config', type=str2bool, nargs='?', const=True, default=False, help='Whether to load the optimizers and schedulers 
from checkpoint.') 84 | parser.add_argument('--checkpoint_epoch_state', type=str2bool, nargs='?', const=True, default=False, help='Whether to load the epoch state from the checkpoint.') 85 | parser.add_argument('--save_checkpoint_freq', type=int, default=2, help='Frequency of saving the trained model.') 86 | parser.add_argument('--save_checkpoint_max', type=int, default=10, help='Maximum number of check points to be saved.') 87 | parser.add_argument('--save_checkpoint_prefix', type=str, default='epoch_', help='Prefix of the check points file names. ') 88 | parser.add_argument('--val_freq', type=int, default=-1, help='Frequency of validation with the validation set, <=0 means no validation.') 89 | parser.add_argument('--val_print_freq', type=int, default=20, help='Frequency of displaying results during validation.') 90 | parser.add_argument('--lr', type=float, default=None, help='Learning rate of the main parameters, can overwrite the one in the YAML config.') 91 | parser.add_argument('--lr_aux', type=float, default=None, help='Learning rate of the aux parameters, can overwrite the one in the YAML config.') 92 | parser.add_argument('--fix_modules', type=str, nargs='+', default='', help='Names of the fixed modules during training.') 93 | parser.add_argument('--ddp', type=str2bool, nargs='?', const=True, default=False, help='Whether to DPP mode or not.') 94 | parser.add_argument('--master_address', type=str, default='localhost', help='Master address of DDP.') 95 | parser.add_argument('--master_port', type=int, default=29500, help='Master port of DPP.') 96 | 97 | # You can add your method-specific parameters here if necessary. They can be passed to the loaded YAML configs before training. 98 | # Check how alpha, beta and lr are overwritten in pipelines/train.py as examples. 
99 | return parser 100 | 101 | 102 | class TestOptionHandler(BasicOptionHandler): 103 | """A class that includes specific options for testing.""" 104 | 105 | def add_options(self, parser): 106 | parser = BasicOptionHandler.add_options(self, parser) 107 | parser.add_argument('--checkpoint_net_config', type=str2bool, nargs='?', const=True, default=False, help='Whether to load the model configuration from the checkpoint, if yes, net_config will be ignored.') 108 | parser.add_argument('--test_data_config', type=str, nargs='+', required=True, help='Test data configuration in YAML.') 109 | parser.add_argument('--gen_bitstream', type=str2bool, nargs='?', const=True, default=False, help='Generate the actual bitstream or not.') 110 | parser.add_argument('--pc_write_prefix', type=str, default='', help='Prefix when writing down the point clouds.') 111 | return parser 112 | 113 | 114 | class BenchmarkOptionHandler(BasicOptionHandler): 115 | """A class that includes specific options for benchmarking.""" 116 | 117 | def add_options(self, parser): 118 | parser = BasicOptionHandler.add_options(self, parser) 119 | parser.add_argument('--checkpoints', type=str, nargs='+', default=None, help='Specify several existing checkpoints.') 120 | parser.add_argument('--checkpoint_net_config', type=str2bool, nargs='?', const=True, default=True, help='Whether to load the model configuration from the checkpoint, if yes, net_config will be ignored.') 121 | parser.add_argument('--codec_config', type=str, required=True, help='Codec configuration in YAML.') 122 | parser.add_argument('--input', type=str, nargs='+', required=True, help='A list of folders containing the point clouds to be tested, or simply just a ply file.') 123 | parser.add_argument('--peak_value', type=int, nargs='+', required=True, help='Peak value(s) for computing the D1 and D2 metrics. 
If only one value is provided, it will be used for the whole test; otherwise peak values for every point clouds need to be given.') 124 | parser.add_argument('--bit_depth', type=int, nargs='+', required=True, help='Bit-depth value(s) of the point cloud(s) to be tested. If only one value is provided, it will be used for the whole test; otherwise bit-depths for every point clouds need to be given.') 125 | parser.add_argument('--remove_compressed_files', type=str2bool, nargs='?', const=True, default=True, help='Whether to remove the compressed files.') 126 | parser.add_argument('--skip_decode', type=str2bool, nargs='?', const=True, default=False, help='Whether to skip the decoding process, useful for lossless compression.') 127 | parser.add_argument('--compute_d2', type=str2bool, nargs='?', const=True, default=False, help='Whether to compute the D2 metric.') 128 | parser.add_argument('--mpeg_report', type=str, default=None, help='Write the results for MPEG reporting in the CSV format.') 129 | parser.add_argument('--mpeg_report_sequence', type=str2bool, nargs='?', const=True, default=False, help='If true, create MPEG report in the CSV format by viewing the inputs as point cloud sequences.') 130 | parser.add_argument('--write_prefix', type=str, default='', help='Prefix when writing down the point clouds and the bitstreams.') 131 | parser.add_argument('--slice', type=int, default=None, help='Slicing parameter.') 132 | return parser -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PccAI: A Framework for AI-based Point Cloud Compression 2 | 3 | PccAI (*pick-kai*) is a PyTorch-based framework for conducting AI-based Point Cloud Compression (PCC) experiments. It is a modularized testbed for implementing AI-based PCC approaches, which supports the following tasks: 4 | * Training/Testing deep neural networks for PCC. 
5 | * Benchmarking AI-based PCC approaches according to MPEG recommendation. 6 | * Visualization of point clouds. 7 | 8 | ## News 9 | * [Dec 2024] MPEG has adopted the PccAI framework as the testbed for establishing the international standard on AI-based point cloud compression. More details are available [here](https://git.mpeg.expert/MPEG/3dgh/ai-gc/software/mpeg-pcc-tmap) (MPEG account required). 10 | 11 | ## Features 12 | 13 | * *Modularized design*: PccAI adopts a modularized design for training/inference, which makes it a highly flexible framework to verify AI-based PCC approaches. 14 | * *Plug-and-play pipelines*: PccAI provides the basic experiment pipelines for training, testing, and benchmarking. Users only need to focus on implementing necessary ingredients (*e.g.*, the neural network architecture) to test a proposed PCC method. 15 | * *Multi-modal data loading*: 3D point clouds have different representations, *e.g.*, 3D coordinates, voxel grids, octrees, and even images. PccAI supports multi-representation data loading so that a point cloud can be loaded in different ways. 16 | * *Heterogeneous batching*: an experimental mode for loading point clouds as octrees explicitly, where the leaf nodes with different numbers of points are organized as mini-batches for training/inference. 17 | 18 | ## Installation 19 | 20 | We tested the PccAI framework under two different virtual environments with conda: 21 | * Python 3.6, PyTorch 1.7.0, and CUDA 10.1. For this configuration, please launch the installation script `install_torch-1.7.0+cu-10.1.sh` with the following command: 22 | ```bash 23 | echo y | conda create -n pccai python=3.6 && conda activate pccai && ./install_torch-1.7.0+cu-10.1.sh 24 | ``` 25 | * Python 3.8, PyTorch 1.8.1, and CUDA 11.1. 
For this configuration, please launch the installation script `install_torch-1.8.1+cu-11.2.sh` with the following command: 26 | ```bash 27 | echo y | conda create -n pccai python=3.8 && conda activate pccai && ./install_torch-1.8.1+cu-11.2.sh 28 | ``` 29 | It is *highly recommended* to check the installation scripts which describe the details of the necessary packages. Note that [plyfile](https://github.com/dranjan/python-plyfile) is used for reading/writing PLY files, which is under GPL license. By replacing it with another library providing the same functionality, our implementation can still run. 30 | 31 | After that, put the binary of `pc_error` (D1 & D2 computation tool used by the MPEG group) under the `third_party` folder if benchmarking with D1 and D2 metrics is desired. A publicly-available version of `pc_error` can be found [here](https://github.com/NJUVISION/PCGCv2/blob/master/pc_error_d). To use it with PccAI, please download and rename it to `pc_error`. 32 | 33 | ## Datasets 34 | Create a `datasets` folder and put all the datasets described below under it. One may create soft links to the existing datasets to save space. 35 | ### LiDAR Datasets 36 | 37 | We support the loading of LiDAR datasets. To use the *Ford* sequences, please arrange them as follows: 38 | ```bash 39 | ${ROOT_OF_THE_REPO}/datasets/ford 40 | ├── ford_01_q1mm 41 | ├── ford_02_q1mm 42 | └── ford_03_q1mm 43 | ├── Ford_03_vox1mm-0200.ply 44 | ├── Ford_03_vox1mm-0201.ply 45 | ├── Ford_03_vox1mm-0202.ply 46 | ... 47 | └── Ford_03_vox1mm-1699.ply 48 | ``` 49 | Other LiDAR datasets, such as *KITTI*, are arranged similarly. Refer to the data loaders in `pccai/dataloaders/lidar_base_loader.py` for more details. 50 | 51 | ### CAD Model Datasets 52 | 53 | We support the loading of *ModelNet40* and *ShapeNet-Part* datasets. Our ModelNet40 data loader is built on top of the ModelNet40 mesh data loader of PyTorch Geometric. 
For the first run, it will automatically download ModelNet40 under the `datasets` folder and preprocess it. The ShapeNet-Part dataset can be downloaded [here](https://shapenet.cs.stanford.edu/ericyi/shapenetcore_partanno_segmentation_benchmark_v0.zip). After that, please arrange it as follows: 54 | ```bash 55 | ${ROOT_OF_THE_REPO}/pccai/datasets/shapenet_part 56 | ├── shapenet_part_overallid_to_catid_partid.json 57 | └── shapenetcore_partanno_segmentation_benchmark_v0 58 | ├── 02691156 59 | ├── 02773838 60 | ... 61 | └── train_test_split 62 | ``` 63 | 64 | ## Basic Usages 65 | 66 | The core code for training, testing, and benchmarking is under the `pccai/pipelines` folder. These pipelines are called by their wrappers under the `experiments` folder. The basic way to launch experiments with PccAI is: 67 | ```bash 68 | ./scripts/run.sh ./scripts/[filename].sh [launcher] [GPU ID(s)] 69 | ``` 70 | where `[launcher]` can be `s` (slurm), `d` (directly run in background) or `f` (directly run in foreground). `[GPU ID(s)]` should be ignored when launching with slurm. The results (checkpoints, point cloud files, log, *etc.*) will be generated under the `results/[filename]` folder. To understand the options for training/testing/benchmarking, refer to `pccai/utils/option_handler.py` for details. 71 | 72 | ### Working Examples 73 | 74 | A recently published work, [GRASP-Net](https://github.com/InterDigitalInc/GRASP-Net), utilizes the PccAI framework to implement a high-performance AI-based codec for lossy point cloud compression. It can be a good working example for reference. 75 | 76 | In the following, we also take a simple MLP-based PCC method for illustration. We will train it on the first Ford sequence and then test/benchmark it on the rest of the sequences. 77 | 78 | We first apply octree partitioning to an input LiDAR point cloud where the octree will be encoded losslessly. 
Then the points in each partitioned block are encoded with a *PointNet*, leading to a group of codewords to be arithmetically encoded as a bitstream. On the decoder side, an MLP-based decoder is applied to decode each codeword; the decoded blocks are then assembled into a decoded point cloud. 79 | 80 | ### Training/Testing 81 | 82 | The following command is used to launch the training script: 83 | ```bash 84 | ./scripts/run.sh ./scripts/examples/train_ford_hetero.sh d 0,1 85 | ``` 86 | which launches the training experiment directly on GPUs 0 & 1. 87 | 88 | Having trained the network, testing can be launched by the following command: 89 | ```bash 90 | ./scripts/run.sh ./scripts/examples/test_ford_hetero.sh d 0 91 | ``` 92 | Note that "testing" here means to check the loss values of the trained network on a specified dataset, but not to perform actual compression/decompression. 93 | 94 | ### Benchmarking 95 | 96 | To run this particular benchmarking example, it is necessary to install [CompressAI](https://github.com/InterDigitalInc/CompressAI); refer to the installation script for more details. Please use the following command to benchmark the compression performance of the trained network, *e.g.*, compute *D1*, *D2*, and *bpp*: 97 | ```bash 98 | ./scripts/run.sh ./scripts/examples/bench_ford_hetero.sh d 0 99 | ``` 100 | The above command also generates a CSV file `mpeg_report.csv` holding the benchmarking results of one rate point. We provide a tool `utils/merge_csv.py` to merge several CSV files into one. The generated CSV file can be fed to [mpeg-pcc-ai-report](http://mpegx.int-evry.fr/software/MPEG/PCC/ai/mpeg-pcc-ai-report) or [AI-PCC-Reporting-Template](https://github.com/yydlmzyz/AI-PCC-Reporting-Template) to generate R-D curves and B-D statistics for reporting in the MPEG group. 101 | 102 | ### Visualization 103 | 104 | A simple tool to visualize 3D point clouds is provided in `utils/visualize.py`, see `scripts/visualize.sh` for an example to use it. 
To use this tool, please also install the [Open3D](http://www.open3d.org/) library. Note that by pressing `h` at the visualization window, instructions provided by Open3D will be printed to the terminal. 105 | 106 | ## Software Architecture 107 | Modern deep learning systems have three main ingredients for training: a deep neural network, a training data set, and an optimization method. Bearing this in mind, we developed three modules: a neural network constructor (`pccai/models`), a data loader constructor (`pccai/dataloaders`), and an optimization configurator (`pccai/optim`). These three modules take their associated configuration files in YAML format as inputs (see the examples under the `config` folder), then initialize the training pipeline. The diagram of training with PccAI is shown below: 108 | 109 |

110 | framework 111 |

112 | 113 | To perform actual encoding/decoding, one needs not only the trained neural network but also extra steps for pre-/post-processing and arithmetic encoding/decoding of the point clouds. In PccAI, an additional module, the codec configurator, is introduced to specify the behavior of the codec (`pccai/codecs`). It also takes a YAML file as input; please check the examples under the `config` folder. The diagram of encoding/decoding with PccAI is shown below: 114 | 115 |

116 | framework 117 |

118 | 119 | ## Implement Your AI-based PCC Method 120 | 121 | Basic steps to implement your own AI-based PCC method are listed below: 122 | 123 | * Implement your neural network class under the `pccai/models/architectures` folder. For sub-modules needed in your network (*e.g.*, the PointNet in our example), please put them under `pccai/models/modules`. Your neural network class should contain `.compress()` and `.decompress()` functions, which perform actual compression/decompression operations with the trained network. 124 | 125 | * Implement your data loader in `pccai/dataloaders`, if our provided ones cannot satisfy your needs. 126 | 127 | * Implement your loss functions in `pccai/optim` by subclassing the `PccLossBase` class, if our provided ones cannot satisfy your needs. 128 | 129 | * Implement your codec class in `pccai/codecs` by subclassing the `PccCodecBase` class. It should include basic pre-/post-processing and functionalities to read/write bit-streams. It should have its own `.compress()` and `.decompress()` functions to respectively call `.compress()` and `.decompress()` of your network class. 130 | 131 | We *highly recommend* checking the aforementioned working examples for details of using the PccAI framework, where `pccai/models/architectures/mlpcomp.py` implements the example neural network architecture while `pccai/codecs/octree_partition_codec.py` implements the example codec. 132 | 133 | ## License 134 | The PccAI framework is released under the BSD License, see `LICENSE` for details. 135 | 136 | ## Contacts 137 | Please contact Jiahao Pang (jiahao.pang@interdigital.com), the main contributor of PccAI, if you have any questions. 138 | 139 | ## Acknowledgement 140 | We thank Maurice Quach for providing valuable help, advice, and enlightenment during the development of the PccAI framework. 
141 | 142 | ## Related Resources 143 | * [Guidelines for Conducting AI Exploration Experiments for PCC](https://www.mpeg.org/wp-content/uploads/mpeg_meetings/139_OnLine/w21694.zip), MPEG 3DG w21694, July 2022. 144 | * [PyTorch Geometric](https://pytorch-geometric.readthedocs.io) 145 | * [CompressAI](https://github.com/InterDigitalInc/CompressAI) 146 | * [GRASP-Net](https://github.com/InterDigitalInc/GRASP-Net) 147 | * [Open3D](http://www.open3d.org) 148 | * [mpeg-pcc-ai-report](http://mpegx.int-evry.fr/software/MPEG/PCC/ai/mpeg-pcc-ai-report) / [AI-PCC-Reporting-Template](https://github.com/yydlmzyz/AI-PCC-Reporting-Template) 149 | * [TMC13](https://github.com/MPEGGroup/mpeg-pcc-tmc13) 150 | * [TMC2](https://github.com/MPEGGroup/mpeg-pcc-tmc2) 151 | 152 | -------------------------------------------------------------------------------- /pccai/utils/convert_octree.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2010-2022, InterDigital 2 | # All rights reserved. 3 | 4 | # See LICENSE under the root folder. 
5 | 6 | 7 | # Octree partitioning and departitioning with breadth-first search 8 | 9 | import os 10 | import pickle 11 | import numpy as np 12 | # from numba import njit 13 | 14 | 15 | # @njit 16 | def compute_new_bbox(idx, bbox_min, bbox_max): 17 | """Compute the bounding box of child octant `idx` of the block [bbox_min, bbox_max]; bits 0, 1 and 2 of `idx` select the upper half along axes 0, 1 and 2.""" 18 | 19 | midpoint = (bbox_min + bbox_max) / 2 20 | cur_bbox_min = bbox_min.copy() 21 | cur_bbox_max = midpoint.copy() 22 | if idx & 1: 23 | cur_bbox_min[0] = midpoint[0] 24 | cur_bbox_max[0] = bbox_max[0] 25 | if (idx >> 1) & 1: 26 | cur_bbox_min[1] = midpoint[1] 27 | cur_bbox_max[1] = bbox_max[1] 28 | if (idx >> 2) & 1: 29 | cur_bbox_min[2] = midpoint[2] 30 | cur_bbox_max[2] = bbox_max[2] 31 | 32 | return cur_bbox_min, cur_bbox_max 33 | 34 | 35 | # @njit 36 | def _analyze_octant(points, bbox_min, bbox_max): 37 | """Sort the points of a block into its eight octants. Returns the points reordered octant by octant, the per-octant point counts, the occupancy pattern (bit i is set when octant i is not empty), the eight child bounding boxes, and the index map used for the reordering.""" 38 | 39 | center = (np.asarray(bbox_min) + np.asarray(bbox_max)) / 2 40 | 41 | locations = (points >= np.expand_dims(center, 0)).astype(np.uint8) 42 | locations *= np.array([[1, 2, 4]], dtype=np.uint8) 43 | locations = np.sum(locations, axis=1) 44 | 45 | location_cnt = np.zeros((8,), dtype=np.uint32) 46 | for idx in range(locations.shape[0]): 47 | loc = locations[idx] 48 | location_cnt[loc] += 1 49 | 50 | location_map = np.zeros(locations.shape[0], dtype=np.uint32) 51 | location_idx = np.zeros((8,), dtype=np.uint32) 52 | for i in range(1, location_idx.shape[0]): 53 | location_idx[i] = location_idx[i-1] + location_cnt[i-1] 54 | for idx in range(locations.shape[0]): 55 | loc = locations[idx] 56 | location_map[location_idx[loc]] = idx 57 | location_idx[loc] += 1 58 | 59 | # occupancy pattern of current node 60 | pattern = np.sum((location_cnt > 0).astype(np.uint32) * np.array([1, 2, 4, 8, 16, 32, 64, 128], dtype=np.uint32)) 61 | points = points[location_map, :] # rearrange the points 62 | child_bboxes = [compute_new_bbox(i, bbox_min, bbox_max) for i in range(8)] 63 | 64 | return points, 
location_cnt, pattern, child_bboxes, location_map 65 | 66 | 67 | def analyze_octant(points, bbox_min, bbox_max, attr=None): 68 | points, location_cnt, pattern, child_bboxes, location_map = _analyze_octant(points, bbox_min, bbox_max) 69 | if attr is not None: 70 | attr = attr[location_map, :] 71 | 72 | return points, location_cnt, pattern, child_bboxes, attr 73 | 74 | 75 | class OctreeConverter(): 76 | """ 77 | A class to store the octree parameters and perform octree partitioning. 78 | """ 79 | 80 | def __init__(self, bbox_min, bbox_max, point_min, point_max, level_min, level_max): 81 | 82 | # Set the octree partitioning options 83 | self.bbox_min, self.bbox_max = np.asarray(bbox_min, dtype=np.float32), np.asarray(bbox_max, dtype=np.float32) 84 | # self.bbox_min, self.bbox_max = np.asarray(bbox_min, dtype=np.int32), np.asarray(bbox_max, dtype=np.int32) 85 | self.point_min, self.point_max = point_min, point_max 86 | self.level_min, self.level_max = level_min, level_max 87 | self.normalized_box_size = 2 88 | 89 | 90 | def leaf_test(self, point_cnt, level): 91 | """Determine whether a block is a leaf.""" 92 | return (level >= self.level_max) or (point_cnt <= self.point_max and level >= self.level_min) 93 | 94 | 95 | def skip_test(self, point_cnt): 96 | """Determine whether a block should be skipped or not.""" 97 | return point_cnt < self.point_min # True: skip; False: Transform 98 | 99 | 100 | def partition_octree(self, points, attr=None): 101 | """Octree partitioning with breadth-first search.""" 102 | 103 | # Remove the points out of bounding box 104 | mask = np.ones(points.shape[0], dtype=bool) 105 | for i in range(3): 106 | mask = mask & (points[:, i] >= self.bbox_min[i]) & (points[:, i] <= self.bbox_max[i]) 107 | points = points[mask,:] 108 | if attr is not None: attr = attr[mask,:] 109 | 110 | # initialization 111 | root_block = {'level': 0, 'bbox_min': self.bbox_min, 'bbox_max': self.bbox_max, 'pnt_range': np.array([0, points.shape[0] - 1]), 'parent': -1, 
'binstr': 0} 112 | blocks = [root_block] 113 | leaf_idx = [] 114 | cur = 0 115 | 116 | # Start the splitting 117 | while True: 118 | pnt_start, pnt_end = blocks[cur]['pnt_range'][0], blocks[cur]['pnt_range'][1] 119 | point_cnt = pnt_end - pnt_start + 1 120 | if self.leaf_test(point_cnt, blocks[cur]['level']): # found a leaf node 121 | leaf_idx.append(cur) 122 | if self.skip_test(point_cnt): # Use skip transform if very few points 123 | blocks[cur]['binstr'] = -1 # -1 - "skip"; 0 - "transform" 124 | else: # split current node 125 | points[pnt_start : pnt_end + 1], location_cnt, blocks[cur]['binstr'], child_bboxes, attr_tmp = \ 126 | analyze_octant(points[pnt_start : pnt_end + 1], blocks[cur]['bbox_min'], blocks[cur]['bbox_max'], 127 | attr[pnt_start : pnt_end + 1] if attr is not None else None) 128 | if attr is not None: attr[pnt_start : pnt_end + 1] = attr_tmp 129 | 130 | # Create the child nodes 131 | location_idx = np.insert(np.cumsum(location_cnt, dtype=np.uint32), 0, 0) + blocks[cur]['pnt_range'][0] 132 | for idx in range(8): 133 | if location_cnt[idx] > 0: # create a child node if the octant still has points 134 | block = {'level': blocks[cur]['level'] + 1, 'bbox_min': child_bboxes[idx][0], 'bbox_max': child_bboxes[idx][1], 135 | 'pnt_range': np.array([location_idx[idx], location_idx[idx + 1] - 1], dtype=location_idx.dtype), 136 | 'parent': cur, 'binstr': 0} 137 | blocks.append(block) 138 | cur += 1 139 | if cur >= len(blocks): break 140 | 141 | binstrs = np.asarray([np.max((blocks[i]['binstr'], 0)) for i in range(len(blocks))]).astype(np.uint8) # the final binary strings are always no less than 0 142 | return blocks, leaf_idx, points, attr, binstrs 143 | 144 | 145 | def departition_octree(self, binstrs, block_pntcnt): 146 | """Departition a given octree with breadth-first search. 147 | Given the binary strings and the bounding box, recover the bounding boxes and the levels of all the leaf nodes. 
148 | """ 149 | 150 | # Initialization 151 | root_block = {'level': 0, 'bbox_min': self.bbox_min, 'bbox_max': self.bbox_max} 152 | blocks = [root_block] 153 | leaf_idx = [] 154 | cur = 0 155 | 156 | while True: 157 | blocks[cur]['binstr'] = binstrs[cur] 158 | if blocks[cur]['binstr'] <= 0: 159 | leaf_idx.append(cur) # found a leaf node 160 | if self.skip_test(block_pntcnt[len(leaf_idx) - 1]): 161 | blocks[cur]['binstr'] = -1 # marked as a skip 162 | else: 163 | blocks[cur]['binstr'] = 0 # marked as transform 164 | else: # split current node 165 | idx = 0 166 | binstr = blocks[cur]['binstr'] 167 | while binstr > 0: 168 | if (binstr & 1) == 1: # create a block according to the binary string 169 | box = compute_new_bbox(idx, blocks[cur]['bbox_min'], blocks[cur]['bbox_max']) 170 | block = {'level': blocks[cur]['level'] + 1, 'bbox_min': box[0], 'bbox_max': box[1]} 171 | blocks.append(block) 172 | idx += 1 173 | binstr >>= 1 174 | cur += 1 175 | if cur >= len(blocks): break 176 | 177 | return [blocks[leaf_idx[i]] for i in range(len(leaf_idx))] 178 | 179 | 180 | class OctreeOrganizer(OctreeConverter): 181 | """Prepare the octree array and data of skip blocks given the syntax, so as to enable internal data communications.""" 182 | 183 | def __init__(self, octree_cfg, max_num_points, syntax_gt, rw_octree=False, shuffle_blocks=False): 184 | 185 | # Grab the specs for octree partitioning and create an octree converter 186 | super().__init__( 187 | octree_cfg['bbox_min'], 188 | octree_cfg['bbox_max'], 189 | octree_cfg['point_min'], 190 | octree_cfg['point_max'], 191 | octree_cfg['level_min'], 192 | octree_cfg['level_max'], 193 | ) 194 | 195 | # Set the octree partitioning options 196 | self.syntax_gt = syntax_gt 197 | self.max_num_points = max_num_points 198 | self.rw_octree = rw_octree 199 | self.normalized_box_size = 2 200 | self.shuffle_blocks = shuffle_blocks 201 | self.infinitesimal = 1e-6 202 | 203 | def get_normalizer(self, bbox_min, bbox_max, pnts=None): 204 | center = 
(bbox_min + bbox_max) / 2 205 | scaling = self.normalized_box_size / (bbox_max[0] - bbox_min[0]) 206 | return center, scaling 207 | 208 | 209 | def organize_data(self, points_raw, normal=None, file_name=None): 210 | if self.rw_octree and os.path.isfile(file_name): # Check whether the point cloud has been converted to octree already 211 | with open(file_name, 'rb') as f_pkl: 212 | octree_raw = pickle.load(f_pkl) 213 | blocks = octree_raw['blocks'] 214 | leaf_idx = octree_raw['leaf_idx'] 215 | points = octree_raw['points'] 216 | binstrs = octree_raw['binstrs'] 217 | else: 218 | # Perform octree partitioning 219 | blocks, leaf_idx, points, normal, binstrs = self.partition_octree(points_raw, normal) 220 | if self.rw_octree: 221 | os.makedirs(os.path.dirname(file_name), exist_ok=True) 222 | with open(file_name, "wb") as f_pkl: # write down the partitioning results 223 | pickle.dump({'blocks': blocks, 'leaf_idx': leaf_idx, 'points': points, 'normal': normal, 'binstrs': binstrs}, f_pkl) 224 | 225 | # Organize the data for batching 226 | total_cnt = 0 227 | points_out = np.zeros((self.max_num_points, self.syntax_gt['__len__']), dtype=np.float32) 228 | normal_out = np.zeros((self.max_num_points, 3), dtype=np.float32) if normal is not None else None 229 | block_pntcnt = [] 230 | 231 | # Shuffle the blocks, only for training 232 | if self.shuffle_blocks: np.random.shuffle(leaf_idx) 233 | 234 | all_skip = True 235 | for idx in leaf_idx: 236 | pnt_start, pnt_end = blocks[idx]['pnt_range'][0], blocks[idx]['pnt_range'][1] 237 | xyz_slc = slice(pnt_start, pnt_end + 1) 238 | cnt = pnt_end - pnt_start + 1 239 | 240 | # If we can still add more blocks then continue 241 | if total_cnt + cnt <= self.max_num_points: 242 | block_slc = slice(total_cnt, total_cnt + cnt) 243 | center, scaling = self.get_normalizer( 244 | blocks[idx]['bbox_min'], blocks[idx]['bbox_max'], points[xyz_slc, :]) 245 | points_out[block_slc, 0 : points.shape[1]] = points[xyz_slc, :] # x, y, z, and others if exists 
246 | points_out[block_slc, self.syntax_gt['block_center'][0] : self.syntax_gt['block_center'][1] + 1] = center # center of the block 247 | points_out[block_slc, self.syntax_gt['block_scale']] = scaling # scale of the blcok 248 | points_out[block_slc, self.syntax_gt['block_pntcnt']] = cnt if (blocks[idx]['binstr'] >= 0) else -cnt # number of points in the block 249 | points_out[total_cnt, self.syntax_gt['block_start']] = 1 if (blocks[idx]['binstr'] >= 0) else -1 # start flag of the block 250 | if normal is not None: normal_out[block_slc, :] = normal[xyz_slc, :] 251 | if (blocks[idx]['binstr'] >= 0): all_skip = False 252 | block_pntcnt.append(cnt) 253 | total_cnt += cnt 254 | else: break 255 | 256 | # More stuffs can be returned here, e.g., details about the skip blocks 257 | return points_out, normal_out, binstrs, np.asarray(block_pntcnt), all_skip 258 | --------------------------------------------------------------------------------