├── utils ├── __init__.py ├── align.py └── logging.py ├── assets └── mean_points_emb.npy ├── config ├── run_eval_camera.py ├── run_eval_real.py ├── _base_.py └── run.py ├── network ├── __init__.py ├── encoder.py ├── basic.py ├── pointnet.py ├── loss.py ├── network.py └── decoder.py ├── Pointnet2 ├── tools │ ├── _init_path.py │ ├── pointnet2_msg.py │ ├── dataset.py │ ├── train_and_eval.py │ └── kitti_utils.py └── pointnet2 │ ├── src │ ├── cuda_utils.h │ ├── ball_query_gpu.h │ ├── group_points_gpu.h │ ├── ball_query.cpp │ ├── sampling_gpu.h │ ├── pointnet2_api.cpp │ ├── interpolate_gpu.h │ ├── group_points.cpp │ ├── sampling.cpp │ ├── interpolate.cpp │ ├── ball_query_gpu.cu │ ├── group_points_gpu.cu │ ├── interpolate_gpu.cu │ └── sampling_gpu.cu │ ├── setup.py │ ├── pointnet2_modules.py │ ├── pytorch_utils.py │ └── pointnet2_utils.py ├── .gitignore ├── dataset ├── __init__.py ├── data_augmentation.py └── pose_dataset.py ├── core └── trainer.py ├── README.md └── tools ├── train.py └── valid.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import * 2 | from .utils import * -------------------------------------------------------------------------------- /assets/mean_points_emb.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/Query6DoF/HEAD/assets/mean_points_emb.npy -------------------------------------------------------------------------------- /config/run_eval_camera.py: -------------------------------------------------------------------------------- 1 | _base_=['run.py'] 2 | train=False 3 | DATA='val' 4 | RESUME_FILE = 'checkpoint_epoch_50.tar.pth' -------------------------------------------------------------------------------- /config/run_eval_real.py: -------------------------------------------------------------------------------- 1 | _base_=['run.py'] 2 | train=False 3 | DATA='real_test' 4 | RESUME_FILE = 'checkpoint_epoch_50.tar.pth' -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointnet import Pointnet2MSG 2 | from .network import NETWORK_REGISTRY 3 | from .loss import LOSS_REGISTRY -------------------------------------------------------------------------------- /Pointnet2/tools/_init_path.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '../')) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 3 | build/ 4 | develop-eggs/ 5 | dist/ 6 | downloads/ 7 | eggs/ 8 | .eggs/ 9 | lib/ 10 | lib64/ 11 | parts/ 12 | sdist/ 13 | var/ 14 | wheels/ 15 | *.egg-info/ 16 | .installed.cfg 17 | *.egg 18 | 19 | *.pth 20 | 21 | data/ 22 | 23 | results 24 | runs -------------------------------------------------------------------------------- /network/encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import copy 7 | import math 8 | from mmengine import Registry 9 | from .basic import MLP 10 | 11 | ENCODER_REGISTRY = Registry("ENCODER") 12 | 13 | 
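A minimal sketch of the registry mechanism used throughout `network/` (illustrative only, assuming mmengine's `Registry` semantics; `ToyEncoder` is a hypothetical class, not part of the repo). Classes register themselves under a name, and configs such as `config/run.py` later instantiate them through the `type` field of a dict:

~~~
from mmengine import Registry
import torch.nn as nn

ENCODER_REGISTRY = Registry("ENCODER")

@ENCODER_REGISTRY.register_module()
class ToyEncoder(nn.Module):
    # hypothetical encoder used only to illustrate the pattern
    def __init__(self, input_channels=0):
        super().__init__()
        self.proj = nn.Linear(3 + input_channels, 64)

    def forward(self, pts):
        return self.proj(pts)

# A config dict picks the registered class by name, mirroring how
# MODEL['backbone'] in config/run.py selects 'Pointnet2MSG'.
cfg = dict(type='ToyEncoder', input_channels=0)
encoder = ENCODER_REGISTRY.build(cfg)  # -> ToyEncoder(input_channels=0)
~~~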
-------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | #define THREADS_PER_BLOCK 256 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | 10 | inline int opt_n_threads(int work_size) { 11 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 12 | 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /network/basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | class MLP(nn.Module): 4 | def __init__(self,input_dims,middle_dims,output_dims=None): 5 | super(MLP,self).__init__() 6 | if output_dims is None: 7 | output_dims=input_dims 8 | self.model=nn.Sequential( 9 | nn.Linear(input_dims,middle_dims), 10 | torch.nn.GELU(), 11 | nn.Linear(middle_dims,output_dims) 12 | ) 13 | 14 | def forward(self,inputs): 15 | return self.model(inputs) -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/ball_query_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_GPU_H 2 | #define _BALL_QUERY_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 10 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 11 | 12 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, 13 | const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /utils/align.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import numpy as np 4 | 5 | 6 | def backproject(depth, intrinsics, instance_mask): 7 | """ Back-projection, use opencv camera coordinate frame. 
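    `depth` is an (H, W) depth map, `intrinsics` a 3x3 camera matrix, and
    `instance_mask` an (H, W) mask. Returns `pts`, the (N, 3) back-projected
    points of the masked, non-zero-depth pixels, and `idxs`, the tuple of
    their (row, col) pixel indices from np.where.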
8 | 9 | """ 10 | cam_fx = intrinsics[0, 0] 11 | cam_fy = intrinsics[1, 1] 12 | cam_cx = intrinsics[0, 2] 13 | cam_cy = intrinsics[1, 2] 14 | 15 | non_zero_mask = (depth > 0) 16 | final_instance_mask = np.logical_and(instance_mask, non_zero_mask) 17 | idxs = np.where(final_instance_mask) 18 | 19 | z = depth[idxs[0], idxs[1]] 20 | x = (idxs[1] - cam_cx) * z / cam_fx 21 | y = (idxs[0] - cam_cy) * z / cam_fy 22 | pts = np.stack((x, y, z), axis=1) 23 | 24 | return pts, idxs 25 | 26 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='pointnet2', 6 | ext_modules=[ 7 | CUDAExtension('pointnet2_cuda', [ 8 | 'src/pointnet2_api.cpp', 9 | 10 | 'src/ball_query.cpp', 11 | 'src/ball_query_gpu.cu', 12 | 'src/group_points.cpp', 13 | 'src/group_points_gpu.cu', 14 | 'src/interpolate.cpp', 15 | 'src/interpolate_gpu.cu', 16 | 'src/sampling.cpp', 17 | 'src/sampling_gpu.cu', 18 | ], 19 | extra_compile_args={'cxx': ['-g'], 20 | 'nvcc': ['-O2']}) 21 | ], 22 | cmdclass={'build_ext': BuildExtension} 23 | ) 24 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/group_points_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUP_POINTS_GPU_H 2 | #define _GROUP_POINTS_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 12 | 13 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 14 | const float *points, const int *idx, float *out, cudaStream_t stream); 15 | 16 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /config/_base_.py: -------------------------------------------------------------------------------- 1 | CFG_NAME = '' 2 | OUTPUT_DIR = 'runs' 3 | RUN_NAME='' 4 | PRINT_FREQ = 40 5 | DIST_BACKEND = 'nccl' 6 | AUTO_RESUME = False 7 | VERBOSE = True 8 | DDP = True 9 | RESUME_FILE='' 10 | ONLY_MODEL=False 11 | CHANGE_SCHEDULE=False 12 | find_unused_parameters=False 13 | is_iter=False 14 | # Cudnn related params 15 | CUDNN=dict( 16 | BENCHMARK = True, 17 | DETERMINISTIC = False, 18 | ENABLED = True 19 | ) 20 | 21 | DATASET=dict( 22 | type='PoseDataset', 23 | source='Real', 24 | mode='train', 25 | data_dir='data', 26 | n_pts=1024 27 | ) 28 | 29 | DATALOADER=dict( 30 | type='DataLoader', 31 | batch_size=10, 32 | shuffle=False, 33 | num_workers=8, 34 | pin_memory=True, 35 | persistent_workers=True 36 | ) 37 | 38 | DATALOADER['persistent_workers']=DATALOADER['num_workers']>0 39 | 40 | OPTIMIZER=dict( 41 | type='AdamW', 42 | lr=2e-4, 43 | weight_decay=1e-7 44 | ) 45 | 46 | 47 | TRAIN=dict( 48 | BEGIN_EPOCH=0, 49 | END_EPOCH=75, 50 | SAVE_EPOCH_STEP=5, 51 | VIS=False 52 | ) 53 | 
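The `_base_` lists in the configs above rely on hierarchical config loading: a child file inherits every field of its `_base_` files and overrides only what it redeclares. A small usage sketch (assuming `mmengine.Config`, which the `mmengine` dependency and the `_base_` convention imply):

~~~
from mmengine import Config

# Inheritance chain: _base_.py <- run.py <- run_eval_real.py (child overrides base).
cfg = Config.fromfile('config/run_eval_real.py')

print(cfg.train)         # False - overridden in run_eval_real.py
print(cfg.DATA)          # 'real_test' - overridden in run_eval_real.py
print(cfg.RESUME_FILE)   # 'checkpoint_epoch_50.tar.pth'
print(cfg.DIST_BACKEND)  # 'nccl' - inherited from _base_.py
~~~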
-------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from .pose_dataset import DATASET_REGISTRY 4 | from mmengine import Registry 5 | from torch.utils.data import DataLoader 6 | import copy 7 | 8 | def build_dataloader(cfg, registry, *args, **kwargs): 9 | dataset=DATASET_REGISTRY.build(cfg.DATASET) 10 | loader_cfg=copy.deepcopy(cfg) 11 | loader_cfg=cfg.DATALOADER 12 | loader_cfg.dataset=dataset 13 | if 'TRAIN' in cfg.keys(): 14 | if cfg.DDP: 15 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) 16 | else: 17 | train_sampler = None 18 | 19 | loader_cfg.sampler=train_sampler 20 | data_loader = registry.get(cfg.DATALOADER.type) 21 | del loader_cfg['type'] 22 | data_loader=data_loader(**loader_cfg) 23 | return data_loader 24 | 25 | DATALOADER_REGISTRY = Registry("DATALODER",build_func=build_dataloader) 26 | DATALOADER_REGISTRY.register_module(module=DataLoader) 27 | 28 | def trivial_batch_collator(batch): 29 | return batch 30 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "ball_query_gpu.h" 8 | 9 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 10 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 11 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 12 | 13 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 14 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) { 15 | CHECK_INPUT(new_xyz_tensor); 16 | CHECK_INPUT(xyz_tensor); 17 | const float *new_xyz = new_xyz_tensor.data(); 18 | const float *xyz = xyz_tensor.data(); 19 | int *idx = idx_tensor.data(); 20 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 21 | ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 22 | return 1; 23 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/sampling_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_GPU_H 2 | #define _SAMPLING_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 10 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 13 | const float *points, const int *idx, float *out, cudaStream_t stream); 14 | 15 | 16 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | 23 | int furthest_point_sampling_wrapper(int b, int n, int m, 24 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 25 | 26 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 27 | const float *dataset, float *temp, int *idxs, cudaStream_t stream); 28 | 29 | #endif 30 | 
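The wrapper functions declared in these headers are exposed to Python through the `pointnet2_cuda` extension built by `Pointnet2/pointnet2/setup.py`. A hedged usage sketch of the furthest-point-sampling entry point, following the signature declared in `sampling_gpu.h` (inside the repo this call is presumably wrapped by an autograd `Function` in `pointnet2_utils.py` rather than invoked directly):

~~~
import torch
import pointnet2_cuda  # built via 'python setup.py install' in Pointnet2/pointnet2

B, N, npoint = 2, 1024, 64
xyz = torch.randn(B, N, 3, device='cuda').contiguous()          # (B, N, 3) input points
temp = torch.full((B, N), 1e10, device='cuda')                  # per-point distance scratch buffer
idx = torch.zeros(B, npoint, dtype=torch.int32, device='cuda')  # (B, npoint) output indices

# furthest_point_sampling_wrapper(b, n, m, points, temp, idxs) selects npoint
# mutually distant points and writes their indices into idx in place.
pointnet2_cuda.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, idx)
print(idx.shape)  # torch.Size([2, 64])
~~~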
-------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/pointnet2_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ball_query_gpu.h" 5 | #include "group_points_gpu.h" 6 | #include "sampling_gpu.h" 7 | #include "interpolate_gpu.h" 8 | 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast"); 12 | 13 | m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast"); 14 | m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast"); 15 | 16 | m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast"); 17 | m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast"); 18 | 19 | m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); 20 | 21 | m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast"); 22 | m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast"); 23 | m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast"); 24 | } 25 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/interpolate_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATE_GPU_H 2 | #define _INTERPOLATE_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 11 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | 13 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 14 | const float *known, float *dist2, int *idx, cudaStream_t stream); 15 | 16 | 17 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, 18 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 19 | 20 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 21 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream); 22 | 23 | 24 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, 25 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 26 | 27 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 28 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "group_points_gpu.h" 8 | 9 | 10 | 11 | 12 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 13 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 14 | 15 | float *grad_points = grad_points_tensor.data(); 16 | const int *idx = idx_tensor.data(); 17 | const float *grad_out = grad_out_tensor.data(); 18 | 19 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 20 | 21 | 
group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream); 22 | return 1; 23 | } 24 | 25 | 26 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 27 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 28 | 29 | const float *points = points_tensor.data(); 30 | const int *idx = idx_tensor.data(); 31 | float *out = out_tensor.data(); 32 | 33 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 34 | 35 | group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream); 36 | return 1; 37 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "sampling_gpu.h" 8 | 9 | 10 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ 12 | const float *points = points_tensor.data(); 13 | const int *idx = idx_tensor.data(); 14 | float *out = out_tensor.data(); 15 | 16 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 17 | gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream); 18 | return 1; 19 | } 20 | 21 | 22 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 23 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 24 | 25 | const float *grad_out = grad_out_tensor.data(); 26 | const int *idx = idx_tensor.data(); 27 | float *grad_points = grad_points_tensor.data(); 28 | 29 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 30 | gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream); 31 | return 1; 32 | } 33 | 34 | 35 | int furthest_point_sampling_wrapper(int b, int n, int m, 36 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { 37 | 38 | const float *points = points_tensor.data(); 39 | float *temp = temp_tensor.data(); 40 | int *idx = idx_tensor.data(); 41 | 42 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 43 | furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); 44 | return 1; 45 | } 46 | -------------------------------------------------------------------------------- /utils/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import time 4 | from pathlib import Path 5 | 6 | def setup_logger(final_output_dir, rank, phase,vis=False): 7 | time_str = time.strftime('%Y-%m-%d-%H-%M') 8 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank) 9 | final_log_file = os.path.join(final_output_dir, 'log', log_file) 10 | head = '%(asctime)-15s %(message)s' 11 | if not vis: 12 | logging.basicConfig(filename=str(final_log_file), 13 | format=head) 14 | else: 15 | logging.basicConfig(filename=None, 16 | format=head) 17 | logger = logging.getLogger() 18 | logger.setLevel(logging.INFO) 19 | console = logging.StreamHandler() 20 | # if len(logging.getLogger('').handlers) < 2: 21 | logging.getLogger('').addHandler(console) 22 | 23 | return logger, time_str 24 | 25 | def create_checkpoint(cfg, phase='train'): 26 | root_output_dir = Path(cfg.OUTPUT_DIR) 27 | # set up logger 28 | if not root_output_dir.exists(): 29 | print('=> creating {}'.format(root_output_dir)) 30 | root_output_dir.mkdir() 31 | 32 | dataset = 
cfg.DATASET.source 33 | dataset = dataset.replace(':', '_') 34 | run_name = cfg.RUN_NAME 35 | 36 | final_output_dir = root_output_dir / dataset / run_name 37 | 38 | print('=> creating {}'.format(final_output_dir)) 39 | final_output_dir.mkdir(parents=True, exist_ok=True) 40 | 41 | log_dir = os.path.join(final_output_dir, 'log') 42 | if not os.path.exists(log_dir): 43 | print('=> creating log dir'.format(log_dir)) 44 | os.makedirs(log_dir) 45 | 46 | if phase == 'train': 47 | model_dir = os.path.join(final_output_dir, 'model') 48 | src_dir = os.path.join(final_output_dir, 'src') 49 | if not os.path.exists(model_dir): os.makedirs(model_dir) 50 | if not os.path.exists(src_dir): os.makedirs(src_dir) 51 | print('=> creating {}'.format(model_dir)) 52 | print('=> creating {}'.format(src_dir)) 53 | 54 | return str(final_output_dir) -------------------------------------------------------------------------------- /config/run.py: -------------------------------------------------------------------------------- 1 | _base_=['_base_.py'] 2 | CFG_NAME = '' 3 | OUTPUT_DIR = 'runs' 4 | RUN_NAME = 'run' 5 | PRINT_FREQ = 100 6 | DIST_BACKEND = 'nccl' 7 | AUTO_RESUME = True 8 | RESUME_FILE = '' 9 | ONLY_MODEL = False 10 | CHANGE_SCHEDULE = False 11 | find_unused_parameters = False 12 | VIS = False 13 | DATA='real_test' 14 | train=True 15 | 16 | is_iter=True 17 | 18 | MODEL = dict( 19 | type='Sparsenetv7', 20 | name='Sparsenetv7', 21 | n_pts=64, 22 | backbone=dict( 23 | type='Pointnet2MSG', 24 | input_channels=0, 25 | mlp=[[256, 256], [256, 256], [256, 256], [512, 512]]), 26 | decoder=dict( 27 | type='deep_prior_decoderv2_9', 28 | group=4, 29 | input_dim=256, 30 | middle_dim=1024, 31 | training=train, 32 | cat_num=6), 33 | pose_estimate=dict( 34 | type='pose_estimater', 35 | input_dim=512, 36 | middle_dim=256 37 | ), 38 | input_dim=256, 39 | cat_num=6, 40 | training=train, 41 | loss_name=['r','t','s','chamfer','nocs'], 42 | losses=[ 43 | dict(type='r_lossv2', weight=1.0,beta=0.001), 44 | dict(type='t_loss', weight=1,beta=0.005), 45 | dict(type='s_loss', weight=1.0,beta=0.005), 46 | dict(type='chamfer_lossv2',weight=3.0), 47 | dict(type='consistency_lossv2',weight=1.0,beta=0.1) 48 | ]) 49 | 50 | 51 | DATASET = dict( 52 | type='PoseDataset', 53 | source='CAMERA+Real', 54 | mode='train', 55 | data_dir='data', 56 | n_pts=1024, 57 | vis=False, 58 | img_size=192, 59 | use_cache=False) 60 | 61 | 62 | DATALOADER = dict( 63 | type='DataLoader', 64 | batch_size=15, 65 | shuffle=False, 66 | num_workers=4, 67 | pin_memory=True, 68 | persistent_workers=True, 69 | prefetch_factor=2, 70 | drop_last=True) 71 | 72 | DATALOADER['persistent_workers']=DATALOADER['num_workers']>0 73 | if VIS: 74 | DATALOADER['num_workers']=1 75 | 76 | 77 | OPTIMIZER = dict(type='AdamW', lr=0.0001, weight_decay=1e-4) 78 | SCHEDULER = dict(type='CosineAnnealingLR', T_max=422400, eta_min=1e-6, last_epoch=-1, verbose=False) 79 | TRAIN = dict(BEGIN_EPOCH=0, END_EPOCH=101, SAVE_EPOCH_STEP=10, VIS=False) -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "interpolate_gpu.h" 11 | 12 | 13 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 14 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { 15 | const float *unknown = 
unknown_tensor.data(); 16 | const float *known = known_tensor.data(); 17 | float *dist2 = dist2_tensor.data(); 18 | int *idx = idx_tensor.data(); 19 | 20 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 21 | three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream); 22 | } 23 | 24 | 25 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, 26 | at::Tensor points_tensor, 27 | at::Tensor idx_tensor, 28 | at::Tensor weight_tensor, 29 | at::Tensor out_tensor) { 30 | 31 | const float *points = points_tensor.data(); 32 | const float *weight = weight_tensor.data(); 33 | float *out = out_tensor.data(); 34 | const int *idx = idx_tensor.data(); 35 | 36 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 37 | three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream); 38 | } 39 | 40 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, 41 | at::Tensor grad_out_tensor, 42 | at::Tensor idx_tensor, 43 | at::Tensor weight_tensor, 44 | at::Tensor grad_points_tensor) { 45 | 46 | const float *grad_out = grad_out_tensor.data(); 47 | const float *weight = weight_tensor.data(); 48 | float *grad_points = grad_points_tensor.data(); 49 | const int *idx = idx_tensor.data(); 50 | 51 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 52 | three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream); 53 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ball_query_gpu.h" 6 | #include "cuda_utils.h" 7 | 8 | 9 | __global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, 10 | const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 11 | // new_xyz: (B, M, 3) 12 | // xyz: (B, N, 3) 13 | // output: 14 | // idx: (B, M, nsample) 15 | int bs_idx = blockIdx.y; 16 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 17 | if (bs_idx >= b || pt_idx >= m) return; 18 | 19 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 20 | xyz += bs_idx * n * 3; 21 | idx += bs_idx * m * nsample + pt_idx * nsample; 22 | 23 | float radius2 = radius * radius; 24 | float new_x = new_xyz[0]; 25 | float new_y = new_xyz[1]; 26 | float new_z = new_xyz[2]; 27 | 28 | int cnt = 0; 29 | for (int k = 0; k < n; ++k) { 30 | float x = xyz[k * 3 + 0]; 31 | float y = xyz[k * 3 + 1]; 32 | float z = xyz[k * 3 + 2]; 33 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 34 | if (d2 < radius2){ 35 | if (cnt == 0){ 36 | for (int l = 0; l < nsample; ++l) { 37 | idx[l] = k; 38 | } 39 | } 40 | idx[cnt] = k; 41 | ++cnt; 42 | if (cnt >= nsample) break; 43 | } 44 | } 45 | } 46 | 47 | 48 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \ 49 | const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { 50 | // new_xyz: (B, M, 3) 51 | // xyz: (B, N, 3) 52 | // output: 53 | // idx: (B, M, nsample) 54 | 55 | cudaError_t err; 56 | 57 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 58 | dim3 threads(THREADS_PER_BLOCK); 59 | 60 | ball_query_kernel_fast<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 61 | // cudaDeviceSynchronize(); // for using printf in kernel function 62 | err = cudaGetLastError(); 63 | if (cudaSuccess != err) { 64 | fprintf(stderr, 
"CUDA kernel failed : %s\n", cudaGetErrorString(err)); 65 | exit(-1); 66 | } 67 | } -------------------------------------------------------------------------------- /Pointnet2/tools/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG 4 | import pointnet2.pytorch_utils as pt_utils 5 | 6 | 7 | def get_model(input_channels=0): 8 | return Pointnet2MSG(input_channels=input_channels) 9 | 10 | 11 | NPOINTS = [4096, 1024, 256, 64] 12 | RADIUS = [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]] 13 | NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] 14 | MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]], 15 | [[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]] 16 | FP_MLPS = [[128, 128], [256, 256], [512, 512], [512, 512]] 17 | CLS_FC = [128] 18 | DP_RATIO = 0.5 19 | 20 | 21 | class Pointnet2MSG(nn.Module): 22 | def __init__(self, input_channels=6): 23 | super().__init__() 24 | 25 | self.SA_modules = nn.ModuleList() 26 | channel_in = input_channels 27 | 28 | skip_channel_list = [input_channels] 29 | for k in range(NPOINTS.__len__()): 30 | mlps = MLPS[k].copy() 31 | channel_out = 0 32 | for idx in range(mlps.__len__()): 33 | mlps[idx] = [channel_in] + mlps[idx] 34 | channel_out += mlps[idx][-1] 35 | 36 | self.SA_modules.append( 37 | PointnetSAModuleMSG( 38 | npoint=NPOINTS[k], 39 | radii=RADIUS[k], 40 | nsamples=NSAMPLE[k], 41 | mlps=mlps, 42 | use_xyz=True, 43 | bn=True 44 | ) 45 | ) 46 | skip_channel_list.append(channel_out) 47 | channel_in = channel_out 48 | 49 | self.FP_modules = nn.ModuleList() 50 | 51 | for k in range(FP_MLPS.__len__()): 52 | pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out 53 | self.FP_modules.append( 54 | PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k]) 55 | ) 56 | 57 | cls_layers = [] 58 | pre_channel = FP_MLPS[0][-1] 59 | for k in range(0, CLS_FC.__len__()): 60 | cls_layers.append(pt_utils.Conv1d(pre_channel, CLS_FC[k], bn=True)) 61 | pre_channel = CLS_FC[k] 62 | cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None)) 63 | cls_layers.insert(1, nn.Dropout(0.5)) 64 | self.cls_layer = nn.Sequential(*cls_layers) 65 | 66 | def _break_up_pc(self, pc): 67 | xyz = pc[..., 0:3].contiguous() 68 | features = ( 69 | pc[..., 3:].transpose(1, 2).contiguous() 70 | if pc.size(-1) > 3 else None 71 | ) 72 | 73 | return xyz, features 74 | 75 | def forward(self, pointcloud: torch.cuda.FloatTensor): 76 | xyz, features = self._break_up_pc(pointcloud) 77 | 78 | l_xyz, l_features = [xyz], [features] 79 | for i in range(len(self.SA_modules)): 80 | li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) 81 | l_xyz.append(li_xyz) 82 | l_features.append(li_features) 83 | 84 | for i in range(-1, -(len(self.FP_modules) + 1), -1): 85 | l_features[i - 1] = self.FP_modules[i]( 86 | l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] 87 | ) 88 | 89 | pred_cls = self.cls_layer(l_features[0]).transpose(1, 2).contiguous() # (B, N, 1) 90 | return pred_cls 91 | -------------------------------------------------------------------------------- /network/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | sys.path.append('..') 5 | from Pointnet2.pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG 6 | import 
Pointnet2.pointnet2.pytorch_utils as pt_utils 7 | from .encoder import ENCODER_REGISTRY 8 | 9 | def get_model(input_channels=0): 10 | return Pointnet2MSG(input_channels=input_channels) 11 | 12 | NPOINTS = [512, 256, 128, 64] 13 | RADIUS = [[0.01, 0.02], [0.02, 0.04], [0.04, 0.08], [0.08, 0.16]] 14 | NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] 15 | MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]], 16 | [[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]] 17 | FP_MLPS = [[128, 128], [128, 128], [256, 256], [512, 512]] 18 | CLS_FC = [128] 19 | DP_RATIO = 0.5 20 | 21 | @ENCODER_REGISTRY.register_module() 22 | class Pointnet2MSG(nn.Module): 23 | def __init__(self, input_channels=6,mlp=[],use_norm=False): 24 | super().__init__() 25 | if len(mlp)!=0: 26 | FP_MLPS=mlp 27 | else: 28 | FP_MLPS=[[128, 128], [128, 128], [256, 256], [512, 512]] 29 | self.SA_modules = nn.ModuleList() 30 | channel_in = input_channels 31 | 32 | skip_channel_list = [input_channels] 33 | for k in range(NPOINTS.__len__()): 34 | mlps = MLPS[k].copy() 35 | channel_out = 0 36 | for idx in range(mlps.__len__()): 37 | mlps[idx] = [channel_in] + mlps[idx] 38 | channel_out += mlps[idx][-1] 39 | 40 | self.SA_modules.append( 41 | PointnetSAModuleMSG( 42 | npoint=NPOINTS[k], 43 | radii=RADIUS[k], 44 | nsamples=NSAMPLE[k], 45 | mlps=mlps, 46 | use_xyz=True, 47 | bn=True, 48 | use_norm=use_norm 49 | ) 50 | ) 51 | skip_channel_list.append(channel_out) 52 | channel_in = channel_out 53 | 54 | self.FP_modules = nn.ModuleList() 55 | 56 | for k in range(FP_MLPS.__len__()): 57 | pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out 58 | self.FP_modules.append( 59 | PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k]) 60 | ) 61 | 62 | 63 | def _break_up_pc(self, pc): 64 | xyz = pc[..., 0:3].contiguous() 65 | features = ( 66 | pc[..., 3:].transpose(1, 2).contiguous() 67 | if pc.size(-1) > 3 else None 68 | ) 69 | 70 | return xyz, features 71 | 72 | def forward(self, pointcloud: torch.cuda.FloatTensor): 73 | xyz, features = self._break_up_pc(pointcloud) 74 | 75 | l_xyz, l_features = [xyz], [features] 76 | for i in range(len(self.SA_modules)): 77 | li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) 78 | 79 | l_xyz.append(li_xyz) 80 | l_features.append(li_features) 81 | 82 | 83 | for i in range(-1, -(len(self.FP_modules) + 1), -1): 84 | l_features[i - 1] = self.FP_modules[i]( 85 | l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] 86 | ) 87 | 88 | return l_features[0] 89 | 90 | if __name__ == '__main__': 91 | net = Pointnet2MSG(0).cuda() 92 | pts = torch.randn(2, 1024, 3).cuda() 93 | pre = net(pts) 94 | print(pre.shape) 95 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | #include "group_points_gpu.h" 6 | 7 | 8 | __global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample, 9 | const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) { 10 | // grad_out: (B, C, npoints, nsample) 11 | // idx: (B, npoints, nsample) 12 | // output: 13 | // grad_points: (B, C, N) 14 | int bs_idx = blockIdx.z; 15 | int c_idx = blockIdx.y; 16 | int index = blockIdx.x * blockDim.x + threadIdx.x; 17 | int pt_idx = index / nsample; 18 | if (bs_idx >= b || c_idx >= c || pt_idx >= 
npoints) return; 19 | 20 | int sample_idx = index % nsample; 21 | grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 22 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 23 | 24 | atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]); 25 | } 26 | 27 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 28 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { 29 | // grad_out: (B, C, npoints, nsample) 30 | // idx: (B, npoints, nsample) 31 | // output: 32 | // grad_points: (B, C, N) 33 | cudaError_t err; 34 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 35 | dim3 threads(THREADS_PER_BLOCK); 36 | 37 | group_points_grad_kernel_fast<<>>(b, c, n, npoints, nsample, grad_out, idx, grad_points); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | 47 | __global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample, 48 | const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 49 | // points: (B, C, N) 50 | // idx: (B, npoints, nsample) 51 | // output: 52 | // out: (B, C, npoints, nsample) 53 | int bs_idx = blockIdx.z; 54 | int c_idx = blockIdx.y; 55 | int index = blockIdx.x * blockDim.x + threadIdx.x; 56 | int pt_idx = index / nsample; 57 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 58 | 59 | int sample_idx = index % nsample; 60 | 61 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 62 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 63 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 64 | 65 | out[out_idx] = points[in_idx]; 66 | } 67 | 68 | 69 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 70 | const float *points, const int *idx, float *out, cudaStream_t stream) { 71 | // points: (B, C, N) 72 | // idx: (B, npoints, nsample) 73 | // output: 74 | // out: (B, C, npoints, nsample) 75 | cudaError_t err; 76 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 77 | dim3 threads(THREADS_PER_BLOCK); 78 | 79 | group_points_kernel_fast<<>>(b, c, n, npoints, nsample, points, idx, out); 80 | // cudaDeviceSynchronize(); // for using printf in kernel function 81 | err = cudaGetLastError(); 82 | if (cudaSuccess != err) { 83 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 84 | exit(-1); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /core/trainer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import os 4 | import torch 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value""" 8 | def __init__(self,name=None): 9 | self.reset() 10 | self.name=name 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | self.avg = self.sum / self.count if self.count != 0 else 0 23 | 24 | class Trainer(object): 25 | def __init__(self, cfg, model, rank, output_dir,logger=None,lr_scheduler=None): 26 | self.model = 
model 27 | self.output_dir = output_dir 28 | self.rank = rank 29 | self.print_freq = cfg.PRINT_FREQ 30 | self.vis=cfg.VIS 31 | self.logger=logger 32 | self.loss_name=cfg.MODEL.loss_name 33 | self.meter={name:AverageMeter(name) for name in self.loss_name} 34 | self.lr_scheduler=lr_scheduler 35 | 36 | def train(self, epoch, data_loader, optimizer): 37 | for meter in self.meter: 38 | self.meter[meter].reset() 39 | batch_time = AverageMeter() 40 | data_time = AverageMeter() 41 | optimizer.zero_grad() 42 | self.model.train() 43 | if self.rank == 0: 44 | lr_msg='lr: {0}'.format(optimizer.state_dict()['param_groups'][0]['lr']) 45 | self.logger.info(lr_msg) 46 | end = time.time() 47 | 48 | for i,batched_inputs in enumerate(data_loader): 49 | 50 | data_time.update(time.time() - end) 51 | 52 | loss_dict = self.model(batched_inputs) 53 | loss = 0 54 | num_images = len(batched_inputs) 55 | 56 | for name in loss_dict: 57 | l=loss_dict[name] 58 | loss=loss+l 59 | self.meter[name].update(l.item(),num_images) 60 | 61 | 62 | if not self.vis: 63 | loss.backward() 64 | 65 | 66 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), 4, norm_type=2) 67 | optimizer.step() 68 | optimizer.zero_grad() 69 | 70 | batch_time.update(time.time() - end) 71 | end = time.time() 72 | 73 | 74 | if i % self.print_freq == 0 and self.rank == 0 : 75 | msg = 'Epoch: [{0}][{1}/{2}] ' \ 76 | 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s) ' \ 77 | 'Speed: {speed:.1f} samples/s ' \ 78 | 'Data: {data_time_val:.3f}ms ({data_time_avg:.3f}ms)'.format( 79 | epoch, i, len(data_loader), 80 | batch_time=batch_time, 81 | speed=num_images / batch_time.val, 82 | data_time_val=(data_time.val)*1000, 83 | data_time_avg=(data_time.avg)*1000 84 | ) 85 | for name in self.meter: 86 | msg+='{l}'.format( 87 | l=_get_loss_info(self.meter[name],name) 88 | ) 89 | self.logger.info(msg) 90 | 91 | if self.lr_scheduler: 92 | self.lr_scheduler.step() 93 | 94 | 95 | 96 | 97 | 98 | def _get_loss_info(meter, loss_name): 99 | msg = '{name}: {meter.val:.3e} ({meter.avg:.3e})\t'.format(name=loss_name, meter=meter) 100 | return msg 101 | 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Query6DoF: Learning Sparse Queries as Implicit Shape Prior for Category-Level 6DoF Pose Estimation 2 | This is the PyTorch implemention of ICCV'23 paper Query6DoF: Learning Sparse Queries as Implicit Shape Prior for Category-Level 6DoF Pose Estimation 3 | 4 | # Abstract 5 | > Category-level 6DoF object pose estimation intends to estimate the rotation, translation, and size of unseen objects. Many previous works use point clouds as a pre-learned shape prior to overcome intra-category variability. The shape prior is deformed to reconstruct instances' point clouds in canonical space and to build dense 3D-3D correspondences between the observed and reconstructed point clouds. However, in these methods, the pre-learned shape prior is not jointly optimized with estimation networks, and they are trained with a surrogate objective. In this paper, we propose a novel 6D pose estimation network based on a series of category-specific sparse queries that serve as the representation of the shape prior. 
Each query represents a shape component, and these queries are learnable embeddings that can be optimized together with the estimation network according to the point cloud reconstruction loss, the normalized object coordinate loss, and the 6D pose estimation loss. Our proposed network adopts a deformation-and-matching paradigm with attention, where the queries dynamically extract features from regions of interest using the attention mechanism and then directly regress results. Furthermore, our method reduces computation overhead through the sparseness of the queries and the incorporation of a lightweight global information injection block. With the aforementioned design, our method achieves state-of-the-art (SOTA) pose estimation performance on the NOCS dataset. 6 | 7 | # Requirements 8 | - Linux (tested on Ubuntu 16.04) 9 | - Python 3.8 10 | - CUDA 11.1 11 | - PyTorch 1.10.2 12 | 13 | # Installation 14 | ~~~ 15 | conda create -n query6dof python=3.8 16 | 17 | conda activate query6dof 18 | 19 | pip install torch==1.10.2+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html 20 | 21 | pip install opencv-python mmengine numpy tqdm 22 | 23 | cd Pointnet2/pointnet2 24 | 25 | python setup.py install 26 | ~~~ 27 | 28 | # Dataset 29 | Download camera_train, camera_eval, real_test, real_train, the ground-truth annotations, and the mesh models provided by [NOCS](https://github.com/hughw19/NOCS_CVPR2019). 30 | Then process these files following [SPD](https://github.com/mentian/object-deformnet). Also download the Mask R-CNN segmentation results and the NOCS predictions from [SPD](https://github.com/mentian/object-deformnet). 31 | The dataset is organized as follows: 32 | ~~~ 33 | 34 | ── data 35 | ├── CAMERA 36 | ├── gts 37 | ├── obj_models 38 | ├── Real 39 | └── results 40 | └── mrcnn_results 41 | ── results 42 | └── nocs_results 43 | ~~~ 44 | 45 | # Evaluation 46 | Please download our pretrained model [here](https://drive.google.com/file/d/11DKVV6NCgecKoe6Pu9OIXWyiROXhuW3J/view?usp=drive_link), or the pretrained model without linear and non-linear shape augmentation [here](https://drive.google.com/file/d/1zJEK_ik8ZmaC25X3RFQkeLFFwHKbtU66/view?usp=drive_link), and put it in the 'runs/CAMERA+Real/run/model' directory. 47 | 48 | Then you can evaluate on REAL275 using the following command. 49 | ~~~ 50 | python tools/valid.py --cfg config/run_eval_real.py --gpus 0 51 | ~~~ 52 | You can evaluate on CAMERA25 using the following command. 53 | ~~~ 54 | python tools/valid.py --cfg config/run_eval_camera.py --gpus 0 55 | ~~~ 56 | 57 | The running speed is reported at the same time. 58 | 59 | # Train 60 | 'tools/train.py' is the main training script. You can train using the following command. 61 | 62 | ~~~ 63 | python tools/train.py --cfg config/run.py --gpus 0,1,2,3 64 | ~~~ 65 | This config trains on 4 GPUs with a batch size of 15 per GPU, for a total batch size of 60. 66 | 67 | # Acknowledgment 68 | The dataset is provided by [NOCS](https://github.com/hughw19/NOCS_CVPR2019). 
Our code is developed based on [Pointnet2.PyTorch](https://github.com/sshaoshuai/Pointnet2.PyTorch) and [SPD](https://github.com/mentian/object-deformnet) -------------------------------------------------------------------------------- /network/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | from mmengine import Registry 5 | import torch.distributed as dist 6 | 7 | LOSS_REGISTRY = Registry("LOSS") 8 | 9 | def is_dist_avail_and_initialized(): 10 | if not dist.is_available(): 11 | return False 12 | if not dist.is_initialized(): 13 | return False 14 | return True 15 | 16 | def get_world_size(): 17 | if not is_dist_avail_and_initialized(): 18 | return 1 19 | return dist.get_world_size() 20 | 21 | @LOSS_REGISTRY.register_module() 22 | class consistency_loss(nn.Module): 23 | def __init__(self,weight,eta=1e-2): 24 | super().__init__() 25 | self.weight=weight 26 | self.eta=eta 27 | 28 | def forward(self,coord,nocs,R,t,s): 29 | response=torch.bmm(R.transpose(1,2)/s,(coord-t.unsqueeze(dim=1)).transpose(1,2)).transpose(1,2) 30 | loss=nn.functional.smooth_l1_loss(nocs,response,beta=0.5,reduction='none').flatten(1).mean(-1) #B 31 | mask=loss>(self.eta) 32 | valid=mask.float().sum() 33 | if is_dist_avail_and_initialized(): 34 | torch.distributed.all_reduce(valid) 35 | world_size=dist.get_world_size() 36 | valid=(valid/world_size).clamp(min=1) 37 | loss[~mask]=0 38 | loss=(loss.sum())/valid 39 | return self.weight*(loss) 40 | 41 | @LOSS_REGISTRY.register_module() 42 | class consistency_lossv2(consistency_loss): 43 | def __init__(self,weight=1.0,beta=0.1,loss='smooth') -> None: 44 | super().__init__(weight) 45 | self.beta=beta 46 | if loss=='smooth': 47 | self.loss_f=torch.nn.SmoothL1Loss(beta=beta) 48 | else: 49 | self.loss_f=torch.nn.MSELoss() 50 | def forward(self,coord,nocs): 51 | return self.weight*(self.loss_f(coord,nocs)) 52 | 53 | 54 | 55 | @LOSS_REGISTRY.register_module() 56 | class chamfer_lossv2(nn.Module): 57 | def __init__(self,weight,threshold=1.2) -> None: 58 | super().__init__() 59 | self.threshold=threshold 60 | self.weight=weight 61 | 62 | def forward(self,coord,gt): 63 | gt=gt.transpose(2,1) 64 | coord=coord.transpose(2,1) 65 | dis=torch.pow(gt.unsqueeze(dim=-1)-coord.unsqueeze(dim=-2),2).sum(dim=1) 66 | match_gt=torch.amin(dis,dim=-1) 67 | match_coord=torch.amin(dis,dim=-2) 68 | res=(match_coord.mean()+match_gt.mean()) 69 | 70 | return self.weight*(res) 71 | 72 | 73 | @LOSS_REGISTRY.register_module() 74 | class r_lossv2(nn.Module): 75 | def __init__(self,weight=1.0,beta=0.001,loss='smooth') -> None: 76 | super().__init__() 77 | self.weight=weight 78 | self.beta=beta 79 | self.loss_f=loss 80 | def forward(self,pred_r,gt_red,gt_green,sym): 81 | pred_green=pred_r[:,:,1:2] #B,3,1 82 | pred_red=pred_r[:,:,0:1] #B,3,1 83 | if self.loss_f=='smooth': 84 | green_loss=nn.functional.smooth_l1_loss(gt_green,pred_green,beta=self.beta) 85 | else: 86 | green_loss=nn.functional.mse_loss(gt_green,pred_green) 87 | 88 | mask=(sym[:,0]==1) #B 89 | B=mask.shape[0] 90 | valid=B-(mask).float().sum() 91 | b=valid.item()==0 92 | world_size=1 93 | if is_dist_avail_and_initialized(): 94 | torch.distributed.all_reduce(valid) 95 | world_size=dist.get_world_size() 96 | valid=valid/world_size 97 | if b: 98 | red_loss=0 99 | else: 100 | if self.loss_f=='smooth': 101 | red_loss=nn.functional.smooth_l1_loss(gt_red,pred_red,reduction='none',beta=self.beta)[:,:,0].mean(-1) #B 102 | else: 103 | 
red_loss=nn.functional.mse_loss(gt_red,pred_red,reduction='none')[:,:,0].mean(-1) #B 104 | red_loss[mask]=0 105 | red_loss=red_loss.sum()/(valid) 106 | return self.weight*(green_loss+red_loss) 107 | 108 | 109 | @LOSS_REGISTRY.register_module() 110 | class t_loss(nn.Module): 111 | def __init__(self,weight=1.0,beta=0.005,loss='smooth') -> None: 112 | super().__init__() 113 | self.weight=weight 114 | self.beta=beta 115 | if loss=='smooth': 116 | self.loss_f=torch.nn.SmoothL1Loss(beta=beta) 117 | else: 118 | self.loss_f=torch.nn.MSELoss() 119 | def forward(self,pred_t,t): 120 | return self.weight*self.loss_f(pred_t,t) 121 | 122 | @LOSS_REGISTRY.register_module() 123 | class s_loss(nn.Module): 124 | def __init__(self,weight=1.0,beta=0.005,loss='smooth') -> None: 125 | super().__init__() 126 | self.weight=weight 127 | self.beta=beta 128 | if loss=='smooth': 129 | self.loss_f=torch.nn.SmoothL1Loss(beta=beta) 130 | else: 131 | self.loss_f=torch.nn.MSELoss() 132 | def forward(self,pred_s,s): 133 | return self.weight*self.loss_f(pred_s,s) 134 | -------------------------------------------------------------------------------- /network/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import cv2 5 | import math 6 | import numpy as np 7 | from .decoder import DECODER_REGISTRY 8 | from .encoder import ENCODER_REGISTRY 9 | from .loss import LOSS_REGISTRY 10 | from mmengine import Registry 11 | 12 | NETWORK_REGISTRY = Registry("NETWORK") 13 | 14 | 15 | @NETWORK_REGISTRY.register_module() 16 | class Sparsenetv7(nn.Module): 17 | def __init__(self, 18 | backbone=dict( 19 | type='PointNetfeat' 20 | ), 21 | decoder=dict( 22 | type='sparse_decoder' 23 | ), 24 | pose_estimate=dict( 25 | type='pose_estimater' 26 | ), 27 | name='Posenet', 28 | training=False, 29 | input_dim=256, 30 | n_pts=128, 31 | cat_num=6, 32 | losses=[], 33 | loss_name=[], 34 | vis=False 35 | ) -> None: 36 | super().__init__() 37 | self.backbone=ENCODER_REGISTRY.build(backbone) 38 | self.pose_estimater=DECODER_REGISTRY.build(pose_estimate) 39 | self.decoder=DECODER_REGISTRY.build(decoder) 40 | prior_feat=(2*torch.rand((cat_num,n_pts,input_dim))-1)/input_dim 41 | self.prior_feat=torch.nn.parameter.Parameter(data=prior_feat,requires_grad=True) 42 | self.cat_num=cat_num 43 | self.training=training 44 | self.sym_id=torch.Tensor([0,1,3]) 45 | self.count=0 46 | if training: 47 | self.losses=[LOSS_REGISTRY.build(loss) for loss in losses] 48 | self.loss_name=loss_name 49 | self.vis=vis 50 | 51 | def forward(self,batched_inputs): 52 | points=batched_inputs['points'] 53 | category=batched_inputs['cat_id'] #B 54 | if self.training: 55 | nocs=batched_inputs['nocs'] 56 | model=batched_inputs['model'] 57 | R=batched_inputs['R'] 58 | s=batched_inputs['s'] 59 | gt_green=batched_inputs['gt_green'] 60 | gt_red=batched_inputs['gt_red'] 61 | mean_shape=batched_inputs['mean_shape'] 62 | t=batched_inputs['t'] 63 | s_delta=batched_inputs['dimension_delta'] 64 | mean_t=points.mean(dim=1) 65 | encoder_input=points-mean_t.unsqueeze(dim=1) 66 | encoder_out=self.backbone(encoder_input) 67 | inst_feat=encoder_out.transpose(1,2) 68 | prior_feat=self.prior_feat[category,...] 
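        # self.prior_feat has shape (cat_num, n_pts, input_dim); indexing with the
        # per-instance category ids gives the learned category-specific query
        # embeddings for this batch, shape (B, n_pts, input_dim).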
69 | 70 | device=torch.cuda.current_device() 71 | index=category+torch.arange(encoder_input.shape[0],dtype=torch.long,device=device)*self.cat_num 72 | 73 | sym=batched_inputs['sym'] 74 | 75 | if self.training: 76 | inst_feat,coord,response_coord=self.decoder(prior_feat,inst_feat,index,encoder_input) 77 | pred_r,pred_t,pred_s=self.pose_estimater(inst_feat,index) 78 | pred_t=pred_t+mean_t 79 | loss_dict=self.train_forward(pred_r,pred_t,pred_s,gt_green,gt_red,t,s_delta,sym,coord,model,nocs,response_coord,prior_feat,points,R,s,mean_shape) 80 | 81 | return loss_dict 82 | 83 | else: 84 | mean_shape=batched_inputs['mean_shape'] 85 | if not self.vis: 86 | inst_feat=self.decoder(prior_feat,inst_feat,index,encoder_input) 87 | else: 88 | inst_feat,coord,response_coord,iam1,iam2=self.decoder(prior_feat,inst_feat,index,encoder_input) 89 | B,N=iam1.shape[0],iam1.shape[1] 90 | iam1=iam1.view(B,N,-1,4) 91 | M=iam2.shape[1] 92 | iam2=iam2.view(B,M,-1,4) 93 | pred_r,pred_t,pred_s=self.pose_estimater(inst_feat,index) 94 | #pred_r:B,3,3 pred_t:B,3 pred_s:B,3 95 | pred_s=pred_s+mean_shape 96 | pred_t=pred_t+mean_t 97 | B=pred_r.shape[0] 98 | trans=torch.zeros((B,4,4),device=device) 99 | nocs_scale=torch.linalg.norm(pred_s,dim=-1,keepdim=True) #B,1 100 | trans[:,3,3]=1 101 | 102 | theta_x_=pred_r[:,0,0]-pred_r[:,2,2]#B 103 | theta_y_=pred_r[:,0,2]-pred_r[:,2,0]#B 104 | r_norm_=(theta_x_**2+theta_y_**2)**0.5 #B 105 | theta_x_=theta_x_/r_norm_ 106 | theta_y_=theta_y_/r_norm_ 107 | s_map_=torch.zeros((B,3,3),device=device) #B,3,3 108 | s_map_[:,1,1]=1 109 | s_map_[:,0,0],s_map_[:,0,2],s_map_[:,2,0],s_map_[:,2,2]=theta_x_,-theta_y_,theta_y_,theta_x_ 110 | delta_r=torch.bmm(pred_r,s_map_) #B,3,3 111 | 112 | mask=torch.isin(category,self.sym_id.to(device)) 113 | pred_r[mask,...]=delta_r[mask,...] 
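            # For categories listed in self.sym_id (presumably the rotationally
            # symmetric ones), the rotation about the symmetry axis is ambiguous,
            # so pred_r is replaced by pred_r @ s_map_, i.e. canonicalized by an
            # extra rotation about the y axis before the output transform is built.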
114 | 115 | trans[:,:3,:3]=pred_r*(nocs_scale.unsqueeze(dim=-1)) 116 | trans[:,:3,3:]=pred_t.unsqueeze(dim=-1) 117 | size=pred_s/nocs_scale 118 | trans=trans.cpu().numpy() 119 | size=size.cpu().numpy() 120 | if not self.vis: 121 | return trans,size 122 | else: 123 | return trans,size,coord.cpu(),response_coord.cpu(),iam1.cpu(),iam2.cpu(),pred_r.cpu().numpy(),pred_t.cpu().numpy(),nocs_scale.cpu().numpy() 124 | 125 | 126 | def train_forward(self,pred_r,pred_t,pred_s,gt_green,gt_red,t,s_delta,sym,coord,model,nocs,response_coord,prior_feat,points,R,s,mean_shape): 127 | ''' 128 | 129 | ''' 130 | paras={ 131 | 'chamfer':(coord,model), 132 | 'r':(pred_r,gt_red,gt_green,sym), 133 | 't':(pred_t,t), 134 | 's':(pred_s,s_delta), 135 | 'nocs':(response_coord,nocs), 136 | 'consistency':(points,response_coord,pred_r,pred_t,torch.linalg.norm(pred_s+mean_shape,dim=-1,keepdim=True).unsqueeze(dim=-1)), 137 | } 138 | return {name:loss(*(paras[name])) for loss,name in zip(self.losses,self.loss_name)} 139 | 140 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cuda_utils.h" 6 | #include "interpolate_gpu.h" 7 | 8 | 9 | __global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, 10 | const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { 11 | // unknown: (B, N, 3) 12 | // known: (B, M, 3) 13 | // output: 14 | // dist2: (B, N, 3) 15 | // idx: (B, N, 3) 16 | 17 | int bs_idx = blockIdx.y; 18 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (bs_idx >= b || pt_idx >= n) return; 20 | 21 | unknown += bs_idx * n * 3 + pt_idx * 3; 22 | known += bs_idx * m * 3; 23 | dist2 += bs_idx * n * 3 + pt_idx * 3; 24 | idx += bs_idx * n * 3 + pt_idx * 3; 25 | 26 | float ux = unknown[0]; 27 | float uy = unknown[1]; 28 | float uz = unknown[2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; besti3 = besti2; 39 | best2 = best1; besti2 = besti1; 40 | best1 = d; besti1 = k; 41 | } 42 | else if (d < best2) { 43 | best3 = best2; besti3 = besti2; 44 | best2 = d; besti2 = k; 45 | } 46 | else if (d < best3) { 47 | best3 = d; besti3 = k; 48 | } 49 | } 50 | dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; 51 | idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; 52 | } 53 | 54 | 55 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 56 | const float *known, float *dist2, int *idx, cudaStream_t stream) { 57 | // unknown: (B, N, 3) 58 | // known: (B, M, 3) 59 | // output: 60 | // dist2: (B, N, 3) 61 | // idx: (B, N, 3) 62 | 63 | cudaError_t err; 64 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 65 | dim3 threads(THREADS_PER_BLOCK); 66 | 67 | three_nn_kernel_fast<<>>(b, n, m, unknown, known, dist2, idx); 68 | 69 | err = cudaGetLastError(); 70 | if (cudaSuccess != err) { 71 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 72 | exit(-1); 73 | } 74 | } 75 | 76 | 77 | __global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ 
points, 78 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { 79 | // points: (B, C, M) 80 | // idx: (B, N, 3) 81 | // weight: (B, N, 3) 82 | // output: 83 | // out: (B, C, N) 84 | 85 | int bs_idx = blockIdx.z; 86 | int c_idx = blockIdx.y; 87 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 88 | 89 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 90 | 91 | weight += bs_idx * n * 3 + pt_idx * 3; 92 | points += bs_idx * c * m + c_idx * m; 93 | idx += bs_idx * n * 3 + pt_idx * 3; 94 | out += bs_idx * c * n + c_idx * n; 95 | 96 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; 97 | } 98 | 99 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 100 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) { 101 | // points: (B, C, M) 102 | // idx: (B, N, 3) 103 | // weight: (B, N, 3) 104 | // output: 105 | // out: (B, C, N) 106 | 107 | cudaError_t err; 108 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 109 | dim3 threads(THREADS_PER_BLOCK); 110 | three_interpolate_kernel_fast<<>>(b, c, m, n, points, idx, weight, out); 111 | 112 | err = cudaGetLastError(); 113 | if (cudaSuccess != err) { 114 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 115 | exit(-1); 116 | } 117 | } 118 | 119 | 120 | __global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) { 122 | // grad_out: (B, C, N) 123 | // weight: (B, N, 3) 124 | // output: 125 | // grad_points: (B, C, M) 126 | 127 | int bs_idx = blockIdx.z; 128 | int c_idx = blockIdx.y; 129 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 130 | 131 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 132 | 133 | grad_out += bs_idx * c * n + c_idx * n + pt_idx; 134 | weight += bs_idx * n * 3 + pt_idx * 3; 135 | grad_points += bs_idx * c * m + c_idx * m; 136 | idx += bs_idx * n * 3 + pt_idx * 3; 137 | 138 | 139 | atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); 140 | atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); 141 | atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); 142 | } 143 | 144 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 145 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream) { 146 | // grad_out: (B, C, N) 147 | // weight: (B, N, 3) 148 | // output: 149 | // grad_points: (B, C, M) 150 | 151 | cudaError_t err; 152 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 153 | dim3 threads(THREADS_PER_BLOCK); 154 | three_interpolate_grad_kernel_fast<<>>(b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | err = cudaGetLastError(); 157 | if (cudaSuccess != err) { 158 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 159 | exit(-1); 160 | } 161 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/pointnet2_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from . import pointnet2_utils 6 | from . 
import pytorch_utils as pt_utils 7 | from typing import List 8 | 9 | 10 | class _PointnetSAModuleBase(nn.Module): 11 | 12 | def __init__(self): 13 | super().__init__() 14 | self.npoint = None 15 | self.groupers = None 16 | self.mlps = None 17 | self.pool_method = 'max_pool' 18 | 19 | def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor): 20 | """ 21 | :param xyz: (B, N, 3) tensor of the xyz coordinates of the features 22 | :param features: (B, N, C) tensor of the descriptors of the the features 23 | :param new_xyz: 24 | :return: 25 | new_xyz: (B, npoint, 3) tensor of the new features' xyz 26 | new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors 27 | """ 28 | new_features_list = [] 29 | 30 | xyz_flipped = xyz.transpose(1, 2).contiguous() 31 | if new_xyz is None: 32 | new_xyz = pointnet2_utils.gather_operation( 33 | xyz_flipped, 34 | pointnet2_utils.furthest_point_sample(xyz, self.npoint) 35 | ).transpose(1, 2).contiguous() if self.npoint is not None else None 36 | 37 | for i in range(len(self.groupers)): 38 | new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample) 39 | 40 | new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample) 41 | if self.pool_method == 'max_pool': 42 | new_features = F.max_pool2d( 43 | new_features, kernel_size=[1, new_features.size(3)] 44 | ) # (B, mlp[-1], npoint, 1) 45 | elif self.pool_method == 'avg_pool': 46 | new_features = F.avg_pool2d( 47 | new_features, kernel_size=[1, new_features.size(3)] 48 | ) # (B, mlp[-1], npoint, 1) 49 | else: 50 | raise NotImplementedError 51 | 52 | new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint) 53 | new_features_list.append(new_features) 54 | 55 | return new_xyz, torch.cat(new_features_list, dim=1) 56 | 57 | 58 | class PointnetSAModuleMSG(_PointnetSAModuleBase): 59 | """Pointnet set abstraction layer with multiscale grouping""" 60 | 61 | def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True, 62 | use_xyz: bool = True, pool_method='max_pool', instance_norm=False,use_norm=False): 63 | """ 64 | :param npoint: int 65 | :param radii: list of float, list of radii to group with 66 | :param nsamples: list of int, number of samples in each ball query 67 | :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale 68 | :param bn: whether to use batchnorm 69 | :param use_xyz: 70 | :param pool_method: max_pool / avg_pool 71 | :param instance_norm: whether to use instance_norm 72 | """ 73 | super().__init__() 74 | 75 | assert len(radii) == len(nsamples) == len(mlps) 76 | 77 | self.npoint = npoint 78 | self.groupers = nn.ModuleList() 79 | self.mlps = nn.ModuleList() 80 | for i in range(len(radii)): 81 | radius = radii[i] 82 | nsample = nsamples[i] 83 | self.groupers.append( 84 | pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,norm=use_norm) 85 | if npoint is not None else pointnet2_utils.GroupAll(use_xyz) 86 | ) 87 | mlp_spec = mlps[i] 88 | if use_xyz: 89 | mlp_spec[0] += 3 90 | 91 | self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm)) 92 | self.pool_method = pool_method 93 | 94 | 95 | class PointnetSAModule(PointnetSAModuleMSG): 96 | """Pointnet set abstraction layer""" 97 | 98 | def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None, 99 | bn: bool = True, use_xyz: bool = True, pool_method='max_pool', 
instance_norm=False): 100 | """ 101 | :param mlp: list of int, spec of the pointnet before the global max_pool 102 | :param npoint: int, number of features 103 | :param radius: float, radius of ball 104 | :param nsample: int, number of samples in the ball query 105 | :param bn: whether to use batchnorm 106 | :param use_xyz: 107 | :param pool_method: max_pool / avg_pool 108 | :param instance_norm: whether to use instance_norm 109 | """ 110 | super().__init__( 111 | mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, 112 | pool_method=pool_method, instance_norm=instance_norm 113 | ) 114 | 115 | 116 | class PointnetFPModule(nn.Module): 117 | r"""Propigates the features of one set to another""" 118 | 119 | def __init__(self, *, mlp: List[int], bn: bool = True): 120 | """ 121 | :param mlp: list of int 122 | :param bn: whether to use batchnorm 123 | """ 124 | super().__init__() 125 | self.mlp = pt_utils.SharedMLP(mlp, bn=bn) 126 | 127 | def forward( 128 | self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor 129 | ) -> torch.Tensor: 130 | """ 131 | :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features 132 | :param known: (B, m, 3) tensor of the xyz positions of the known features 133 | :param unknow_feats: (B, C1, n) tensor of the features to be propigated to 134 | :param known_feats: (B, C2, m) tensor of features to be propigated 135 | :return: 136 | new_features: (B, mlp[-1], n) tensor of the features of the unknown features 137 | """ 138 | if known is not None: 139 | dist, idx = pointnet2_utils.three_nn(unknown, known) 140 | dist_recip = 1.0 / (dist + 1e-8) 141 | norm = torch.sum(dist_recip, dim=2, keepdim=True) 142 | weight = dist_recip / norm 143 | 144 | interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight) 145 | else: 146 | interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1)) 147 | 148 | if unknow_feats is not None: 149 | new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n) 150 | else: 151 | new_features = interpolated_feats 152 | 153 | new_features = new_features.unsqueeze(-1) 154 | new_features = self.mlp(new_features) 155 | 156 | return new_features.squeeze(-1) 157 | 158 | 159 | if __name__ == "__main__": 160 | pass 161 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from typing import List, Tuple 3 | 4 | 5 | class SharedMLP(nn.Sequential): 6 | 7 | def __init__( 8 | self, 9 | args: List[int], 10 | *, 11 | bn: bool = False, 12 | activation=nn.ReLU(inplace=True), 13 | preact: bool = False, 14 | first: bool = False, 15 | name: str = "", 16 | instance_norm: bool = False, 17 | ): 18 | super().__init__() 19 | 20 | for i in range(len(args) - 1): 21 | self.add_module( 22 | name + 'layer{}'.format(i), 23 | Conv2d( 24 | args[i], 25 | args[i + 1], 26 | bn=(not first or not preact or (i != 0)) and bn, 27 | activation=activation 28 | if (not first or not preact or (i != 0)) else None, 29 | preact=preact, 30 | instance_norm=instance_norm 31 | ) 32 | ) 33 | 34 | 35 | class _ConvBase(nn.Sequential): 36 | 37 | def __init__( 38 | self, 39 | in_size, 40 | out_size, 41 | kernel_size, 42 | stride, 43 | padding, 44 | activation, 45 | bn, 46 | init, 47 | conv=None, 48 | batch_norm=None, 49 | bias=True, 50 | preact=False, 51 | 
name="", 52 | instance_norm=False, 53 | instance_norm_func=None 54 | ): 55 | super().__init__() 56 | 57 | bias = bias and (not bn) 58 | conv_unit = conv( 59 | in_size, 60 | out_size, 61 | kernel_size=kernel_size, 62 | stride=stride, 63 | padding=padding, 64 | bias=bias 65 | ) 66 | init(conv_unit.weight) 67 | if bias: 68 | nn.init.constant_(conv_unit.bias, 0) 69 | 70 | if bn: 71 | if not preact: 72 | bn_unit = batch_norm(out_size) 73 | else: 74 | bn_unit = batch_norm(in_size) 75 | if instance_norm: 76 | if not preact: 77 | in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False) 78 | else: 79 | in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False) 80 | 81 | if preact: 82 | if bn: 83 | self.add_module(name + 'bn', bn_unit) 84 | 85 | if activation is not None: 86 | self.add_module(name + 'activation', activation) 87 | 88 | if not bn and instance_norm: 89 | self.add_module(name + 'in', in_unit) 90 | 91 | self.add_module(name + 'conv', conv_unit) 92 | 93 | if not preact: 94 | if bn: 95 | self.add_module(name + 'bn', bn_unit) 96 | 97 | if activation is not None: 98 | self.add_module(name + 'activation', activation) 99 | 100 | if not bn and instance_norm: 101 | self.add_module(name + 'in', in_unit) 102 | 103 | 104 | class _BNBase(nn.Sequential): 105 | 106 | def __init__(self, in_size, batch_norm=None, name=""): 107 | super().__init__() 108 | self.add_module(name + "bn", batch_norm(in_size)) 109 | 110 | nn.init.constant_(self[0].weight, 1.0) 111 | nn.init.constant_(self[0].bias, 0) 112 | 113 | 114 | class BatchNorm1d(_BNBase): 115 | 116 | def __init__(self, in_size: int, *, name: str = ""): 117 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 118 | 119 | 120 | class BatchNorm2d(_BNBase): 121 | 122 | def __init__(self, in_size: int, name: str = ""): 123 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 124 | 125 | 126 | class Conv1d(_ConvBase): 127 | 128 | def __init__( 129 | self, 130 | in_size: int, 131 | out_size: int, 132 | *, 133 | kernel_size: int = 1, 134 | stride: int = 1, 135 | padding: int = 0, 136 | activation=nn.ReLU(inplace=True), 137 | bn: bool = False, 138 | init=nn.init.kaiming_normal_, 139 | bias: bool = True, 140 | preact: bool = False, 141 | name: str = "", 142 | instance_norm=False 143 | ): 144 | super().__init__( 145 | in_size, 146 | out_size, 147 | kernel_size, 148 | stride, 149 | padding, 150 | activation, 151 | bn, 152 | init, 153 | conv=nn.Conv1d, 154 | batch_norm=BatchNorm1d, 155 | bias=bias, 156 | preact=preact, 157 | name=name, 158 | instance_norm=instance_norm, 159 | instance_norm_func=nn.InstanceNorm1d 160 | ) 161 | 162 | 163 | class Conv2d(_ConvBase): 164 | 165 | def __init__( 166 | self, 167 | in_size: int, 168 | out_size: int, 169 | *, 170 | kernel_size: Tuple[int, int] = (1, 1), 171 | stride: Tuple[int, int] = (1, 1), 172 | padding: Tuple[int, int] = (0, 0), 173 | activation=nn.ReLU(inplace=True), 174 | bn: bool = False, 175 | init=nn.init.kaiming_normal_, 176 | bias: bool = True, 177 | preact: bool = False, 178 | name: str = "", 179 | instance_norm=False 180 | ): 181 | super().__init__( 182 | in_size, 183 | out_size, 184 | kernel_size, 185 | stride, 186 | padding, 187 | activation, 188 | bn, 189 | init, 190 | conv=nn.Conv2d, 191 | batch_norm=BatchNorm2d, 192 | bias=bias, 193 | preact=preact, 194 | name=name, 195 | instance_norm=instance_norm, 196 | instance_norm_func=nn.InstanceNorm2d 197 | ) 198 | 199 | 200 | class FC(nn.Sequential): 201 | 202 | def __init__( 203 | self, 204 | 
in_size: int, 205 | out_size: int, 206 | *, 207 | activation=nn.ReLU(inplace=True), 208 | bn: bool = False, 209 | init=None, 210 | preact: bool = False, 211 | name: str = "" 212 | ): 213 | super().__init__() 214 | 215 | fc = nn.Linear(in_size, out_size, bias=not bn) 216 | if init is not None: 217 | init(fc.weight) 218 | if not bn: 219 | nn.init.constant(fc.bias, 0) 220 | 221 | if preact: 222 | if bn: 223 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 224 | 225 | if activation is not None: 226 | self.add_module(name + 'activation', activation) 227 | 228 | self.add_module(name + 'fc', fc) 229 | 230 | if not preact: 231 | if bn: 232 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 233 | 234 | if activation is not None: 235 | self.add_module(name + 'activation', activation) 236 | 237 | -------------------------------------------------------------------------------- /network/decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .basic import MLP 5 | from mmengine import Registry 6 | import math 7 | 8 | DECODER_REGISTRY = Registry("DECODER") 9 | 10 | 11 | @DECODER_REGISTRY.register_module() 12 | class deep_prior_decoderv2_9(nn.Module): 13 | def __init__(self,input_dim=256,group=4,cat_num=6,middle_dim=1024,training=False,vis=False) -> None: 14 | super().__init__() 15 | self.input_dim=input_dim 16 | self.group=group 17 | self.mlp1=MLP(input_dim,middle_dim,group*input_dim*cat_num) 18 | self.fc1=torch.nn.Linear(group*input_dim,input_dim) 19 | self.fc2=torch.nn.Linear(group*input_dim,input_dim) 20 | self.mlp2=MLP(input_dim,middle_dim,input_dim) 21 | self.mlp3=MLP(input_dim,middle_dim,group*input_dim*cat_num) 22 | self.mlp4=MLP(input_dim+64,middle_dim,input_dim+64) 23 | self.mlp5=MLP(input_dim+64,middle_dim,input_dim*2) 24 | self.mlp6=MLP(input_dim,middle_dim,input_dim) 25 | self.training=training 26 | self.chamfer=MLP(input_dim,128,3) 27 | self.glo_enhance1=global_enhancev2() 28 | self.glo_enhance2=global_enhancev2() 29 | self.glo_enhance3=global_enhancev2() 30 | self.glo_enhance4=global_enhancev2(input_dim+64) 31 | self.nocs_mlp=MLP(input_dim+64,128,3) 32 | self.coord_mlp=MLP(3,32,64) 33 | self.alpha1=torch.nn.parameter.Parameter(data=torch.tensor([1],dtype=torch.float32),requires_grad=True) 34 | self.alpha2=torch.nn.parameter.Parameter(data=torch.tensor([1],dtype=torch.float32),requires_grad=True) 35 | self.vis=vis 36 | 37 | 38 | def forward(self,prior_feat,inst_feat,index,encoder_input): 39 | ''' 40 | inst:B,N,D 41 | prior:B,M,D 42 | ''' 43 | B,N,D=inst_feat.shape 44 | M=prior_feat.shape[1] 45 | 46 | prior_feat=self.glo_enhance1(prior_feat) 47 | inst_feat=self.glo_enhance2(inst_feat) 48 | 49 | conv_para1=self.mlp1(prior_feat) #B,M,6*D*4 50 | conv_para1=conv_para1.transpose(1,2).contiguous().view(-1,self.group*D,M) 51 | conv_para1=torch.index_select(conv_para1,0,index) #B,4*D,M 52 | conv_para1=(conv_para1.view(B,D,-1)) #B,D,4*M 53 | 54 | iam1=torch.bmm(inst_feat,conv_para1) #B,N,4*M 55 | iam_prob=(iam1.sigmoid_())/(self.alpha1.clamp(min=1e-5)) 56 | 57 | aggre_inst=torch.bmm(iam_prob.transpose(1,2),inst_feat) #B,4*M,D 58 | normalizer=iam_prob.sum(1,keepdim=True).transpose(1,2).clamp(min=1e-6) #B,4*M,1 59 | if not self.training and self.vis: 60 | iam1=iam_prob/(normalizer.transpose(1,2))#B,N,4*M 61 | aggre_inst=aggre_inst/normalizer 62 | aggre_inst=aggre_inst.view(B,M,-1) #B,M,4*D 63 | aggre_inst=self.fc1(aggre_inst) #B,M,D 64 | 65 | 
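# The block above is the prior-to-instance aggregation step of this decoder:
# mlp1 predicts `group` dynamic kernels for every prior query, the bmm with the instance
# features yields instance activation maps (iam1, shape B x N x group*M), and the
# sigmoid-scaled maps softly pool instance features into each query (aggre_inst);
# fc1 then merges the groups before the residual update of prior_feat just below.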
prior_feat=prior_feat+aggre_inst 66 | res_prior_feat=self.mlp2(prior_feat)#B,M,D 67 | prior_feat=res_prior_feat+prior_feat 68 | 69 | prior_feat=self.glo_enhance3(prior_feat) 70 | 71 | res_prior_feat=self.mlp6(prior_feat)#B,M,D 72 | prior_feat=res_prior_feat+prior_feat 73 | 74 | if self.training or self.vis: 75 | coord=self.chamfer(prior_feat) 76 | 77 | conv_para2=self.mlp3(inst_feat) #B,N,6*D*4 78 | conv_para2=conv_para2.transpose(1,2).contiguous().view(-1,self.group*D,N) 79 | conv_para2=torch.index_select(conv_para2,0,index) #B,4*D,N 80 | conv_para2=conv_para2.view(B,D,-1) #B,D,4*N 81 | 82 | iam2=torch.bmm(prior_feat,conv_para2) #B,M,4*N 83 | iam_prob=(iam2.sigmoid_())/(self.alpha2.clamp(min=1e-5)) #B,M,4*N 84 | 85 | aggre_prior=torch.bmm(iam_prob.transpose(1,2),prior_feat) #B,4*N,D 86 | normalizer=iam_prob.sum(1,keepdim=True).transpose(1,2).clamp(min=1e-6) #B,4*N,1 87 | if not self.training and self.vis: 88 | iam2=iam_prob/(normalizer.transpose(1,2))#B,M,4*N 89 | aggre_prior=aggre_prior/(normalizer)#B,N,4*M 90 | aggre_prior=aggre_prior.view(B,N,-1) #B,N,4*D 91 | aggre_prior=self.fc2(aggre_prior) #B,N,D 92 | 93 | 94 | inst_feat=inst_feat+aggre_prior#B,N,D 95 | 96 | coord_feat=self.coord_mlp(encoder_input) #B,N,64 97 | inst_feat=torch.cat((inst_feat,coord_feat),dim=-1) 98 | 99 | res_inst_feat=self.mlp4(inst_feat) #B,N,D 100 | inst_feat=res_inst_feat+inst_feat 101 | 102 | inst_feat=self.glo_enhance4(inst_feat) 103 | 104 | 105 | if self.training or self.vis: 106 | response_coord=self.nocs_mlp(inst_feat) 107 | response_coord=response_coord.sigmoid_()-0.5 108 | 109 | 110 | inst_feat=self.mlp5(inst_feat) 111 | 112 | inst_feat=torch.nn.functional.adaptive_avg_pool1d(inst_feat.transpose(1,2),1).transpose(1,2) #B,1,D 113 | if self.training: 114 | return inst_feat,coord,response_coord 115 | elif not self.vis: 116 | return inst_feat 117 | else: 118 | return inst_feat,coord,response_coord,iam1,iam2 119 | 120 | 121 | 122 | 123 | 124 | 125 | class global_enhancev2(nn.Module): 126 | def __init__(self,input_dim=256) -> None: 127 | super().__init__() 128 | alpha=torch.tensor([1],dtype=torch.float32) 129 | self.alpha=torch.nn.parameter.Parameter(data=alpha,requires_grad=True) 130 | beta=torch.tensor([0],dtype=torch.float32) 131 | self.beta=torch.nn.parameter.Parameter(data=beta,requires_grad=True) 132 | self.linear=torch.nn.Conv1d(input_dim,input_dim,1,bias=False) 133 | 134 | def forward(self,feat): 135 | ''' 136 | feat:B,N,D 137 | ''' 138 | global_feat=torch.nn.functional.adaptive_avg_pool1d(feat.transpose(1,2),1) #B,D,1 139 | global_feat=self.linear(global_feat) 140 | atten=torch.bmm(feat,global_feat) #B,N,1 141 | mean=atten.squeeze(-1).mean(-1) #B 142 | std=torch.std(atten.squeeze(-1), dim=-1, unbiased=False) 143 | atten=self.alpha*(atten-mean.view(-1,1,1))/(std.view(-1,1,1)+1e-5)+self.beta 144 | global_feat=torch.bmm(atten,global_feat.transpose(1,2)) #B,N,D 145 | feat=feat+global_feat #B,N,D 146 | return feat 147 | 148 | 149 | 150 | 151 | @DECODER_REGISTRY.register_module() 152 | class pose_estimater(nn.Module): 153 | def __init__(self,input_dim=512,middle_dim=256,cat_num=6) -> None: 154 | super().__init__() 155 | self.mlp_r=MLP(input_dim,middle_dim,6) 156 | self.mlp_t=MLP(input_dim,middle_dim,3) 157 | self.mlp_s=MLP(input_dim,middle_dim,3) 158 | 159 | def forward(self,inst_feat,index): 160 | ''' 161 | inst_faet:B,1,2*D 162 | ''' 163 | inst_feat=inst_feat.squeeze(dim=1) 164 | r=self.mlp_r(inst_feat) #B,6 165 | t=self.mlp_t(inst_feat) #B,3 166 | s=self.mlp_s(inst_feat) #B,3 167 | 168 | 
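# mlp_r outputs a continuous 6D rotation parameterization (cf. Zhou et al., CVPR 2019):
# the first three values act as a raw x-axis and the last three as a raw y-axis.
# Ortho6d2Mat below orthonormalizes them via normalization and cross products, so the
# returned matrix R satisfies R @ R.transpose(1, 2) ~ I with det(R) = 1.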
r=self.Ortho6d2Mat(r[:,0:3],r[:,3:]) 169 | 170 | return r,t,s 171 | def Ortho6d2Mat(self,x_raw, y_raw): 172 | y = self.normalize_vector(y_raw) 173 | z = self.cross_product(x_raw, y) #B,3 174 | z = self.normalize_vector(z)#B,3 175 | x = self.cross_product(y,z)#B,3 176 | 177 | x = x.unsqueeze(2) 178 | y = y.unsqueeze(2) 179 | z = z.unsqueeze(2) 180 | matrix = torch.cat((x,y,z),dim=2) #batch*3*3 181 | return matrix 182 | def normalize_vector(self, v, dim =1, return_mag =False): 183 | return torch.nn.functional.normalize(v,dim=dim) 184 | 185 | def cross_product(self,u, v): 186 | return torch.cross(u,v,dim=-1) 187 | -------------------------------------------------------------------------------- /Pointnet2/tools/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch.utils.data as torch_data 4 | import kitti_utils 5 | import cv2 6 | from PIL import Image 7 | 8 | 9 | USE_INTENSITY = False 10 | 11 | 12 | class KittiDataset(torch_data.Dataset): 13 | def __init__(self, root_dir, split='train', mode='TRAIN'): 14 | self.split = split 15 | self.mode = mode 16 | self.classes = ['Car'] 17 | is_test = self.split == 'test' 18 | self.imageset_dir = os.path.join(root_dir, 'KITTI', 'object', 'testing' if is_test else 'training') 19 | 20 | split_dir = os.path.join(root_dir, 'KITTI', 'ImageSets', split + '.txt') 21 | self.image_idx_list = [x.strip() for x in open(split_dir).readlines()] 22 | self.sample_id_list = [int(sample_id) for sample_id in self.image_idx_list] 23 | self.num_sample = self.image_idx_list.__len__() 24 | 25 | self.npoints = 16384 26 | 27 | self.image_dir = os.path.join(self.imageset_dir, 'image_2') 28 | self.lidar_dir = os.path.join(self.imageset_dir, 'velodyne') 29 | self.calib_dir = os.path.join(self.imageset_dir, 'calib') 30 | self.label_dir = os.path.join(self.imageset_dir, 'label_2') 31 | self.plane_dir = os.path.join(self.imageset_dir, 'planes') 32 | 33 | def get_image(self, idx): 34 | img_file = os.path.join(self.image_dir, '%06d.png' % idx) 35 | assert os.path.exists(img_file) 36 | return cv2.imread(img_file) # (H, W, 3) BGR mode 37 | 38 | def get_image_shape(self, idx): 39 | img_file = os.path.join(self.image_dir, '%06d.png' % idx) 40 | assert os.path.exists(img_file) 41 | im = Image.open(img_file) 42 | width, height = im.size 43 | return height, width, 3 44 | 45 | def get_lidar(self, idx): 46 | lidar_file = os.path.join(self.lidar_dir, '%06d.bin' % idx) 47 | assert os.path.exists(lidar_file) 48 | return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) 49 | 50 | def get_calib(self, idx): 51 | calib_file = os.path.join(self.calib_dir, '%06d.txt' % idx) 52 | assert os.path.exists(calib_file) 53 | return kitti_utils.Calibration(calib_file) 54 | 55 | def get_label(self, idx): 56 | label_file = os.path.join(self.label_dir, '%06d.txt' % idx) 57 | assert os.path.exists(label_file) 58 | return kitti_utils.get_objects_from_label(label_file) 59 | 60 | @staticmethod 61 | def get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape): 62 | val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) 63 | val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) 64 | val_flag_merge = np.logical_and(val_flag_1, val_flag_2) 65 | pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) 66 | return pts_valid_flag 67 | 68 | def filtrate_objects(self, obj_list): 69 | type_whitelist = self.classes 70 | if self.mode == 'TRAIN': 71 | type_whitelist = 
list(self.classes) 72 | if 'Car' in self.classes: 73 | type_whitelist.append('Van') 74 | 75 | valid_obj_list = [] 76 | for obj in obj_list: 77 | if obj.cls_type not in type_whitelist: 78 | continue 79 | 80 | valid_obj_list.append(obj) 81 | return valid_obj_list 82 | 83 | def __len__(self): 84 | return len(self.sample_id_list) 85 | 86 | def __getitem__(self, index): 87 | sample_id = int(self.sample_id_list[index]) 88 | calib = self.get_calib(sample_id) 89 | img_shape = self.get_image_shape(sample_id) 90 | pts_lidar = self.get_lidar(sample_id) 91 | 92 | # get valid point (projected points should be in image) 93 | pts_rect = calib.lidar_to_rect(pts_lidar[:, 0:3]) 94 | pts_intensity = pts_lidar[:, 3] 95 | 96 | pts_img, pts_rect_depth = calib.rect_to_img(pts_rect) 97 | pts_valid_flag = self.get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape) 98 | 99 | pts_rect = pts_rect[pts_valid_flag][:, 0:3] 100 | pts_intensity = pts_intensity[pts_valid_flag] 101 | 102 | if self.npoints < len(pts_rect): 103 | pts_depth = pts_rect[:, 2] 104 | pts_near_flag = pts_depth < 40.0 105 | far_idxs_choice = np.where(pts_near_flag == 0)[0] 106 | near_idxs = np.where(pts_near_flag == 1)[0] 107 | near_idxs_choice = np.random.choice(near_idxs, self.npoints - len(far_idxs_choice), replace=False) 108 | 109 | choice = np.concatenate((near_idxs_choice, far_idxs_choice), axis=0) \ 110 | if len(far_idxs_choice) > 0 else near_idxs_choice 111 | np.random.shuffle(choice) 112 | else: 113 | choice = np.arange(0, len(pts_rect), dtype=np.int32) 114 | if self.npoints > len(pts_rect): 115 | extra_choice = np.random.choice(choice, self.npoints - len(pts_rect), replace=False) 116 | choice = np.concatenate((choice, extra_choice), axis=0) 117 | np.random.shuffle(choice) 118 | 119 | ret_pts_rect = pts_rect[choice, :] 120 | ret_pts_intensity = pts_intensity[choice] - 0.5 # translate intensity to [-0.5, 0.5] 121 | 122 | pts_features = [ret_pts_intensity.reshape(-1, 1)] 123 | ret_pts_features = np.concatenate(pts_features, axis=1) if pts_features.__len__() > 1 else pts_features[0] 124 | 125 | sample_info = {'sample_id': sample_id} 126 | 127 | if self.mode == 'TEST': 128 | if USE_INTENSITY: 129 | pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) 130 | else: 131 | pts_input = ret_pts_rect 132 | sample_info['pts_input'] = pts_input 133 | sample_info['pts_rect'] = ret_pts_rect 134 | sample_info['pts_features'] = ret_pts_features 135 | return sample_info 136 | 137 | gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) 138 | 139 | gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) 140 | 141 | # prepare input 142 | if USE_INTENSITY: 143 | pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) 144 | else: 145 | pts_input = ret_pts_rect 146 | 147 | # generate training labels 148 | cls_labels = self.generate_training_labels(ret_pts_rect, gt_boxes3d) 149 | sample_info['pts_input'] = pts_input 150 | sample_info['pts_rect'] = ret_pts_rect 151 | sample_info['cls_labels'] = cls_labels 152 | return sample_info 153 | 154 | @staticmethod 155 | def generate_training_labels(pts_rect, gt_boxes3d): 156 | cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32) 157 | gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True) 158 | extend_gt_boxes3d = kitti_utils.enlarge_box3d(gt_boxes3d, extra_width=0.2) 159 | extend_gt_corners = kitti_utils.boxes3d_to_corners3d(extend_gt_boxes3d, rotate=True) 160 | for k in range(gt_boxes3d.shape[0]): 161 | box_corners = gt_corners[k] 162 | 
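# Per-point segmentation labels: points inside a GT box hull become foreground (1),
# while points that fall only inside the slightly enlarged box (extra_width=0.2) are
# set to -1 so the boundary region is ignored by DiceLoss(ignore_target=-1).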
fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners) 163 | cls_label[fg_pt_flag] = 1 164 | 165 | # enlarge the bbox3d, ignore nearby points 166 | extend_box_corners = extend_gt_corners[k] 167 | fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners) 168 | ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag) 169 | cls_label[ignore_flag] = -1 170 | 171 | return cls_label 172 | 173 | def collate_batch(self, batch): 174 | batch_size = batch.__len__() 175 | ans_dict = {} 176 | 177 | for key in batch[0].keys(): 178 | if isinstance(batch[0][key], np.ndarray): 179 | ans_dict[key] = np.concatenate([batch[k][key][np.newaxis, ...] for k in range(batch_size)], axis=0) 180 | 181 | else: 182 | ans_dict[key] = [batch[k][key] for k in range(batch_size)] 183 | if isinstance(batch[0][key], int): 184 | ans_dict[key] = np.array(ans_dict[key], dtype=np.int32) 185 | elif isinstance(batch[0][key], float): 186 | ans_dict[key] = np.array(ans_dict[key], dtype=np.float32) 187 | 188 | return ans_dict 189 | -------------------------------------------------------------------------------- /Pointnet2/tools/train_and_eval.py: -------------------------------------------------------------------------------- 1 | import _init_path 2 | import numpy as np 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | import torch.optim.lr_scheduler as lr_sched 8 | from torch.nn.utils import clip_grad_norm_ 9 | from torch.utils.data import DataLoader 10 | import tensorboard_logger as tb_log 11 | from dataset import KittiDataset 12 | import argparse 13 | import importlib 14 | 15 | parser = argparse.ArgumentParser(description="Arg parser") 16 | parser.add_argument("--batch_size", type=int, default=8) 17 | parser.add_argument("--epochs", type=int, default=100) 18 | parser.add_argument("--ckpt_save_interval", type=int, default=5) 19 | parser.add_argument('--workers', type=int, default=4) 20 | parser.add_argument("--mode", type=str, default='train') 21 | parser.add_argument("--ckpt", type=str, default='None') 22 | 23 | parser.add_argument("--net", type=str, default='pointnet2_msg') 24 | 25 | parser.add_argument('--lr', type=float, default=0.002) 26 | parser.add_argument('--lr_decay', type=float, default=0.2) 27 | parser.add_argument('--lr_clip', type=float, default=0.000001) 28 | parser.add_argument('--decay_step_list', type=list, default=[50, 70, 80, 90]) 29 | parser.add_argument('--weight_decay', type=float, default=0.001) 30 | 31 | parser.add_argument("--output_dir", type=str, default='output') 32 | parser.add_argument("--extra_tag", type=str, default='default') 33 | 34 | args = parser.parse_args() 35 | 36 | FG_THRESH = 0.3 37 | 38 | 39 | def log_print(info, log_f=None): 40 | print(info) 41 | if log_f is not None: 42 | print(info, file=log_f) 43 | 44 | 45 | class DiceLoss(nn.Module): 46 | def __init__(self, ignore_target=-1): 47 | super().__init__() 48 | self.ignore_target = ignore_target 49 | 50 | def forward(self, input, target): 51 | """ 52 | :param input: (N), logit 53 | :param target: (N), {0, 1} 54 | :return: 55 | """ 56 | input = torch.sigmoid(input.view(-1)) 57 | target = target.float().view(-1) 58 | mask = (target != self.ignore_target).float() 59 | return 1.0 - (torch.min(input, target) * mask).sum() / torch.clamp((torch.max(input, target) * mask).sum(), min=1.0) 60 | 61 | 62 | def train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f): 63 | model.train() 64 | log_print('===============TRAIN EPOCH %d================' % epoch, 
log_f=log_f) 65 | loss_func = DiceLoss(ignore_target=-1) 66 | 67 | for it, batch in enumerate(train_loader): 68 | optimizer.zero_grad() 69 | 70 | pts_input, cls_labels = batch['pts_input'], batch['cls_labels'] 71 | pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float() 72 | cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1) 73 | 74 | pred_cls = model(pts_input) 75 | pred_cls = pred_cls.view(-1) 76 | 77 | loss = loss_func(pred_cls, cls_labels) 78 | loss.backward() 79 | clip_grad_norm_(model.parameters(), 1.0) 80 | optimizer.step() 81 | 82 | total_it += 1 83 | 84 | pred_class = (torch.sigmoid(pred_cls) > FG_THRESH) 85 | fg_mask = cls_labels > 0 86 | correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum() 87 | union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct 88 | iou = correct / torch.clamp(union, min=1.0) 89 | 90 | cur_lr = lr_scheduler.get_lr()[0] 91 | tb_log.log_value('learning_rate', cur_lr, epoch) 92 | if tb_log is not None: 93 | tb_log.log_value('train_loss', loss, total_it) 94 | tb_log.log_value('train_fg_iou', iou, total_it) 95 | 96 | log_print('training epoch %d: it=%d/%d, total_it=%d, loss=%.5f, fg_iou=%.3f, lr=%f' % 97 | (epoch, it, len(train_loader), total_it, loss.item(), iou.item(), cur_lr), log_f=log_f) 98 | 99 | return total_it 100 | 101 | 102 | def eval_one_epoch(model, eval_loader, epoch, tb_log=None, log_f=None): 103 | model.train() 104 | log_print('===============EVAL EPOCH %d================' % epoch, log_f=log_f) 105 | 106 | iou_list = [] 107 | for it, batch in enumerate(eval_loader): 108 | pts_input, cls_labels = batch['pts_input'], batch['cls_labels'] 109 | pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float() 110 | cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1) 111 | 112 | pred_cls = model(pts_input) 113 | pred_cls = pred_cls.view(-1) 114 | 115 | pred_class = (torch.sigmoid(pred_cls) > FG_THRESH) 116 | fg_mask = cls_labels > 0 117 | correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum() 118 | union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct 119 | iou = correct / torch.clamp(union, min=1.0) 120 | 121 | iou_list.append(iou.item()) 122 | log_print('EVAL: it=%d/%d, iou=%.3f' % (it, len(eval_loader), iou), log_f=log_f) 123 | 124 | iou_list = np.array(iou_list) 125 | avg_iou = iou_list.mean() 126 | if tb_log is not None: 127 | tb_log.log_value('eval_fg_iou', avg_iou, epoch) 128 | 129 | log_print('\nEpoch %d: Average IoU (samples=%d): %.6f' % (epoch, iou_list.__len__(), avg_iou), log_f=log_f) 130 | return avg_iou 131 | 132 | 133 | def save_checkpoint(model, epoch, ckpt_name): 134 | if isinstance(model, torch.nn.DataParallel): 135 | model_state = model.module.state_dict() 136 | else: 137 | model_state = model.state_dict() 138 | 139 | state = {'epoch': epoch, 'model_state': model_state} 140 | ckpt_name = '{}.pth'.format(ckpt_name) 141 | torch.save(state, ckpt_name) 142 | 143 | 144 | def load_checkpoint(model, filename): 145 | if os.path.isfile(filename): 146 | log_print("==> Loading from checkpoint %s" % filename) 147 | checkpoint = torch.load(filename) 148 | epoch = checkpoint['epoch'] 149 | model.load_state_dict(checkpoint['model_state']) 150 | log_print("==> Done") 151 | else: 152 | raise FileNotFoundError 153 | 154 | return epoch 155 | 156 | 157 | def train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f): 158 | model.cuda() 159 | optimizer = optim.Adam(model.parameters(), 
lr=args.lr, weight_decay=args.weight_decay) 160 | 161 | def lr_lbmd(cur_epoch): 162 | cur_decay = 1 163 | for decay_step in args.decay_step_list: 164 | if cur_epoch >= decay_step: 165 | cur_decay = cur_decay * args.lr_decay 166 | return max(cur_decay, args.lr_clip / args.lr) 167 | 168 | lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd) 169 | 170 | total_it = 0 171 | for epoch in range(1, args.epochs + 1): 172 | lr_scheduler.step(epoch) 173 | total_it = train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f) 174 | 175 | if epoch % args.ckpt_save_interval == 0: 176 | with torch.no_grad(): 177 | avg_iou = eval_one_epoch(model, eval_loader, epoch, tb_log, log_f) 178 | ckpt_name = os.path.join(ckpt_dir, 'checkpoint_epoch_%d' % epoch) 179 | save_checkpoint(model, epoch, ckpt_name) 180 | 181 | 182 | if __name__ == '__main__': 183 | MODEL = importlib.import_module(args.net) # import network module 184 | model = MODEL.get_model(input_channels=0) 185 | 186 | eval_set = KittiDataset(root_dir='./data', mode='EVAL', split='val') 187 | eval_loader = DataLoader(eval_set, batch_size=args.batch_size, shuffle=False, pin_memory=True, 188 | num_workers=args.workers, collate_fn=eval_set.collate_batch) 189 | 190 | if args.mode == 'train': 191 | train_set = KittiDataset(root_dir='./data', mode='TRAIN', split='train') 192 | train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, pin_memory=True, 193 | num_workers=args.workers, collate_fn=train_set.collate_batch) 194 | # output dir config 195 | output_dir = os.path.join(args.output_dir, args.extra_tag) 196 | os.makedirs(output_dir, exist_ok=True) 197 | tb_log.configure(os.path.join(output_dir, 'tensorboard')) 198 | ckpt_dir = os.path.join(output_dir, 'ckpt') 199 | os.makedirs(ckpt_dir, exist_ok=True) 200 | 201 | log_file = os.path.join(output_dir, 'log.txt') 202 | log_f = open(log_file, 'w') 203 | 204 | for key, val in vars(args).items(): 205 | log_print("{:16} {}".format(key, val), log_f=log_f) 206 | 207 | # train and eval 208 | train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f) 209 | log_f.close() 210 | elif args.mode == 'eval': 211 | epoch = load_checkpoint(model, args.ckpt) 212 | model.cuda() 213 | with torch.no_grad(): 214 | avg_iou = eval_one_epoch(model, eval_loader, epoch) 215 | else: 216 | raise NotImplementedError 217 | 218 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | #include "sampling_gpu.h" 6 | 7 | 8 | __global__ void gather_points_kernel_fast(int b, int c, int n, int m, 9 | const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 10 | // points: (B, C, N) 11 | // idx: (B, M) 12 | // output: 13 | // out: (B, C, M) 14 | 15 | int bs_idx = blockIdx.z; 16 | int c_idx = blockIdx.y; 17 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 18 | if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; 19 | 20 | out += bs_idx * c * m + c_idx * m + pt_idx; 21 | idx += bs_idx * m + pt_idx; 22 | points += bs_idx * c * n + c_idx * n; 23 | out[0] = points[idx[0]]; 24 | } 25 | 26 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 27 | const float *points, const int *idx, float *out, cudaStream_t stream) { 28 | // points: (B, C, N) 29 | // idx: (B, npoints) 30 | // output: 31 | // out: (B, C, npoints) 32 | 33 
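    // Launch layout shared by these wrappers: a 3-D grid of
    // (ceil(npoints / THREADS_PER_BLOCK), C, B) blocks with THREADS_PER_BLOCK threads
    // each, i.e. one thread per output element out[b][c][m].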
| cudaError_t err; 34 | dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 35 | dim3 threads(THREADS_PER_BLOCK); 36 | 37 | gather_points_kernel_fast<<>>(b, c, n, npoints, points, idx, out); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | __global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 47 | const int *__restrict__ idx, float *__restrict__ grad_points) { 48 | // grad_out: (B, C, M) 49 | // idx: (B, M) 50 | // output: 51 | // grad_points: (B, C, N) 52 | 53 | int bs_idx = blockIdx.z; 54 | int c_idx = blockIdx.y; 55 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 56 | if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; 57 | 58 | grad_out += bs_idx * c * m + c_idx * m + pt_idx; 59 | idx += bs_idx * m + pt_idx; 60 | grad_points += bs_idx * c * n + c_idx * n; 61 | 62 | atomicAdd(grad_points + idx[0], grad_out[0]); 63 | } 64 | 65 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 66 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { 67 | // grad_out: (B, C, npoints) 68 | // idx: (B, npoints) 69 | // output: 70 | // grad_points: (B, C, N) 71 | 72 | cudaError_t err; 73 | dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 74 | dim3 threads(THREADS_PER_BLOCK); 75 | 76 | gather_points_grad_kernel_fast<<>>(b, c, n, npoints, grad_out, idx, grad_points); 77 | 78 | err = cudaGetLastError(); 79 | if (cudaSuccess != err) { 80 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 81 | exit(-1); 82 | } 83 | } 84 | 85 | 86 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){ 87 | const float v1 = dists[idx1], v2 = dists[idx2]; 88 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 89 | dists[idx1] = max(v1, v2); 90 | dists_i[idx1] = v2 > v1 ? i2 : i1; 91 | } 92 | 93 | template 94 | __global__ void furthest_point_sampling_kernel(int b, int n, int m, 95 | const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) { 96 | // dataset: (B, N, 3) 97 | // tmp: (B, N) 98 | // output: 99 | // idx: (B, M) 100 | 101 | if (m <= 0) return; 102 | __shared__ float dists[block_size]; 103 | __shared__ int dists_i[block_size]; 104 | 105 | int batch_index = blockIdx.x; 106 | dataset += batch_index * n * 3; 107 | temp += batch_index * n; 108 | idxs += batch_index * m; 109 | 110 | int tid = threadIdx.x; 111 | const int stride = block_size; 112 | 113 | int old = 0; 114 | if (threadIdx.x == 0) 115 | idxs[0] = old; 116 | 117 | __syncthreads(); 118 | for (int j = 1; j < m; j++) { 119 | int besti = 0; 120 | float best = -1; 121 | float x1 = dataset[old * 3 + 0]; 122 | float y1 = dataset[old * 3 + 1]; 123 | float z1 = dataset[old * 3 + 2]; 124 | for (int k = tid; k < n; k += stride) { 125 | float x2, y2, z2; 126 | x2 = dataset[k * 3 + 0]; 127 | y2 = dataset[k * 3 + 1]; 128 | z2 = dataset[k * 3 + 2]; 129 | // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 130 | // if (mag <= 1e-3) 131 | // continue; 132 | 133 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 134 | float d2 = min(d, temp[k]); 135 | temp[k] = d2; 136 | besti = d2 > best ? k : besti; 137 | best = d2 > best ? 
d2 : best; 138 | } 139 | dists[tid] = best; 140 | dists_i[tid] = besti; 141 | __syncthreads(); 142 | 143 | if (block_size >= 1024) { 144 | if (tid < 512) { 145 | __update(dists, dists_i, tid, tid + 512); 146 | } 147 | __syncthreads(); 148 | } 149 | 150 | if (block_size >= 512) { 151 | if (tid < 256) { 152 | __update(dists, dists_i, tid, tid + 256); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 256) { 157 | if (tid < 128) { 158 | __update(dists, dists_i, tid, tid + 128); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 128) { 163 | if (tid < 64) { 164 | __update(dists, dists_i, tid, tid + 64); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 64) { 169 | if (tid < 32) { 170 | __update(dists, dists_i, tid, tid + 32); 171 | } 172 | __syncthreads(); 173 | } 174 | if (block_size >= 32) { 175 | if (tid < 16) { 176 | __update(dists, dists_i, tid, tid + 16); 177 | } 178 | __syncthreads(); 179 | } 180 | if (block_size >= 16) { 181 | if (tid < 8) { 182 | __update(dists, dists_i, tid, tid + 8); 183 | } 184 | __syncthreads(); 185 | } 186 | if (block_size >= 8) { 187 | if (tid < 4) { 188 | __update(dists, dists_i, tid, tid + 4); 189 | } 190 | __syncthreads(); 191 | } 192 | if (block_size >= 4) { 193 | if (tid < 2) { 194 | __update(dists, dists_i, tid, tid + 2); 195 | } 196 | __syncthreads(); 197 | } 198 | if (block_size >= 2) { 199 | if (tid < 1) { 200 | __update(dists, dists_i, tid, tid + 1); 201 | } 202 | __syncthreads(); 203 | } 204 | 205 | old = dists_i[0]; 206 | if (tid == 0) 207 | idxs[j] = old; 208 | } 209 | } 210 | 211 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 212 | const float *dataset, float *temp, int *idxs, cudaStream_t stream) { 213 | // dataset: (B, N, 3) 214 | // tmp: (B, N) 215 | // output: 216 | // idx: (B, M) 217 | 218 | cudaError_t err; 219 | unsigned int n_threads = opt_n_threads(n); 220 | 221 | switch (n_threads) { 222 | case 1024: 223 | furthest_point_sampling_kernel<1024><<>>(b, n, m, dataset, temp, idxs); break; 224 | case 512: 225 | furthest_point_sampling_kernel<512><<>>(b, n, m, dataset, temp, idxs); break; 226 | case 256: 227 | furthest_point_sampling_kernel<256><<>>(b, n, m, dataset, temp, idxs); break; 228 | case 128: 229 | furthest_point_sampling_kernel<128><<>>(b, n, m, dataset, temp, idxs); break; 230 | case 64: 231 | furthest_point_sampling_kernel<64><<>>(b, n, m, dataset, temp, idxs); break; 232 | case 32: 233 | furthest_point_sampling_kernel<32><<>>(b, n, m, dataset, temp, idxs); break; 234 | case 16: 235 | furthest_point_sampling_kernel<16><<>>(b, n, m, dataset, temp, idxs); break; 236 | case 8: 237 | furthest_point_sampling_kernel<8><<>>(b, n, m, dataset, temp, idxs); break; 238 | case 4: 239 | furthest_point_sampling_kernel<4><<>>(b, n, m, dataset, temp, idxs); break; 240 | case 2: 241 | furthest_point_sampling_kernel<2><<>>(b, n, m, dataset, temp, idxs); break; 242 | case 1: 243 | furthest_point_sampling_kernel<1><<>>(b, n, m, dataset, temp, idxs); break; 244 | default: 245 | furthest_point_sampling_kernel<512><<>>(b, n, m, dataset, temp, idxs); 246 | } 247 | 248 | err = cudaGetLastError(); 249 | if (cudaSuccess != err) { 250 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 251 | exit(-1); 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /Pointnet2/tools/kitti_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial import Delaunay 3 | 
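# kitti_utils: KITTI label parsing (Object3d), calibration transforms
# (lidar -> rect -> image) and 3D box helpers (corners, enlargement, point-in-hull)
# used by tools/dataset.py to build per-point segmentation labels.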
import scipy 4 | 5 | 6 | def cls_type_to_id(cls_type): 7 | type_to_id = {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3, 'Van': 4} 8 | if cls_type not in type_to_id.keys(): 9 | return -1 10 | return type_to_id[cls_type] 11 | 12 | 13 | class Object3d(object): 14 | def __init__(self, line): 15 | label = line.strip().split(' ') 16 | self.src = line 17 | self.cls_type = label[0] 18 | self.cls_id = cls_type_to_id(self.cls_type) 19 | self.trucation = float(label[1]) 20 | self.occlusion = float(label[2]) # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown 21 | self.alpha = float(label[3]) 22 | self.box2d = np.array((float(label[4]), float(label[5]), float(label[6]), float(label[7])), dtype=np.float32) 23 | self.h = float(label[8]) 24 | self.w = float(label[9]) 25 | self.l = float(label[10]) 26 | self.pos = np.array((float(label[11]), float(label[12]), float(label[13])), dtype=np.float32) 27 | self.dis_to_cam = np.linalg.norm(self.pos) 28 | self.ry = float(label[14]) 29 | self.score = float(label[15]) if label.__len__() == 16 else -1.0 30 | self.level_str = None 31 | self.level = self.get_obj_level() 32 | 33 | def get_obj_level(self): 34 | height = float(self.box2d[3]) - float(self.box2d[1]) + 1 35 | 36 | if height >= 40 and self.trucation <= 0.15 and self.occlusion <= 0: 37 | self.level_str = 'Easy' 38 | return 1 # Easy 39 | elif height >= 25 and self.trucation <= 0.3 and self.occlusion <= 1: 40 | self.level_str = 'Moderate' 41 | return 2 # Moderate 42 | elif height >= 25 and self.trucation <= 0.5 and self.occlusion <= 2: 43 | self.level_str = 'Hard' 44 | return 3 # Hard 45 | else: 46 | self.level_str = 'UnKnown' 47 | return 4 48 | 49 | def generate_corners3d(self): 50 | """ 51 | generate corners3d representation for this object 52 | :return corners_3d: (8, 3) corners of box3d in camera coord 53 | """ 54 | l, h, w = self.l, self.h, self.w 55 | x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] 56 | y_corners = [0, 0, 0, 0, -h, -h, -h, -h] 57 | z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] 58 | 59 | R = np.array([[np.cos(self.ry), 0, np.sin(self.ry)], 60 | [0, 1, 0], 61 | [-np.sin(self.ry), 0, np.cos(self.ry)]]) 62 | corners3d = np.vstack([x_corners, y_corners, z_corners]) # (3, 8) 63 | corners3d = np.dot(R, corners3d).T 64 | corners3d = corners3d + self.pos 65 | return corners3d 66 | 67 | def to_str(self): 68 | print_str = '%s %.3f %.3f %.3f box2d: %s hwl: [%.3f %.3f %.3f] pos: %s ry: %.3f' \ 69 | % (self.cls_type, self.trucation, self.occlusion, self.alpha, self.box2d, self.h, self.w, self.l, 70 | self.pos, self.ry) 71 | return print_str 72 | 73 | def to_kitti_format(self): 74 | kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ 75 | % (self.cls_type, self.trucation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], 76 | self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.pos[0], self.pos[1], self.pos[2], 77 | self.ry) 78 | return kitti_str 79 | 80 | 81 | def get_calib_from_file(calib_file): 82 | with open(calib_file) as f: 83 | lines = f.readlines() 84 | 85 | obj = lines[2].strip().split(' ')[1:] 86 | P2 = np.array(obj, dtype=np.float32) 87 | obj = lines[3].strip().split(' ')[1:] 88 | P3 = np.array(obj, dtype=np.float32) 89 | obj = lines[4].strip().split(' ')[1:] 90 | R0 = np.array(obj, dtype=np.float32) 91 | obj = lines[5].strip().split(' ')[1:] 92 | Tr_velo_to_cam = np.array(obj, dtype=np.float32) 93 | 94 | return {'P2': P2.reshape(3, 4), 95 | 'P3': P3.reshape(3, 4), 96 | 'R0': 
R0.reshape(3, 3), 97 | 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)} 98 | 99 | 100 | class Calibration(object): 101 | def __init__(self, calib_file): 102 | if isinstance(calib_file, str): 103 | calib = get_calib_from_file(calib_file) 104 | else: 105 | calib = calib_file 106 | 107 | self.P2 = calib['P2'] # 3 x 4 108 | self.R0 = calib['R0'] # 3 x 3 109 | self.V2C = calib['Tr_velo2cam'] # 3 x 4 110 | 111 | def cart_to_hom(self, pts): 112 | """ 113 | :param pts: (N, 3 or 2) 114 | :return pts_hom: (N, 4 or 3) 115 | """ 116 | pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32))) 117 | return pts_hom 118 | 119 | def lidar_to_rect(self, pts_lidar): 120 | """ 121 | :param pts_lidar: (N, 3) 122 | :return pts_rect: (N, 3) 123 | """ 124 | pts_lidar_hom = self.cart_to_hom(pts_lidar) 125 | pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T)) 126 | return pts_rect 127 | 128 | def rect_to_img(self, pts_rect): 129 | """ 130 | :param pts_rect: (N, 3) 131 | :return pts_img: (N, 2) 132 | """ 133 | pts_rect_hom = self.cart_to_hom(pts_rect) 134 | pts_2d_hom = np.dot(pts_rect_hom, self.P2.T) 135 | pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) 136 | pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord 137 | return pts_img, pts_rect_depth 138 | 139 | def lidar_to_img(self, pts_lidar): 140 | """ 141 | :param pts_lidar: (N, 3) 142 | :return pts_img: (N, 2) 143 | """ 144 | pts_rect = self.lidar_to_rect(pts_lidar) 145 | pts_img, pts_depth = self.rect_to_img(pts_rect) 146 | return pts_img, pts_depth 147 | 148 | 149 | def get_objects_from_label(label_file): 150 | with open(label_file, 'r') as f: 151 | lines = f.readlines() 152 | objects = [Object3d(line) for line in lines] 153 | return objects 154 | 155 | 156 | def objs_to_boxes3d(obj_list): 157 | boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) 158 | for k, obj in enumerate(obj_list): 159 | boxes3d[k, 0:3], boxes3d[k, 3], boxes3d[k, 4], boxes3d[k, 5], boxes3d[k, 6] \ 160 | = obj.pos, obj.h, obj.w, obj.l, obj.ry 161 | return boxes3d 162 | 163 | 164 | def boxes3d_to_corners3d(boxes3d, rotate=True): 165 | """ 166 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 167 | :param rotate: 168 | :return: corners3d: (N, 8, 3) 169 | """ 170 | boxes_num = boxes3d.shape[0] 171 | h, w, l = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] 172 | x_corners = np.array([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2.], dtype=np.float32).T # (N, 8) 173 | z_corners = np.array([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dtype=np.float32).T # (N, 8) 174 | 175 | y_corners = np.zeros((boxes_num, 8), dtype=np.float32) 176 | y_corners[:, 4:8] = -h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8) 177 | 178 | if rotate: 179 | ry = boxes3d[:, 6] 180 | zeros, ones = np.zeros(ry.size, dtype=np.float32), np.ones(ry.size, dtype=np.float32) 181 | rot_list = np.array([[np.cos(ry), zeros, -np.sin(ry)], 182 | [zeros, ones, zeros], 183 | [np.sin(ry), zeros, np.cos(ry)]]) # (3, 3, N) 184 | R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3) 185 | 186 | temp_corners = np.concatenate((x_corners.reshape(-1, 8, 1), y_corners.reshape(-1, 8, 1), 187 | z_corners.reshape(-1, 8, 1)), axis=2) # (N, 8, 3) 188 | rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3) 189 | x_corners, y_corners, z_corners = rotated_corners[:, :, 0], rotated_corners[:, :, 1], rotated_corners[:, :, 2] 190 | 191 | x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2] 192 | 193 | x = 
x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8) 194 | y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8) 195 | z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8) 196 | 197 | corners = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)), axis=2) 198 | 199 | return corners.astype(np.float32) 200 | 201 | 202 | def enlarge_box3d(boxes3d, extra_width): 203 | """ 204 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 205 | """ 206 | if isinstance(boxes3d, np.ndarray): 207 | large_boxes3d = boxes3d.copy() 208 | else: 209 | large_boxes3d = boxes3d.clone() 210 | large_boxes3d[:, 3:6] += extra_width * 2 211 | large_boxes3d[:, 1] += extra_width 212 | return large_boxes3d 213 | 214 | 215 | def in_hull(p, hull): 216 | """ 217 | :param p: (N, K) test points 218 | :param hull: (M, K) M corners of a box 219 | :return (N) bool 220 | """ 221 | try: 222 | if not isinstance(hull, Delaunay): 223 | hull = Delaunay(hull) 224 | flag = hull.find_simplex(p) >= 0 225 | except scipy.spatial.qhull.QhullError: 226 | print('Warning: not a hull %s' % str(hull)) 227 | flag = np.zeros(p.shape[0], dtype=np.bool) 228 | 229 | return flag 230 | -------------------------------------------------------------------------------- /dataset/data_augmentation.py: -------------------------------------------------------------------------------- 1 | # introduced from fs-net 2 | import numpy as np 3 | import cv2 4 | import torch 5 | import math 6 | 7 | 8 | 9 | # add noise to mask 10 | def defor_2D(roi_mask, rand_r=2, rand_pro=0.3): 11 | ''' 12 | :param roi_mask: 256 x 256 13 | :param rand_r: randomly expand or shrink the mask iter rand_r 14 | :return: 15 | ''' 16 | roi_mask = roi_mask.copy().squeeze() 17 | if np.random.rand() > rand_pro: 18 | return roi_mask 19 | mask = roi_mask.copy() 20 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2)) 21 | mask_erode = cv2.erode(mask, kernel, rand_r) # rand_r 22 | mask_dilate = cv2.dilate(mask, kernel, rand_r) 23 | change_list = roi_mask[mask_erode != mask_dilate] 24 | l_list = change_list.size 25 | if l_list < 1.0: 26 | return roi_mask 27 | choose = np.random.choice(l_list, l_list // 2, replace=False) 28 | change_list = np.ones_like(change_list) 29 | change_list[choose] = 0.0 30 | roi_mask[mask_erode != mask_dilate] = change_list 31 | roi_mask[roi_mask > 0.0] = 1.0 32 | return roi_mask 33 | 34 | 35 | # point cloud based data augmentation 36 | # augment based on bounding box 37 | def defor_3D_bb(pc, R, t, s, nocs, model, sym=None, aug_bb=None): 38 | # pc n x 3, here s must be the original s 39 | pc_reproj = torch.mm(R.T, (pc - t.view(1, 3)).T).T # nn x 3 40 | if sym[0] == 1: # y axis symmetry 41 | ex = aug_bb[0] 42 | ey = aug_bb[1] 43 | ez = aug_bb[2] 44 | 45 | exz = (ex + ez) / 2 46 | pc_reproj[:, (0, 2)] = pc_reproj[:, (0, 2)] * exz 47 | pc_reproj[:, 1] = pc_reproj[:, 1] * ey 48 | nocs_scale_aug = torch.norm(torch.tensor([s[0] * exz, s[1] * ey, s[2] * exz])) / torch.norm(s) 49 | s[0] = s[0] * exz 50 | s[1] = s[1] * ey 51 | s[2] = s[2] * exz 52 | nocs[:, 0] = nocs[:, 0] * exz / nocs_scale_aug 53 | nocs[:, 1] = nocs[:, 1] * ey / nocs_scale_aug 54 | nocs[:, 2] = nocs[:, 2] * exz / nocs_scale_aug 55 | model[:, 0] = model[:, 0] * exz / nocs_scale_aug 56 | model[:, 1] = model[:, 1] * ey / nocs_scale_aug 57 | model[:, 2] = model[:, 2] * exz / nocs_scale_aug 58 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 59 | pc_new = pc_new.T 60 | else: 61 | ex = aug_bb[0] 62 | ey = aug_bb[1] 63 | ez = aug_bb[2] 64 | nocs_scale_aug = torch.norm(torch.tensor([s[0] * ex, s[1] * 
ey, s[2] * ez])) / torch.norm(s) 65 | pc_reproj[:, 0] = pc_reproj[:, 0] * ex 66 | pc_reproj[:, 1] = pc_reproj[:, 1] * ey 67 | pc_reproj[:, 2] = pc_reproj[:, 2] * ez 68 | s[0] = s[0] * ex 69 | s[1] = s[1] * ey 70 | s[2] = s[2] * ez 71 | nocs[:, 0] = nocs[:, 0] * ex / nocs_scale_aug 72 | nocs[:, 1] = nocs[:, 1] * ey / nocs_scale_aug 73 | nocs[:, 2] = nocs[:, 2] * ez / nocs_scale_aug 74 | model[:, 0] = model[:, 0] * ex / nocs_scale_aug 75 | model[:, 1] = model[:, 1] * ey / nocs_scale_aug 76 | model[:, 2] = model[:, 2] * ez / nocs_scale_aug 77 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 78 | pc_new = pc_new.T 79 | return pc_new, s, nocs, model ,nocs_scale_aug 80 | 81 | 82 | def defor_3D_bc(pc, R, t, s, model_point, nocs_scale, nocs): 83 | # resize box cage along y axis, the size s is modified 84 | ey_up = torch.rand(1, device=pc.device) * (1.2 - 0.8) + 0.8 85 | ey_down = torch.rand(1, device=pc.device) * (1.2 - 0.8) + 0.8 86 | # for each point, resize its x and z linealy 87 | pc_reproj = torch.mm(R.T, (pc - t.view(1, 3)).T).T # nn x 3 88 | per_point_resize = (pc_reproj[:, 1] + s[1] / 2) / s[1] * (ey_up - ey_down) + ey_down 89 | pc_reproj[:, 0] = pc_reproj[:, 0] * per_point_resize 90 | pc_reproj[:, 2] = pc_reproj[:, 2] * per_point_resize 91 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 92 | pc_new = pc_new.T 93 | 94 | norm_s = s / torch.norm(s) 95 | model_point_resize = (model_point[:, 1] + norm_s[1] / 2) / norm_s[1] * (ey_up - ey_down) + ey_down 96 | model_point[:, 0] = model_point[:, 0] * model_point_resize 97 | model_point[:, 2] = model_point[:, 2] * model_point_resize 98 | 99 | lx = 2 * max(max(model_point[:, 0]), -min(model_point[:, 0])) 100 | ly = max(model_point[:, 1]) - min(model_point[:, 1]) 101 | lz = max(model_point[:, 2]) - min(model_point[:, 2]) 102 | 103 | lx_t = lx * torch.norm(s) 104 | ly_t = ly * torch.norm(s) 105 | lz_t = lz * torch.norm(s) 106 | size_new = torch.tensor([lx_t, ly_t, lz_t], device=pc.device) 107 | 108 | nocs_scale_aug = torch.norm(torch.tensor([lx, ly, lz])) 109 | model_point = model_point / nocs_scale_aug 110 | 111 | nocs_resize = (nocs[:, 1] + norm_s[1] / 2) / norm_s[1] * (ey_up - ey_down) + ey_down 112 | nocs[:, 0] = nocs[:, 0] * nocs_resize 113 | nocs[:, 2] = nocs[:, 2] * nocs_resize 114 | nocs = nocs / nocs_scale_aug 115 | 116 | return pc_new, size_new, model_point, nocs,nocs_scale_aug 117 | 118 | 119 | # point cloud based data augmentation 120 | # augment based on bounding box 121 | def deform_non_linear(pc, R, t, s, nocs, model_point, axis=0): 122 | # pc n x 3, here s must be the original s 123 | assert axis in [0, 1] 124 | r_max = torch.rand(1, device=pc.device) * 0.2 + 1.1 125 | r_min = -torch.rand(1, device=pc.device) * 0.2 + 0.9 126 | # for each point, resize its x and z 127 | pc_reproj = torch.mm(R.T, (pc - t.view(1, 3)).T).T # nn x 3 128 | per_point_resize = r_min + 4 * (pc_reproj[:, axis] * pc_reproj[:, axis]) / (s[axis] ** 2) * (r_max - r_min) 129 | pc_reproj[:, axis] = pc_reproj[:, axis] * per_point_resize 130 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 131 | pc_new = pc_new.T 132 | 133 | norm_s = s / torch.norm(s) 134 | model_point_resize = r_min + 4 * (model_point[:, axis] * model_point[:, axis]) / (norm_s[axis] ** 2) * (r_max - r_min) 135 | model_point[:, axis] = model_point[:, axis] * model_point_resize 136 | 137 | lx = 2 * max(max(model_point[:, 0]), -min(model_point[:, 0])) 138 | ly = max(model_point[:, 1]) - min(model_point[:, 1]) 139 | lz = max(model_point[:, 2]) - min(model_point[:, 2]) 140 | 141 | lx_t = lx * 
torch.norm(s) 142 | ly_t = ly * torch.norm(s) 143 | lz_t = lz * torch.norm(s) 144 | size_new = torch.tensor([lx_t, ly_t, lz_t], device=pc.device) 145 | 146 | nocs_scale_aug = torch.norm(torch.tensor([lx, ly, lz])) 147 | model_point = model_point / nocs_scale_aug 148 | 149 | nocs_resize = r_min + 4 * (nocs[:, axis] * nocs[:, axis]) / (norm_s[axis] ** 2) * (r_max - r_min) 150 | nocs[:, axis] = nocs[:, axis] * nocs_resize 151 | nocs = nocs / nocs_scale_aug 152 | return pc_new, size_new, model_point, nocs ,nocs_scale_aug 153 | 154 | 155 | def defor_3D_pc(pc, r): 156 | points_defor = torch.clip(r*torch.randn(pc.shape).to(pc.device),min=-0.005,max=0.005) 157 | # points_defor=0.01*torch.randn(pc.shape).to(pc.device) 158 | # points_defor=torch.clamp(points_defor,min=-(pc*0.02),max=(pc*0.02)) 159 | pc = pc + points_defor 160 | return pc 161 | 162 | 163 | # point cloud based data augmentation 164 | # random rotation and translation 165 | def defor_3D_rt(pc, R, t, aug_rt_t, aug_rt_r): 166 | # add_t 167 | dx = aug_rt_t[0] 168 | dy = aug_rt_t[1] 169 | dz = aug_rt_t[2] 170 | 171 | pc[:, 0] = pc[:, 0] + dx 172 | pc[:, 1] = pc[:, 1] + dy 173 | pc[:, 2] = pc[:, 2] + dz 174 | t[0] = t[0] + dx 175 | t[1] = t[1] + dy 176 | t[2] = t[2] + dz 177 | 178 | # add r 179 | ''' 180 | Rm = get_rotation(np.random.uniform(-a, a), np.random.uniform(-a, a), np.random.uniform(-a, a)) 181 | Rm_tensor = torch.tensor(Rm, device=pc.device) 182 | pc_new = torch.mm(Rm_tensor, pc.T).T 183 | pc = pc_new 184 | R_new = torch.mm(Rm_tensor, R) 185 | R = R_new 186 | ''' 187 | ''' 188 | x_rot = torch.rand(1, dtype=torch.float32, device=pc.device) * 2 * a - a 189 | y_rot = torch.rand(1, dtype=torch.float32, device=pc.device) * 2 * a - a 190 | z_rot = torch.rand(1, dtype=torch.float32, device=pc.device) * 2 * a - a 191 | Rm = get_rotation_torch(x_rot, y_rot, z_rot) 192 | ''' 193 | Rm = aug_rt_r 194 | pc_new = torch.mm(Rm, pc.T).T 195 | pc = pc_new 196 | R_new = torch.mm(Rm, R) 197 | R = R_new 198 | T_new = torch.mm(Rm, t.view(3, 1)) 199 | t = T_new 200 | 201 | return pc, R, t 202 | 203 | 204 | def get_rotation(x_, y_, z_): 205 | # print(math.cos(math.pi/2)) 206 | x = float(x_ / 180) * math.pi 207 | y = float(y_ / 180) * math.pi 208 | z = float(z_ / 180) * math.pi 209 | R_x = np.array([[1, 0, 0], 210 | [0, math.cos(x), -math.sin(x)], 211 | [0, math.sin(x), math.cos(x)]]) 212 | 213 | R_y = np.array([[math.cos(y), 0, math.sin(y)], 214 | [0, 1, 0], 215 | [-math.sin(y), 0, math.cos(y)]]) 216 | 217 | R_z = np.array([[math.cos(z), -math.sin(z), 0], 218 | [math.sin(z), math.cos(z), 0], 219 | [0, 0, 1]]) 220 | return np.dot(R_z, np.dot(R_y, R_x)).astype(np.float32) 221 | 222 | def get_rotation_torch(x_, y_, z_): 223 | x = (x_ / 180) * math.pi 224 | y = (y_ / 180) * math.pi 225 | z = (z_ / 180) * math.pi 226 | R_x = torch.tensor([[1, 0, 0], 227 | [0, math.cos(x), -math.sin(x)], 228 | [0, math.sin(x), math.cos(x)]], device=x_.device) 229 | 230 | R_y = torch.tensor([[math.cos(y), 0, math.sin(y)], 231 | [0, 1, 0], 232 | [-math.sin(y), 0, math.cos(y)]], device=y_.device) 233 | 234 | R_z = torch.tensor([[math.cos(z), -math.sin(z), 0], 235 | [math.sin(z), math.cos(z), 0], 236 | [0, 0, 1]], device=z_.device) 237 | return torch.mm(R_z, torch.mm(R_y, R_x)) 238 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | import argparse 4 | import os 5 | import shutil 6 | 7 | import torch 8 | 
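# Training entry point: loads an mmengine Config (--cfg / --cfg-options), snapshots the
# config and source tree into the checkpoint directory, then either runs main_worker
# directly or spawns one process per GPU when cfg.DDP is set.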
import torch.backends.cudnn as cudnn 9 | import torch.distributed as dist 10 | import torch.multiprocessing as mp 11 | import torch.nn.parallel 12 | import torch.optim 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | from collections import Counter 16 | 17 | import random 18 | import numpy as np 19 | from network import NETWORK_REGISTRY 20 | from mmengine import Config,DictAction 21 | from core.trainer import Trainer 22 | from dataset import DATALOADER_REGISTRY 23 | from utils.logging import create_checkpoint, setup_logger 24 | from utils.utils import OPTIMIZER_REGISTRY, save_checkpoint, SCHEDULER_REGISTRY,farthest_point_sample,index_points 25 | 26 | def set_random_seed(seed, deterministic=False): 27 | random.seed(seed) 28 | np.random.seed(seed) 29 | torch.manual_seed(seed) 30 | torch.cuda.manual_seed_all(seed) 31 | if deterministic: 32 | torch.backends.cudnn.deterministic = True 33 | torch.backends.cudnn.benchmark = False 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser(description='Train') 37 | # general 38 | parser.add_argument('--cfg', 39 | help='experiment configure file name', 40 | required=True, 41 | type=str) 42 | parser.add_argument('--cfg-options', 43 | nargs='+', 44 | action=DictAction, 45 | help='override some settings in the used config, the key-value pair ' 46 | 'in xxx=yyy format will be merged into config file. If the value to ' 47 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 48 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 49 | 'Note that the quotation marks are necessary and that no white space ' 50 | 'is allowed.') 51 | # distributed training 52 | parser.add_argument('--gpus', 53 | help='gpu ids for ddp training', 54 | type=str) 55 | parser.add_argument('--port', 56 | default='23459', 57 | type=str, 58 | help='port used to set up distributed training') 59 | parser.add_argument('--dist-url', 60 | default='tcp://127.0.0.1', 61 | type=str, 62 | help='url used to set up distributed training') 63 | args = parser.parse_args() 64 | 65 | return args 66 | 67 | def main(): 68 | args = parse_args() 69 | cfg = Config.fromfile(args.cfg) 70 | if args.cfg_options is not None: 71 | cfg.merge_from_dict(args.cfg_options) 72 | cfg.MODEL.vis=cfg.VIS 73 | assert cfg.train 74 | print(cfg.pretty_text) 75 | 76 | final_output_dir = create_checkpoint(cfg, 'train') 77 | 78 | if args.gpus is not None: 79 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 80 | dist_url = args.dist_url + ':{}'.format(args.port) 81 | # save config file 82 | if not cfg.VIS: 83 | print('save cfg and source') 84 | src_folder = os.path.join(final_output_dir, 'src') 85 | if os.path.exists(os.path.join(src_folder, 'network')): 86 | shutil.rmtree(os.path.join(src_folder, 'network')) 87 | shutil.copytree('network', os.path.join(src_folder, 'network')) 88 | if os.path.exists(os.path.join(src_folder, 'tools')): 89 | shutil.rmtree(os.path.join(src_folder, 'tools')) 90 | shutil.copytree('tools', os.path.join(src_folder, 'tools')) 91 | if os.path.exists(os.path.join(src_folder, 'cfg.py')): 92 | os.remove(os.path.join(src_folder, 'cfg.py')) 93 | cfg.dump(os.path.join(src_folder, 'cfg.py')) 94 | 95 | ngpus_per_node = torch.cuda.device_count() 96 | 97 | set_random_seed(123) 98 | 99 | if cfg.DDP: 100 | world_size = ngpus_per_node 101 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(world_size, dist_url, final_output_dir, cfg)) 102 | else: 103 | main_worker(0, 1, dist_url, final_output_dir, cfg) 104 | 105 | def main_worker(rank, world_size, 
dist_url, final_output_dir, cfg): 106 | set_random_seed(42) 107 | if rank==0: 108 | logger, _ = setup_logger(final_output_dir, rank, 'train',cfg.VIS) 109 | else: 110 | logger=None 111 | 112 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 113 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 114 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 115 | 116 | print("Use GPU: {} for training".format(rank)) 117 | if cfg.DDP: 118 | print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(dist_url, world_size, rank)) 119 | dist.init_process_group( 120 | backend=cfg.DIST_BACKEND, 121 | init_method=dist_url, 122 | world_size=world_size, 123 | rank=rank 124 | ) 125 | 126 | # Data loading code 127 | train_loader = DATALOADER_REGISTRY.build(cfg) 128 | 129 | model = NETWORK_REGISTRY.build(cfg.MODEL) 130 | 131 | def count_parameters(model): 132 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 133 | if rank==0: 134 | logger.info("Total params: {num:.3f}M".format(num=count_parameters(model)/1e6)) 135 | if cfg.DDP: 136 | print(rank) 137 | torch.cuda.set_device(rank) 138 | model.cuda(rank) 139 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank],find_unused_parameters=cfg.find_unused_parameters) 140 | else: 141 | model = torch.nn.DataParallel(model).cuda() 142 | 143 | 144 | 145 | best_perf = -1 146 | last_epoch = -1 147 | optimizer = OPTIMIZER_REGISTRY.build(cfg=cfg.OPTIMIZER, parameters=model.parameters()) 148 | lr_scheduler = SCHEDULER_REGISTRY.build(cfg=cfg.SCHEDULER, optimizer=optimizer) 149 | 150 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 151 | if cfg.AUTO_RESUME: 152 | if cfg.RESUME_FILE != '': 153 | checkpoint_file = os.path.join( 154 | final_output_dir, 'model', cfg.RESUME_FILE) 155 | else: 156 | checkpoint_file = os.path.join( 157 | final_output_dir, 'model', 'checkpoint.pth.tar') 158 | print(checkpoint_file) 159 | if os.path.exists(checkpoint_file): 160 | if rank==0: 161 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 162 | checkpoint = torch.load(checkpoint_file, map_location=lambda storage, loc: storage) 163 | model.load_state_dict(checkpoint['state_dict']) 164 | if not cfg.ONLY_MODEL: 165 | begin_epoch = checkpoint['epoch'] 166 | best_perf = checkpoint['perf'] 167 | last_epoch = checkpoint['epoch'] 168 | optimizer.load_state_dict(checkpoint['optimizer']) 169 | if cfg.CHANGE_SCHEDULE: 170 | milestones=cfg.SCHEDULER.milestones 171 | new_counter={stones:1 for stones in milestones} 172 | new_counter=Counter(new_counter) 173 | checkpoint['scheduler']['milestones']=new_counter 174 | checkpoint['scheduler']['gamma']=cfg.SCHEDULER.gamma 175 | if 'scheduler' in checkpoint.keys(): 176 | lr_scheduler.load_state_dict(checkpoint['scheduler']) 177 | 178 | # if cfg.CHANGE_SCHEDULE: 179 | # lr_scheduler.step() 180 | # print('lr',optimizer.state_dict()['param_groups'][0]['lr']) 181 | if rank==0: 182 | logger.info("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 183 | 184 | 185 | is_iter=cfg.is_iter 186 | if is_iter: 187 | trainer = Trainer(cfg, model, rank, final_output_dir,logger=logger,lr_scheduler=lr_scheduler) 188 | else: 189 | trainer = Trainer(cfg, model, rank, final_output_dir,logger=logger) 190 | 191 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 192 | train_loader.dataset.set_epoch(epoch) 193 | if cfg.DDP: 194 | train_loader.sampler.set_epoch(epoch) 195 | 196 | 197 | trainer.train(epoch, train_loader, optimizer) 198 | 199 | if not is_iter: 200 | lr_scheduler.step() 201 | 202 | 
perf_indicator = epoch 203 | if perf_indicator >= best_perf: 204 | 205 | best_perf = perf_indicator 206 | best_model = True 207 | else: 208 | best_model = False 209 | 210 | if not cfg.DDP or (cfg.DDP and rank == 0 and epoch%(cfg.TRAIN.SAVE_EPOCH_STEP)==0) and not cfg.VIS: 211 | file_name='checkpoint_epoch_'+str(epoch)+'.tar.pth' 212 | if rank==0: 213 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 214 | save_checkpoint({ 215 | 'epoch': epoch + 1, 216 | 'model': cfg.MODEL.type, 217 | 'state_dict': model.state_dict(), 218 | 'best_state_dict': model.module.state_dict(), 219 | 'perf': perf_indicator, 220 | 'optimizer': optimizer.state_dict(), 221 | 'scheduler':lr_scheduler.state_dict() 222 | }, best_model,final_output_dir, filename=file_name) 223 | if best_model: 224 | torch.save( 225 | model.module.state_dict(), 226 | os.path.join(final_output_dir, 'model_best.pth.tar') 227 | ) 228 | 229 | final_model_state_file = os.path.join( 230 | final_output_dir, 'model', 'final_state{}.pth.tar'.format(rank) 231 | ) 232 | if rank==0: 233 | logger.info('saving final model state to {}'.format(final_model_state_file)) 234 | torch.save(model.module.state_dict(), final_model_state_file) 235 | 236 | if __name__ == '__main__': 237 | 238 | main() -------------------------------------------------------------------------------- /Pointnet2/pointnet2/pointnet2_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.autograd import Function 4 | import torch.nn as nn 5 | from typing import Tuple 6 | 7 | import pointnet2_cuda as pointnet2 8 | 9 | 10 | class FurthestPointSampling(Function): 11 | @staticmethod 12 | def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor: 13 | """ 14 | Uses iterative furthest point sampling to select a set of npoint features that have the largest 15 | minimum distance 16 | :param ctx: 17 | :param xyz: (B, N, 3) where N > npoint 18 | :param npoint: int, number of features in the sampled set 19 | :return: 20 | output: (B, npoint) tensor containing the set 21 | """ 22 | assert xyz.is_contiguous() 23 | 24 | B, N, _ = xyz.size() 25 | output = torch.cuda.IntTensor(B, npoint) 26 | temp = torch.cuda.FloatTensor(B, N).fill_(1e10) 27 | 28 | pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output) 29 | return output 30 | 31 | @staticmethod 32 | def backward(xyz, a=None): 33 | return None, None 34 | 35 | 36 | furthest_point_sample = FurthestPointSampling.apply 37 | 38 | 39 | class GatherOperation(Function): 40 | 41 | @staticmethod 42 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: 43 | """ 44 | :param ctx: 45 | :param features: (B, C, N) 46 | :param idx: (B, npoint) index tensor of the features to gather 47 | :return: 48 | output: (B, C, npoint) 49 | """ 50 | assert features.is_contiguous() 51 | assert idx.is_contiguous() 52 | 53 | B, npoint = idx.size() 54 | _, C, N = features.size() 55 | output = torch.cuda.FloatTensor(B, C, npoint) 56 | 57 | pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output) 58 | 59 | ctx.for_backwards = (idx, C, N) 60 | return output 61 | 62 | @staticmethod 63 | def backward(ctx, grad_out): 64 | idx, C, N = ctx.for_backwards 65 | B, npoint = idx.size() 66 | 67 | grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) 68 | grad_out_data = grad_out.data.contiguous() 69 | pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data) 70 | 
return grad_features, None 71 | 72 | 73 | gather_operation = GatherOperation.apply 74 | 75 | 76 | class ThreeNN(Function): 77 | 78 | @staticmethod 79 | def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 80 | """ 81 | Find the three nearest neighbors of unknown in known 82 | :param ctx: 83 | :param unknown: (B, N, 3) 84 | :param known: (B, M, 3) 85 | :return: 86 | dist: (B, N, 3) l2 distance to the three nearest neighbors 87 | idx: (B, N, 3) index of 3 nearest neighbors 88 | """ 89 | assert unknown.is_contiguous() 90 | assert known.is_contiguous() 91 | 92 | B, N, _ = unknown.size() 93 | m = known.size(1) 94 | dist2 = torch.cuda.FloatTensor(B, N, 3) 95 | idx = torch.cuda.IntTensor(B, N, 3) 96 | 97 | pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx) 98 | return torch.sqrt(dist2), idx 99 | 100 | @staticmethod 101 | def backward(ctx, a=None, b=None): 102 | return None, None 103 | 104 | 105 | three_nn = ThreeNN.apply 106 | 107 | 108 | class ThreeInterpolate(Function): 109 | 110 | @staticmethod 111 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: 112 | """ 113 | Performs weight linear interpolation on 3 features 114 | :param ctx: 115 | :param features: (B, C, M) Features descriptors to be interpolated from 116 | :param idx: (B, n, 3) three nearest neighbors of the target features in features 117 | :param weight: (B, n, 3) weights 118 | :return: 119 | output: (B, C, N) tensor of the interpolated features 120 | """ 121 | assert features.is_contiguous() 122 | assert idx.is_contiguous() 123 | assert weight.is_contiguous() 124 | 125 | B, c, m = features.size() 126 | n = idx.size(1) 127 | ctx.three_interpolate_for_backward = (idx, weight, m) 128 | output = torch.cuda.FloatTensor(B, c, n) 129 | 130 | pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output) 131 | return output 132 | 133 | @staticmethod 134 | def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 135 | """ 136 | :param ctx: 137 | :param grad_out: (B, C, N) tensor with gradients of outputs 138 | :return: 139 | grad_features: (B, C, M) tensor with gradients of features 140 | None: 141 | None: 142 | """ 143 | idx, weight, m = ctx.three_interpolate_for_backward 144 | B, c, n = grad_out.size() 145 | 146 | grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_()) 147 | grad_out_data = grad_out.data.contiguous() 148 | 149 | pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data) 150 | return grad_features, None, None 151 | 152 | 153 | three_interpolate = ThreeInterpolate.apply 154 | 155 | 156 | class GroupingOperation(Function): 157 | 158 | @staticmethod 159 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: 160 | """ 161 | :param ctx: 162 | :param features: (B, C, N) tensor of features to group 163 | :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with 164 | :return: 165 | output: (B, C, npoint, nsample) tensor 166 | """ 167 | assert features.is_contiguous() 168 | assert idx.is_contiguous() 169 | 170 | B, nfeatures, nsample = idx.size() 171 | _, C, N = features.size() 172 | output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) 173 | 174 | pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output) 175 | 176 | ctx.for_backwards = (idx, N) 177 | return output 178 | 179 | @staticmethod 180 | def backward(ctx, grad_out: torch.Tensor) 
-> Tuple[torch.Tensor, torch.Tensor]: 181 | """ 182 | :param ctx: 183 | :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward 184 | :return: 185 | grad_features: (B, C, N) gradient of the features 186 | """ 187 | idx, N = ctx.for_backwards 188 | 189 | B, C, npoint, nsample = grad_out.size() 190 | grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) 191 | 192 | grad_out_data = grad_out.data.contiguous() 193 | pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) 194 | return grad_features, None 195 | 196 | 197 | grouping_operation = GroupingOperation.apply 198 | 199 | 200 | class BallQuery(Function): 201 | 202 | @staticmethod 203 | def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor: 204 | """ 205 | :param ctx: 206 | :param radius: float, radius of the balls 207 | :param nsample: int, maximum number of features in the balls 208 | :param xyz: (B, N, 3) xyz coordinates of the features 209 | :param new_xyz: (B, npoint, 3) centers of the ball query 210 | :return: 211 | idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls 212 | """ 213 | assert new_xyz.is_contiguous() 214 | assert xyz.is_contiguous() 215 | 216 | B, N, _ = xyz.size() 217 | npoint = new_xyz.size(1) 218 | idx = torch.cuda.IntTensor(B, npoint, nsample).zero_() 219 | 220 | pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx) 221 | return idx 222 | 223 | @staticmethod 224 | def backward(ctx, a=None): 225 | return None, None, None, None 226 | 227 | 228 | ball_query = BallQuery.apply 229 | 230 | 231 | class QueryAndGroup(nn.Module): 232 | def __init__(self, radius: float, nsample: int, use_xyz: bool = True,norm=False): 233 | """ 234 | :param radius: float, radius of ball 235 | :param nsample: int, maximum number of features to gather in the ball 236 | :param use_xyz: 237 | """ 238 | super().__init__() 239 | self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz 240 | self.norm=norm 241 | 242 | def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]: 243 | """ 244 | :param xyz: (B, N, 3) xyz coordinates of the features 245 | :param new_xyz: (B, npoint, 3) centroids 246 | :param features: (B, C, N) descriptors of the features 247 | :return: 248 | new_features: (B, 3 + C, npoint, nsample) 249 | """ 250 | idx = ball_query(self.radius, self.nsample, xyz, new_xyz) 251 | xyz_trans = xyz.transpose(1, 2).contiguous() 252 | grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) 253 | grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1) 254 | if self.norm: 255 | grouped_xyz/=self.radius 256 | 257 | if features is not None: 258 | grouped_features = grouping_operation(features, idx) 259 | if self.use_xyz: 260 | new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, C + 3, npoint, nsample) 261 | else: 262 | new_features = grouped_features 263 | else: 264 | assert self.use_xyz, "Cannot have not features and not use xyz as a feature!" 
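# When no per-point feature tensor is supplied, the grouped local xyz offsets themselves serve as the features (hence the assert above).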
265 | new_features = grouped_xyz 266 | 267 | return new_features 268 | 269 | 270 | class GroupAll(nn.Module): 271 | def __init__(self, use_xyz: bool = True): 272 | super().__init__() 273 | self.use_xyz = use_xyz 274 | 275 | def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): 276 | """ 277 | :param xyz: (B, N, 3) xyz coordinates of the features 278 | :param new_xyz: ignored 279 | :param features: (B, C, N) descriptors of the features 280 | :return: 281 | new_features: (B, C + 3, 1, N) 282 | """ 283 | grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) 284 | if features is not None: 285 | grouped_features = features.unsqueeze(2) 286 | if self.use_xyz: 287 | new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, 3 + C, 1, N) 288 | else: 289 | new_features = grouped_features 290 | else: 291 | new_features = grouped_xyz 292 | 293 | return new_features 294 | -------------------------------------------------------------------------------- /dataset/pose_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import math 5 | import random 6 | import numpy as np 7 | import time 8 | import _pickle as cPickle 9 | # from PIL import Image 10 | from tqdm import tqdm 11 | import torch.utils.data as data 12 | import torch 13 | from utils import load_depth, get_bbox 14 | from mmengine import Registry 15 | from .data_augmentation import defor_3D_pc, defor_3D_bb, defor_3D_rt, defor_3D_bc, deform_non_linear,get_rotation 16 | 17 | DATASET_REGISTRY = Registry("DATASET") 18 | 19 | @DATASET_REGISTRY.register_module() 20 | class PoseDataset(data.Dataset): 21 | def __init__(self, source, mode, data_dir, n_pts, vis,img_size=192, per_obj=None,use_cache=False,use_augment=True): 22 | """ 23 | Args: 24 | source: 'CAMERA', 'Real' or 'CAMERA+Real' 25 | mode: 'train' or 'test' 26 | data_dir: 27 | n_pts: number of selected foreground points 28 | """ 29 | 30 | self.source = source 31 | self.mode = mode 32 | self.data_dir = data_dir 33 | self.n_pts = n_pts 34 | self.vis=vis 35 | self.per_obj=per_obj 36 | self.img_size=img_size 37 | self.use_augment=use_augment 38 | 39 | 40 | assert source in ['CAMERA', 'Real', 'CAMERA+Real'] 41 | assert mode in ['train', 'test'] 42 | img_list_path = ['CAMERA/train_list.txt', 'Real/train_list.txt', 43 | 'CAMERA/val_list.txt', 'Real/test_list.txt'] 44 | model_file_path = ['obj_models/camera_train.pkl', 'obj_models/real_train.pkl', 45 | 'obj_models/camera_val.pkl', 'obj_models/real_test.pkl'] 46 | if mode == 'train': 47 | del img_list_path[2:] 48 | del model_file_path[2:] 49 | else: 50 | del img_list_path[:2] 51 | del model_file_path[:2] 52 | if source == 'CAMERA': 53 | del img_list_path[-1] 54 | del model_file_path[-1] 55 | elif source == 'Real': 56 | del img_list_path[0] 57 | del model_file_path[0] 58 | elif source=='CAMERA+Real': 59 | del img_list_path[2:] 60 | 61 | img_list = [] 62 | subset_len = [] 63 | for path in img_list_path: 64 | img_list += [os.path.join(path.split('/')[0], line.rstrip('\n')) 65 | for line in open(os.path.join(data_dir, path))] 66 | subset_len.append(len(img_list)) 67 | if len(subset_len) == 2: 68 | self.subset_len = [subset_len[0], subset_len[1]-subset_len[0]] 69 | 70 | if per_obj is not None: 71 | self.img_list=[] 72 | for img in img_list: 73 | img_path = os.path.join(self.data_dir, img) 74 | with open(img_path + '_label.pkl', 'rb') as f: 75 | gts = cPickle.load(f) 76 | b=False 77 | for i in range(len(gts['instance_ids'])): 78 | if 
gts['class_ids'][i]-1==self.per_obj: 79 | b=True 80 | break 81 | if b: 82 | self.img_list.append(img) 83 | else: 84 | self.img_list = img_list 85 | self.length = len(self.img_list) 86 | 87 | 88 | self.random=list(range(self.length)) 89 | random.seed(1002) 90 | random.shuffle(self.random) 91 | 92 | # meta info for re-label mug category 93 | with open(os.path.join(data_dir, 'obj_models/mug_meta.pkl'), 'rb') as f: 94 | self.mug_meta = cPickle.load(f) 95 | 96 | self.mean_shapes = np.load('assets/mean_points_emb.npy') 97 | self.cat_names = ['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug'] 98 | self.camera_intrinsics = [577.5, 577.5, 319.5, 239.5] # [fx, fy, cx, cy] 99 | self.real_intrinsics = [591.0125, 590.16775, 322.525, 244.11084] 100 | self.sym_ids = [0, 1, 3] # 0-indexed 101 | self.norm_scale = 1000.0 # normalization scale 102 | self.shift_range = 0.01 103 | 104 | models = {} 105 | for path in model_file_path: 106 | with open(os.path.join(data_dir, path), 'rb') as f: 107 | models.update(cPickle.load(f)) 108 | self.models = models 109 | 110 | self.xmap = np.array([[i for i in range(640)] for j in range(480)]) 111 | self.ymap = np.array([[j for i in range(640)] for j in range(480)]) 112 | 113 | print('{} images found.'.format(self.length)) 114 | 115 | def __len__(self): 116 | return self.length 117 | 118 | def set_epoch(self,epoch): 119 | random.seed(1234+epoch) 120 | 121 | def __getitem__(self, index): 122 | index=self.random[index] 123 | img_path = os.path.join(self.data_dir, self.img_list[index]) 124 | 125 | id=self.img_list[index].split('/')[-1] 126 | 127 | 128 | 129 | if self.vis: 130 | rgb = cv2.imread(img_path + '_color.png')[:, :, :3] 131 | image=rgb.copy() 132 | 133 | # rgb = rgb[:, :, ::-1] 134 | 135 | depth = load_depth(img_path) 136 | 137 | mask = cv2.imread(img_path + '_mask.png')[:, :, 2] 138 | 139 | coord = cv2.imread(img_path + '_coord.png')[:, :, :3] 140 | coord = coord[:, :, (2, 1, 0)] 141 | coord = np.array(coord, dtype=np.float32) / 255 142 | coord[:, :, 2] = 1 - coord[:, :, 2] 143 | 144 | with open(img_path + '_label.pkl', 'rb') as f: 145 | gts = cPickle.load(f) 146 | if 'CAMERA' in img_path.split('/'): 147 | cam_fx, cam_fy, cam_cx, cam_cy = self.camera_intrinsics 148 | else: 149 | cam_fx, cam_fy, cam_cx, cam_cy = self.real_intrinsics 150 | 151 | cam_K=np.identity(3, dtype=np.float32) 152 | cam_K[0,0],cam_K[1,1],cam_K[0,2],cam_K[1,2]=cam_fx, cam_fy, cam_cx, cam_cy 153 | 154 | # select one foreground object 155 | '''''' 156 | idx = random.randint(0, len(gts['instance_ids'])-1) 157 | if self.per_obj is not None: 158 | for i in range(len(gts['instance_ids'])): 159 | if gts['class_ids'][i]-1==self.per_obj: 160 | idx=i 161 | 162 | cat_id=gts['class_ids'][idx]-1 163 | inst_id = gts['instance_ids'][idx] 164 | rmin, rmax, cmin, cmax = get_bbox(gts['bboxes'][idx]) 165 | # sample points from mask 166 | mask = np.equal(mask, inst_id) 167 | mask = np.logical_and(mask, depth > 0) 168 | mask = mask.flatten() 169 | 170 | depth_masked=(depth.flatten())[mask] #N 171 | xmap_masked=(self.xmap.flatten())[mask] 172 | ymap_masked=(self.ymap.flatten())[mask] 173 | 174 | pt2=depth_masked/self.norm_scale 175 | pt0 = (xmap_masked - cam_cx) * pt2 / cam_fx 176 | pt1 = (ymap_masked - cam_cy) * pt2 / cam_fy 177 | points=np.stack((pt0,pt1,pt2),axis=1) #N,3 178 | 179 | l_all=points.shape[0] 180 | 181 | if l_all>=self.n_pts: 182 | choose=np.random.choice(l_all,self.n_pts,replace=False) 183 | else: 184 | choose=np.random.choice(l_all,self.n_pts,replace=True) 185 | 186 | nocs = 
coord.reshape(-1,3)[mask,...][choose, :] - 0.5 187 | 188 | points=points[choose,...] 189 | 190 | 191 | 192 | crop_w = rmax - rmin 193 | ratio = self.img_size / crop_w 194 | col_idx = choose % crop_w 195 | row_idx = choose // crop_w 196 | choose = (np.floor(row_idx * ratio) * self.img_size + np.floor(col_idx * ratio)).astype(np.int64) 197 | 198 | 199 | scale = gts['scales'][idx] 200 | rotation = gts['rotations'][idx] 201 | translation = gts['translations'][idx] 202 | prior = self.mean_shapes[cat_id].astype(np.float32) 203 | 204 | # adjust nocs coords for mug category 205 | if cat_id==5: 206 | T0 = self.mug_meta[gts['model_list'][idx]][0] 207 | s0 = self.mug_meta[gts['model_list'][idx]][1] 208 | nocs = s0 * (nocs + T0) 209 | 210 | # map ambiguous rotation to canonical rotation 211 | if cat_id in self.sym_ids: 212 | rotation = gts['rotations'][idx] 213 | # assume continuous axis rotation symmetry 214 | theta_x = rotation[0, 0] + rotation[2, 2] 215 | theta_y = rotation[0, 2] - rotation[2, 0] 216 | r_norm = math.sqrt(theta_x**2 + theta_y**2) 217 | s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm], 218 | [0.0, 1.0, 0.0 ], 219 | [theta_y/r_norm, 0.0, theta_x/r_norm]]) 220 | rotation = rotation @ s_map 221 | nocs = nocs @ s_map 222 | 223 | sRT = np.identity(4, dtype=np.float32) 224 | RT=np.identity(4,dtype=np.float32) 225 | sRT[:3, :3] = scale * rotation 226 | sRT[:3, 3] = translation 227 | RT[:3,:3]=rotation 228 | RT[:3,3]=translation 229 | 230 | model = self.models[gts['model_list'][idx]].astype(np.float32) 231 | 232 | 233 | model=torch.as_tensor(model.astype(np.float32)) 234 | points=torch.as_tensor(points.astype(np.float32)) 235 | R=torch.as_tensor(rotation.astype(np.float32)) 236 | t=torch.as_tensor(translation.astype(np.float32)) 237 | s=torch.as_tensor(scale.astype(np.float32)) 238 | nocs=torch.as_tensor(nocs.astype(np.float32)) 239 | 240 | sym_info = self.get_sym_info(cat_id, mug_handle=1) 241 | bb_aug, rt_aug_t, rt_aug_R = self.generate_aug_parameters() 242 | dimension_delta,mean_shape=self.get_fs_net_scale( model, s,cat_id) 243 | 244 | sym_info=torch.as_tensor(sym_info.astype(np.float32)).contiguous() 245 | bb_aug, rt_aug_t, rt_aug_R=torch.as_tensor(bb_aug, dtype=torch.float32).contiguous(),torch.as_tensor(rt_aug_t, dtype=torch.float32).contiguous(),torch.as_tensor(rt_aug_R, dtype=torch.float32).contiguous() 246 | dimension_delta=torch.as_tensor(dimension_delta,dtype=torch.float32).contiguous() 247 | mean_shape=torch.as_tensor(mean_shape,dtype=torch.float32).contiguous() 248 | 249 | if self.use_augment: 250 | points, R, t, dimension, model, nocs,s=self.data_augment(points,R,t,dimension_delta+mean_shape,sym_info,bb_aug,rt_aug_t,rt_aug_R,model,s,nocs,cat_id) 251 | 252 | dimension_delta=dimension-mean_shape 253 | 254 | if cat_id in self.sym_ids: 255 | # assume continuous axis rotation symmetry 256 | R=R.numpy() 257 | nocs=nocs.numpy() 258 | theta_x = R[0, 0] + R[2, 2] 259 | theta_y = R[0, 2] - R[2, 0] 260 | r_norm = math.sqrt(theta_x**2 + theta_y**2) 261 | s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm], 262 | [0.0, 1.0, 0.0 ], 263 | [theta_y/r_norm, 0.0, theta_x/r_norm]]) 264 | R = R @ s_map 265 | nocs = nocs @ s_map 266 | R=torch.as_tensor(R.astype(np.float32)) 267 | nocs=torch.as_tensor(nocs.astype(np.float32)) 268 | 269 | gt_green,gt_red=self.get_gt_v(R) 270 | 271 | 272 | #data=data[choose,...] 
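# The dict assembled below is the per-sample payload: observed depth points, their NOCS coordinates, the category shape prior, the category id, GT rotation/translation/size, the green/red GT rotation axes, the size residual w.r.t. the category mean shape, and the symmetry flags.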
273 | data_dict={} 274 | if self.mode=='test': 275 | data_dict['handle_visiblity']=gts['handle_visibility'][idx] 276 | data_dict['points']=points.contiguous() 277 | data_dict['nocs']=nocs.contiguous() 278 | data_dict['prior']=torch.as_tensor(prior).contiguous() 279 | data_dict['cat_id']=torch.as_tensor(cat_id) 280 | data_dict['R']=R.contiguous() 281 | data_dict['t']=t.contiguous() 282 | data_dict['s']=s.contiguous() 283 | data_dict['gt_green']=gt_green.contiguous() 284 | data_dict['gt_red']=gt_red.contiguous() 285 | data_dict['dimension_delta']=dimension_delta.contiguous() 286 | data_dict['mean_shape']=mean_shape.contiguous() 287 | data_dict['sym']=sym_info.contiguous() 288 | if self.vis: 289 | data_dict['RT']=torch.as_tensor(RT.astype(np.float32)).contiguous() 290 | data_dict['id']=id 291 | data_dict['sRT']=torch.as_tensor(sRT.astype(np.float32)).contiguous() 292 | data_dict['image']=image 293 | data_dict['cam_K']=cam_K 294 | data_dict['model']=model.contiguous() 295 | data_dict['img_path']=img_path 296 | 297 | return data_dict 298 | 299 | @torch.no_grad() 300 | def get_gt_v(self,Rs, axis=2): 301 | # TODO use 3 axis, the order remains: do we need to change order? 302 | if axis == 3: 303 | raise NotImplementedError 304 | else: 305 | assert axis == 2 306 | gt_green = Rs[:,1:2] 307 | gt_red = Rs[:,0:1] 308 | return gt_green, gt_red 309 | 310 | 311 | @torch.no_grad() 312 | def data_augment(self,PC,gt_R,gt_t,gt_s,sym,aug_bb,aug_rt_t,aug_rt_r,model_point,nocs_scale,PC_nocs,obj_id): 313 | prop_bb = torch.rand(1) 314 | if prop_bb < 0.3: 315 | # R, t, s, s_x=(0.9, 1.1), s_y=(0.9, 1.1), s_z=(0.9, 1.1), sym=None 316 | PC_new, gt_s_new, nocs_new, model_new,nocs_scale_aug = defor_3D_bb(PC, gt_R, 317 | gt_t, gt_s, PC_nocs, model_point, 318 | sym=sym, aug_bb=aug_bb) 319 | PC = PC_new 320 | gt_s = gt_s_new 321 | PC_nocs = nocs_new 322 | model_point = model_new 323 | nocs_scale=nocs_scale/nocs_scale_aug 324 | 325 | 326 | prop_rt = torch.rand(1) 327 | if prop_rt < 0.3: 328 | PC_new, gt_R_new, gt_t_new = defor_3D_rt(PC, gt_R, 329 | gt_t, aug_rt_t, aug_rt_r) 330 | PC = PC_new 331 | gt_R = gt_R_new 332 | gt_t = gt_t_new.view(-1) 333 | 334 | prop_bc = torch.rand(1) 335 | # only do bc for mug and bowl 336 | b=False 337 | if prop_bc < 0.3 and (obj_id in [1,5]): 338 | b=True 339 | PC_new, gt_s_new, model_point_new, nocs_new,nocs_scale_aug = defor_3D_bc(PC, gt_R, gt_t,gt_s,model_point, nocs_scale, PC_nocs) 340 | PC = PC_new 341 | gt_s = gt_s_new 342 | model_point = model_point_new 343 | PC_nocs = nocs_new 344 | nocs_scale=nocs_scale/nocs_scale_aug 345 | 346 | prop_nl = torch.rand(1) 347 | if not b and prop_nl < 0.3 and (obj_id in [0,1,2,3,5]): 348 | if obj_id in [0,1,3,5]: 349 | sel_axis = 1 350 | elif obj_id in [2]: 351 | sel_axis = 0 352 | else: 353 | sel_axis = None 354 | 355 | PC_new, gt_s_new, model_point_new, nocs_new,nocs_scale_aug = deform_non_linear(PC, gt_R, gt_t,gt_s,PC_nocs, model_point, sel_axis) 356 | 357 | PC = PC_new 358 | gt_s = gt_s_new 359 | model_point = model_point_new 360 | PC_nocs = nocs_new 361 | nocs_scale=nocs_scale/nocs_scale_aug 362 | 363 | 364 | prop_pc = torch.rand(1) 365 | if prop_pc < 0.3: 366 | PC_new = defor_3D_pc(PC, 0.001) 367 | PC = PC_new 368 | 369 | pro_aug=torch.rand(1) 370 | if pro_aug<0.1: 371 | num=random.randint(1,10) 372 | position=list(range(1024)) 373 | position=random.sample(position,num) 374 | position=torch.tensor(position,dtype=torch.long) 375 | PC[position,...]=torch.rand((num,3))*gt_s*0.6+gt_t 376 | 377 | # augmentation finish 378 | return PC, gt_R, gt_t, gt_s, 
model_point, PC_nocs,nocs_scale 379 | 380 | 381 | def get_sym_info(self, c, mug_handle=1): 382 | # sym_info c0 : face classfication c1, c2, c3:Three view symmetry, correspond to xy, xz, yz respectively 383 | # c0: 0 no symmetry 1 axis symmetry 2 two reflection planes 3 unimplemented type 384 | # Y axis points upwards, x axis pass through the handle, z axis otherwise 385 | # 386 | # for specific defination, see sketch_loss 387 | if c == 0:#'bottle' 388 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 389 | elif c == 1:#'bowl' 390 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 391 | elif c == 2:#'camera' 392 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 393 | elif c == 3:#'can' 394 | sym = np.array([1, 1, 1, 1], dtype=np.int32) 395 | elif c == 4:#'laptop' 396 | sym = np.array([0, 1, 0, 0], dtype=np.int32) 397 | elif c == 5 and mug_handle == 1:#'mug' 398 | sym = np.array([0, 1, 0, 0], dtype=np.int32) # for mug, we currently mark it as no symmetry 399 | elif c == 5 and mug_handle == 0:#'mug' 400 | sym = np.array([1, 0, 0, 0], dtype=np.int32) 401 | else: 402 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 403 | return sym 404 | 405 | 406 | def generate_aug_parameters(self, s_x=(0.8, 1.2), s_y=(0.8, 1.2), s_z=(0.8, 1.2), ax=50, ay=50, az=50, a=15): 407 | # for bb aug 408 | ex, ey, ez = np.random.rand(3) 409 | ex = ex * (s_x[1] - s_x[0]) + s_x[0] 410 | ey = ey * (s_y[1] - s_y[0]) + s_y[0] 411 | ez = ez * (s_z[1] - s_z[0]) + s_z[0] 412 | # for R, t aug 413 | Rm = get_rotation(np.random.uniform(-a, a), np.random.uniform(-a, a), np.random.uniform(-a, a)) 414 | dx = np.random.rand() * 2 * ax - ax 415 | dy = np.random.rand() * 2 * ay - ay 416 | dz = np.random.rand() * 2 * az - az 417 | return np.array([ex, ey, ez], dtype=np.float32), np.array([dx, dy, dz], dtype=np.float32) / 1000.0, Rm 418 | 419 | 420 | def get_fs_net_scale(self, model, nocs_scale,c): 421 | # model pc x 3 422 | lx = 2 * max(max(model[:, 0]), -min(model[:, 0])) 423 | ly = max(model[:, 1]) - min(model[:, 1]) 424 | lz = max(model[:, 2]) - min(model[:, 2]) 425 | 426 | # real scale 427 | lx_t = lx * nocs_scale * 1000 428 | ly_t = ly * nocs_scale * 1000 429 | lz_t = lz * nocs_scale * 1000 430 | 431 | if c == 0:#'bottle' 432 | unitx = 87 433 | unity = 220 434 | unitz = 89 435 | elif c == 1:#'bowl' 436 | unitx = 165 437 | unity = 80 438 | unitz = 165 439 | elif c == 2:#'camera' 440 | unitx = 88 441 | unity = 128 442 | unitz = 156 443 | elif c == 3:#'can' 444 | unitx = 68 445 | unity = 146 446 | unitz = 72 447 | elif c == 4:#'laptop' 448 | unitx = 346 449 | unity = 200 450 | unitz = 335 451 | elif c == 5:#'mug' 452 | unitx = 146 453 | unity = 83 454 | unitz = 114 455 | elif c == '02876657': 456 | unitx = 324 / 4 457 | unity = 874 / 4 458 | unitz = 321 / 4 459 | elif c == '02880940': 460 | unitx = 675 / 4 461 | unity = 271 / 4 462 | unitz = 675 / 4 463 | elif c == '02942699': 464 | unitx = 464 / 4 465 | unity = 487 / 4 466 | unitz = 702 / 4 467 | elif c == '02946921': 468 | unitx = 450 / 4 469 | unity = 753 / 4 470 | unitz = 460 / 4 471 | elif c == '03642806': 472 | unitx = 581 / 4 473 | unity = 445 / 4 474 | unitz = 672 / 4 475 | elif c == '03797390': 476 | unitx = 670 / 4 477 | unity = 540 / 4 478 | unitz = 497 / 4 479 | else: 480 | unitx = 0 481 | unity = 0 482 | unitz = 0 483 | print('This category is not recorded in my little brain.') 484 | raise NotImplementedError 485 | # scale residual 486 | return np.array([lx_t - unitx, ly_t - unity, lz_t - unitz])/1000.0, np.array([unitx, unity, unitz])/1000.0 487 | 488 | 
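# Illustrative usage sketch (not part of the original file): a minimal way to
# exercise PoseDataset on its own, assuming the NOCS-style data layout under
# ./data and that this is run from the repository root. Batch size and worker
# count below are arbitrary.
if __name__ == '__main__':
    from torch.utils.data import DataLoader
    dataset = PoseDataset(source='CAMERA+Real', mode='train', data_dir='data',
                          n_pts=1024, vis=False, img_size=192, use_augment=True)
    loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
    batch = next(iter(loader))
    print(batch['points'].shape)    # (16, 1024, 3) observed depth points
    print(batch['gt_green'].shape)  # GT y-axis (green) column of the rotation
    print(batch['mean_shape'])      # per-category mean box size in meters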
-------------------------------------------------------------------------------- /tools/valid.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | import os 4 | import time 5 | import argparse 6 | import cv2 7 | import math 8 | import glob 9 | import numpy as np 10 | from tqdm import tqdm 11 | import _pickle as cPickle 12 | import torch 13 | import torch.nn.functional as F 14 | from mmengine import Config,DictAction 15 | from network import NETWORK_REGISTRY 16 | from utils import load_depth, get_bbox, compute_mAP, plot_mAP 17 | from utils.logging import create_checkpoint 18 | import random 19 | from utils.utils import farthest_point_sample,index_points 20 | 21 | def set_random_seed(seed, deterministic=False): 22 | random.seed(seed) 23 | np.random.seed(seed) 24 | torch.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | if deterministic: 27 | torch.backends.cudnn.deterministic = True 28 | torch.backends.cudnn.benchmark = False 29 | 30 | 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument('--data', type=str, default='real_test', help='val, real_test') 33 | parser.add_argument('--data_dir', type=str, default='data', help='data directory') 34 | parser.add_argument('--n_cat', type=int, default=6, help='number of object categories') 35 | parser.add_argument('--nv_prior', type=int, default=1024, help='number of vertices in shape priors') 36 | parser.add_argument('--model', type=str, default='results/camera/model_50.pth', help='resume from saved model') 37 | parser.add_argument('--n_pts', type=int, default=1024, help='number of foreground points') 38 | parser.add_argument('--img_size', type=int, default=192, help='cropped image size') 39 | parser.add_argument('--gpus', type=str, default='1', help='GPU to use') 40 | parser.add_argument('--cfg', 41 | help='experiment configure file name', 42 | required=True, 43 | type=str) 44 | parser.add_argument('--cfg-options', 45 | nargs='+', 46 | action=DictAction, 47 | help='override some settings in the used config, the key-value pair ' 48 | 'in xxx=yyy format will be merged into config file. If the value to ' 49 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 50 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 51 | 'Note that the quotation marks are necessary and that no white space ' 52 | 'is allowed.') 53 | opt = parser.parse_args() 54 | 55 | per_obj=None 56 | use_gt_mask=False 57 | mean_shapes = np.load('assets/mean_points_emb.npy') 58 | 59 | 60 | result_dir='results/eval_real' 61 | 62 | 63 | xmap = np.array([[i for i in range(640)] for j in range(480)]) 64 | ymap = np.array([[j for i in range(640)] for j in range(480)]) 65 | norm_scale = 1000.0 66 | 67 | @torch.inference_mode() 68 | def detect(): 69 | # resume model 70 | print('use_gt_mask: ',use_gt_mask) 71 | global opt 72 | global result_dir 73 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus 74 | opt = Config.fromfile(opt.cfg) 75 | assert not opt.train 76 | assert opt.DATA in ['val', 'real_test'] 77 | opt.MODEL.decoder.training=False 78 | opt.MODEL.training=False 79 | if opt.DATA == 'val': 80 | result_dir = 'results/eval_camera' 81 | file_path = 'CAMERA/val_list.txt' 82 | cam_fx, cam_fy, cam_cx, cam_cy = 577.5, 577.5, 319.5, 239.5 83 | else: 84 | result_dir = 'results/eval_real' 85 | file_path = 'Real/test_list.txt' 86 | cam_fx, cam_fy, cam_cx, cam_cy = 591.0125, 590.16775, 322.525, 244.11084 87 | cam_K=np.identity(3, dtype=np.float32) 88 | cam_K[0,0],cam_K[1,1],cam_K[0,2],cam_K[1,2]=cam_fx, cam_fy, cam_cx, cam_cy 89 | 90 | if not os.path.exists(result_dir): 91 | os.makedirs(result_dir) 92 | set_random_seed(123) 93 | 94 | mean_shapes = np.load('assets/mean_points_emb.npy') 95 | 96 | model = NETWORK_REGISTRY.build(opt.MODEL) 97 | #model.init_para(0) 98 | model = torch.nn.DataParallel(model).cuda() 99 | final_output_dir = create_checkpoint(opt,None) 100 | checkpoint_file = os.path.join( 101 | final_output_dir, 'model', opt.RESUME_FILE) 102 | 103 | checkpoint = torch.load(checkpoint_file, map_location=lambda storage, loc: storage) 104 | model.load_state_dict(checkpoint['state_dict']) 105 | model.eval() 106 | 107 | 108 | # get test data list 109 | img_list = [os.path.join(file_path.split('/')[0], line.rstrip('\n')) 110 | for line in open(os.path.join(opt.DATASET.data_dir, file_path))] 111 | # frame by frame test 112 | times=[] 113 | t_inference = 0.0 114 | t_umeyama = 0.0 115 | inst_count = 0 116 | img_count = 0 117 | t_start = time.time() 118 | 119 | # img_list=img_list[:300] 120 | cam_K=np.identity(3, dtype=np.float32) 121 | cam_K[0,0],cam_K[1,1],cam_K[0,2],cam_K[1,2]=cam_fx, cam_fy, cam_cx, cam_cy 122 | for path in tqdm(img_list): 123 | img_path = os.path.join(opt.DATASET.data_dir, path) 124 | raw_depth = load_depth(img_path) 125 | # load mask-rcnn detection results 126 | img_path_parsing = img_path.split('/') 127 | 128 | if use_gt_mask: 129 | gt_mask=cv2.imread(img_path + '_mask.png')[:, :, 2] 130 | with open(img_path + '_label.pkl', 'rb') as f: 131 | gts = cPickle.load(f) 132 | num_insts=len(gts['instance_ids']) 133 | else: 134 | 135 | mrcnn_path = os.path.join('data/results/mrcnn_results', opt.DATA, 'results_{}_{}_{}.pkl'.format( 136 | opt.DATA.split('_')[-1], img_path_parsing[-2], img_path_parsing[-1])) 137 | with open(mrcnn_path, 'rb') as f: 138 | mrcnn_result = cPickle.load(f) 139 | num_insts = len(mrcnn_result['class_ids']) 140 | 141 | 142 | f_sRT = np.zeros((num_insts, 4, 4), dtype=float) 143 | f_size = np.zeros((num_insts, 3), dtype=float) 144 | 145 | # prepare frame data 146 | f_points, f_catId,f_rgb,f_mask,f_choose,f_prior,f_sym,f_mean_shape= [], [],[],[],[],[],[],[] 147 | valid_inst = [] 148 | 149 | for i in range(num_insts): 150 | if use_gt_mask: 151 | new_gt_mask=gt_mask.copy() 152 | 
new_gt_mask=np.equal(new_gt_mask,gts['instance_ids'][i]) 153 | new_gt_mask=np.logical_and(new_gt_mask,raw_depth>0) 154 | mask=new_gt_mask 155 | cat_id=gts['class_ids'][i]-1 156 | else: 157 | cat_id = mrcnn_result['class_ids'][i] - 1 158 | mask = np.logical_and(mrcnn_result['masks'][:, :, i], raw_depth > 0) 159 | if per_obj is not None and cat_id not in per_obj: 160 | continue 161 | # raw_rgb[mask,:]=255 162 | 163 | # if cat_id==1: 164 | # raw_rgb[mask,:]=255 165 | # cv2.imwrite(os.path.join('imgs','pred_masks',img_path_parsing[-1]+'_1.png'),raw_rgb) 166 | 167 | mask = mask.flatten() 168 | 169 | depth_masked=(raw_depth.flatten())[mask] #N 170 | xmap_masked=(xmap.flatten())[mask] 171 | ymap_masked=(ymap.flatten())[mask] 172 | 173 | pt2 = depth_masked / norm_scale 174 | pt0 = (xmap_masked - cam_cx) * pt2 / cam_fx 175 | pt1 = (ymap_masked - cam_cy) * pt2 / cam_fy 176 | points = np.stack((pt0, pt1, pt2), axis=1) 177 | 178 | l_all=points.shape[0] 179 | 180 | if l_all < 32: 181 | f_sRT[i] = np.identity(4, dtype=float) 182 | prior = mean_shapes[cat_id].astype(np.float32) 183 | f_size[i] = 2 * np.amax(np.abs(prior), axis=0) 184 | continue 185 | else: 186 | valid_inst.append(i) 187 | #prior = mean_shapes[cat_id].astype(np.float32) 188 | if use_gt_mask: 189 | rmin, rmax, cmin, cmax = get_bbox(gts['bboxes'][i]) 190 | else: 191 | rmin, rmax, cmin, cmax = get_bbox(mrcnn_result['rois'][i]) 192 | 193 | 194 | # process objects with valid depth observation 195 | if l_all >= opt.DATASET.n_pts: 196 | choose=np.random.choice(l_all,opt.DATASET.n_pts,replace=False) 197 | else: 198 | choose=np.random.choice(l_all,opt.DATASET.n_pts,replace=True) 199 | 200 | points=points[choose,...] 201 | 202 | 203 | sym_info = get_sym_info(cat_id, mug_handle=1) 204 | mean_shape=get_fs_net_scale(cat_id) 205 | 206 | # concatenate instances 207 | f_points.append(points) 208 | f_catId.append(cat_id) 209 | # f_rgb.append(rgb) 210 | f_mask.append(mask) 211 | # f_choose.append(choose) 212 | f_sym.append(sym_info) 213 | f_mean_shape.append(mean_shape) 214 | #f_prior.append(prior) 215 | if len(valid_inst): 216 | f_points = torch.cuda.FloatTensor(np.array(f_points)).contiguous() 217 | f_catId = torch.cuda.LongTensor(np.array(f_catId)).contiguous() 218 | # f_rgb=torch.cuda.FloatTensor(np.array(f_rgb)).contiguous() 219 | f_choose=torch.cuda.LongTensor(np.array(f_choose)).contiguous() 220 | f_sym=torch.cuda.LongTensor(np.array(f_sym)).contiguous() 221 | f_mean_shape=torch.cuda.FloatTensor(np.array(f_mean_shape)).contiguous() 222 | batched_input={ 223 | 'points':f_points, 224 | 'cat_id':f_catId, 225 | 'sym':f_sym, 226 | 'mean_shape':f_mean_shape 227 | #'prior':f_prior 228 | } 229 | # inference 230 | torch.cuda.synchronize() 231 | t_now = time.time() 232 | pred_sRT, size = model(batched_input) 233 | for i in range(len(valid_inst)): 234 | inst_idx = valid_inst[i] 235 | f_sRT[inst_idx] = pred_sRT[i] 236 | f_size[inst_idx]=size[i] 237 | torch.cuda.synchronize() 238 | inference=time.time() - t_now 239 | times.append(inference) 240 | t_inference += (inference) 241 | img_count += 1 242 | inst_count += len(valid_inst) 243 | 244 | 245 | # save results 246 | result = {} 247 | if not use_gt_mask: 248 | with open(img_path + '_label.pkl', 'rb') as f: 249 | gts = cPickle.load(f) 250 | result['gt_class_ids'] = gts['class_ids'] 251 | result['gt_bboxes'] = gts['bboxes'] 252 | for idx,cat_id in enumerate(gts['class_ids']): 253 | cat_id=cat_id-1 254 | assert cat_id>=0 255 | rotation = gts['rotations'][idx] 256 | scale = gts['scales'][idx] 257 | translation = 
gts['translations'][idx] 258 | if cat_id in [0, 1, 3]: 259 | # assume continuous axis rotation symmetry 260 | theta_x = rotation[0, 0] + rotation[2, 2] 261 | theta_y = rotation[0, 2] - rotation[2, 0] 262 | r_norm = math.sqrt(theta_x**2 + theta_y**2) 263 | s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm], 264 | [0.0, 1.0, 0.0 ], 265 | [theta_y/r_norm, 0.0, theta_x/r_norm]]) 266 | rotation = rotation @ s_map 267 | sRT = np.identity(4, dtype=np.float32) 268 | sRT[:3, :3] = scale * rotation 269 | sRT[:3, 3] = translation 270 | gts['poses'][idx]=sRT 271 | result['gt_RTs'] = gts['poses'] 272 | result['gt_scales'] = gts['size'] 273 | result['gt_handle_visibility'] = gts['handle_visibility'] 274 | 275 | if use_gt_mask: 276 | result['pred_class_ids'] = gts['class_ids'] 277 | result['pred_bboxes'] = gts['bboxes'] 278 | result['pred_scores'] = np.ones((num_insts,)) 279 | else: 280 | result['pred_class_ids'] = mrcnn_result['class_ids'] 281 | result['pred_bboxes'] = mrcnn_result['rois'] 282 | result['pred_scores'] = mrcnn_result['scores'] 283 | 284 | 285 | result['pred_RTs'] = f_sRT 286 | result['pred_scales']=f_size 287 | 288 | image_short_path = '_'.join(img_path_parsing[-3:]) 289 | save_path = os.path.join(result_dir, 'results_{}.pkl'.format(image_short_path)) 290 | with open(save_path, 'wb') as f: 291 | cPickle.dump(result, f) 292 | 293 | 294 | 295 | # write statistics 296 | total_time=0.0 297 | times=times[100:] 298 | for t in times: 299 | total_time+=t 300 | fw = open('{0}/eval_logs.txt'.format(result_dir), 'w') 301 | messages = [] 302 | messages.append("Total images: {}".format(len(img_list))) 303 | messages.append("Valid images: {}, Total instances: {}, Average: {:.2f}/image".format( 304 | img_count, inst_count, inst_count/img_count)) 305 | messages.append("Inference time: {:06f} Average: {:06f}/image fps:{:06f}".format(t_inference, total_time/(img_count-100),(img_count-100)/total_time)) 306 | messages.append("Total time: {:06f}".format(time.time() - t_start)) 307 | for msg in messages: 308 | print(msg) 309 | fw.write(msg + '\n') 310 | fw.close() 311 | del model 312 | 313 | 314 | def evaluate(): 315 | degree_thres_list = list(range(0, 61, 1)) 316 | shift_thres_list = [i / 2 for i in range(21)] 317 | iou_thres_list = [i / 100 for i in range(101)] 318 | # predictions 319 | result_pkl_list = glob.glob(os.path.join(result_dir, 'results_*.pkl')) 320 | result_pkl_list = sorted(result_pkl_list) 321 | 322 | # result_pkl_list=result_pkl_list[:100] 323 | assert len(result_pkl_list) 324 | pred_results = [] 325 | for pkl_path in result_pkl_list: 326 | with open(pkl_path, 'rb') as f: 327 | result = cPickle.load(f) 328 | if 'gt_handle_visibility' not in result: 329 | result['gt_handle_visibility'] = np.ones_like(result['gt_class_ids']) 330 | else: 331 | assert len(result['gt_handle_visibility']) == len(result['gt_class_ids']), "{} {}".format( 332 | result['gt_handle_visibility'], result['gt_class_ids']) 333 | if type(result) is list: 334 | pred_results += result 335 | elif type(result) is dict: 336 | pred_results.append(result) 337 | else: 338 | assert False 339 | 340 | # To be consistent with NOCS, set use_matches_for_pose=True for mAP evaluation 341 | iou_aps, pose_aps, iou_acc, pose_acc = compute_mAP(pred_results, result_dir, degree_thres_list, shift_thres_list, 342 | iou_thres_list, iou_pose_thres=0.1, use_matches_for_pose=True) 343 | #print(pose_aps) 344 | # np.save('pose_aps', pose_aps, allow_pickle=True, fix_imports=True) 345 | 346 | # metric 347 | fw = 
open('{0}/eval_logs.txt'.format(result_dir), 'a') 348 | iou_25_idx = iou_thres_list.index(0.25) 349 | iou_50_idx = iou_thres_list.index(0.5) 350 | iou_75_idx = iou_thres_list.index(0.75) 351 | degree_05_idx = degree_thres_list.index(5) 352 | degree_10_idx = degree_thres_list.index(10) 353 | shift_02_idx = shift_thres_list.index(2) 354 | shift_05_idx = shift_thres_list.index(5) 355 | messages = [] 356 | messages.append('mAP:') 357 | messages.append('3D IoU at 25: {:.1f}'.format(iou_aps[-1, iou_25_idx] * 100)) 358 | messages.append('3D IoU at 50: {:.1f}'.format(iou_aps[-1, iou_50_idx] * 100)) 359 | messages.append('3D IoU at 75: {:.1f}'.format(iou_aps[-1, iou_75_idx] * 100)) 360 | messages.append('5 degree, 2cm: {:.1f}'.format(pose_aps[-1, degree_05_idx, shift_02_idx] * 100)) 361 | messages.append('5 degree, 5cm: {:.1f}'.format(pose_aps[-1, degree_05_idx, shift_05_idx] * 100)) 362 | messages.append('10 degree, 2cm: {:.1f}'.format(pose_aps[-1, degree_10_idx, shift_02_idx] * 100)) 363 | messages.append('10 degree, 5cm: {:.1f}'.format(pose_aps[-1, degree_10_idx, shift_05_idx] * 100)) 364 | messages.append('Acc:') 365 | messages.append('3D IoU at 25: {:.1f}'.format(iou_acc[-1, iou_25_idx] * 100)) 366 | messages.append('3D IoU at 50: {:.1f}'.format(iou_acc[-1, iou_50_idx] * 100)) 367 | messages.append('3D IoU at 75: {:.1f}'.format(iou_acc[-1, iou_75_idx] * 100)) 368 | messages.append('5 degree, 2cm: {:.1f}'.format(pose_acc[-1, degree_05_idx, shift_02_idx] * 100)) 369 | messages.append('5 degree, 5cm: {:.1f}'.format(pose_acc[-1, degree_05_idx, shift_05_idx] * 100)) 370 | messages.append('10 degree, 2cm: {:.1f}'.format(pose_acc[-1, degree_10_idx, shift_02_idx] * 100)) 371 | messages.append('10 degree, 5cm: {:.1f}'.format(pose_acc[-1, degree_10_idx, shift_05_idx] * 100)) 372 | for msg in messages: 373 | print(msg) 374 | fw.write(msg + '\n') 375 | fw.close() 376 | # load NOCS results 377 | pkl_path = os.path.join('results/nocs_results', opt.DATA, 'mAP_Acc.pkl') 378 | with open(pkl_path, 'rb') as f: 379 | nocs_results = cPickle.load(f) 380 | nocs_iou_aps = nocs_results['iou_aps'][-1, :] 381 | nocs_pose_aps = nocs_results['pose_aps'][-1, :, :] 382 | iou_aps = np.concatenate((iou_aps, nocs_iou_aps[None, :]), axis=0) 383 | pose_aps = np.concatenate((pose_aps, nocs_pose_aps[None, :, :]), axis=0) 384 | # plot 385 | plot_mAP(iou_aps, pose_aps, result_dir, iou_thres_list, degree_thres_list, shift_thres_list) 386 | 387 | 388 | def get_sym_info(c, mug_handle=1): 389 | # sym_info c0 : face classfication c1, c2, c3:Three view symmetry, correspond to xy, xz, yz respectively 390 | # c0: 0 no symmetry 1 axis symmetry 2 two reflection planes 3 unimplemented type 391 | # Y axis points upwards, x axis pass through the handle, z axis otherwise 392 | # 393 | # for specific defination, see sketch_loss 394 | if c == 0:#'bottle' 395 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 396 | elif c == 1:#'bowl' 397 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 398 | elif c == 2:#'camera' 399 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 400 | elif c == 3:#'can' 401 | sym = np.array([1, 1, 1, 1], dtype=np.int32) 402 | elif c == 4:#'laptop' 403 | sym = np.array([0, 1, 0, 0], dtype=np.int32) 404 | elif c == 5 and mug_handle == 1:#'mug' 405 | sym = np.array([0, 1, 0, 0], dtype=np.int32) # for mug, we currently mark it as no symmetry 406 | elif c == 5 and mug_handle == 0:#'mug' 407 | sym = np.array([1, 0, 0, 0], dtype=np.int32) 408 | else: 409 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 410 | return sym 411 | 412 | 413 | def 
get_fs_net_scale(c): 414 | if c == 0:#'bottle' 415 | unitx = 87 416 | unity = 220 417 | unitz = 89 418 | elif c == 1:#'bowl' 419 | unitx = 165 420 | unity = 80 421 | unitz = 165 422 | elif c == 2:#'camera' 423 | unitx = 88 424 | unity = 128 425 | unitz = 156 426 | elif c == 3:#'can' 427 | unitx = 68 428 | unity = 146 429 | unitz = 72 430 | elif c == 4:#'laptop' 431 | unitx = 346 432 | unity = 200 433 | unitz = 335 434 | elif c == 5:#'mug' 435 | unitx = 146 436 | unity = 83 437 | unitz = 114 438 | elif c == '02876657': 439 | unitx = 324 / 4 440 | unity = 874 / 4 441 | unitz = 321 / 4 442 | elif c == '02880940': 443 | unitx = 675 / 4 444 | unity = 271 / 4 445 | unitz = 675 / 4 446 | elif c == '02942699': 447 | unitx = 464 / 4 448 | unity = 487 / 4 449 | unitz = 702 / 4 450 | elif c == '02946921': 451 | unitx = 450 / 4 452 | unity = 753 / 4 453 | unitz = 460 / 4 454 | elif c == '03642806': 455 | unitx = 581 / 4 456 | unity = 445 / 4 457 | unitz = 672 / 4 458 | elif c == '03797390': 459 | unitx = 670 / 4 460 | unity = 540 / 4 461 | unitz = 497 / 4 462 | else: 463 | unitx = 0 464 | unity = 0 465 | unitz = 0 466 | print('This category is not recorded in my little brain.') 467 | raise NotImplementedError 468 | # scale residual 469 | return np.array([unitx, unity, unitz])/1000.0 470 | 471 | 472 | if __name__ == '__main__': 473 | print('Detecting ...') 474 | detect() 475 | print('Evaluating ...') 476 | evaluate() --------------------------------------------------------------------------------
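For reference, the rotation canonicalization that appears in both dataset/pose_dataset.py and tools/valid.py can be read as the self-contained function below. This is a restatement for clarity only (the function name is not part of the repository): for categories with a continuous symmetry about their own y axis (bottle, bowl, can), any spin about that axis yields an equally valid ground-truth rotation, so the rotation is mapped to one canonical representative before losses and evaluation metrics are computed.

import math
import numpy as np

def canonicalize_y_symmetric_rotation(rotation):
    """Post-multiply R by a rotation about its own y axis, built from R's entries,
    so that rotations differing only by a spin about the symmetry axis collapse
    to the same canonical matrix (same construction as the in-repo s_map code)."""
    theta_x = rotation[0, 0] + rotation[2, 2]
    theta_y = rotation[0, 2] - rotation[2, 0]
    r_norm = math.sqrt(theta_x ** 2 + theta_y ** 2)
    s_map = np.array([[theta_x / r_norm, 0.0, -theta_y / r_norm],
                      [0.0, 1.0, 0.0],
                      [theta_y / r_norm, 0.0, theta_x / r_norm]])
    return rotation @ s_map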