├── utils ├── __init__.py ├── align.py └── logging.py ├── assets └── mean_points_emb.npy ├── config ├── run_eval_camera.py ├── run_eval_real.py ├── _base_.py └── run.py ├── network ├── __init__.py ├── encoder.py ├── basic.py ├── pointnet.py ├── loss.py ├── network.py └── decoder.py ├── Pointnet2 ├── tools │ ├── _init_path.py │ ├── pointnet2_msg.py │ ├── dataset.py │ ├── train_and_eval.py │ └── kitti_utils.py └── pointnet2 │ ├── src │ ├── cuda_utils.h │ ├── ball_query_gpu.h │ ├── group_points_gpu.h │ ├── ball_query.cpp │ ├── sampling_gpu.h │ ├── pointnet2_api.cpp │ ├── interpolate_gpu.h │ ├── group_points.cpp │ ├── sampling.cpp │ ├── interpolate.cpp │ ├── ball_query_gpu.cu │ ├── group_points_gpu.cu │ ├── interpolate_gpu.cu │ └── sampling_gpu.cu │ ├── setup.py │ ├── pointnet2_modules.py │ ├── pytorch_utils.py │ └── pointnet2_utils.py ├── .gitignore ├── dataset ├── __init__.py ├── data_augmentation.py └── pose_dataset.py ├── core └── trainer.py ├── README.md └── tools ├── train.py └── valid.py /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import * 2 | from .utils import * -------------------------------------------------------------------------------- /assets/mean_points_emb.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustvl/Query6DoF/HEAD/assets/mean_points_emb.npy -------------------------------------------------------------------------------- /config/run_eval_camera.py: -------------------------------------------------------------------------------- 1 | _base_=['run.py'] 2 | train=False 3 | DATA='val' 4 | RESUME_FILE = 'checkpoint_epoch_50.tar.pth' -------------------------------------------------------------------------------- /config/run_eval_real.py: -------------------------------------------------------------------------------- 1 | _base_=['run.py'] 2 | train=False 3 | DATA='real_test' 4 | RESUME_FILE = 'checkpoint_epoch_50.tar.pth' -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- 1 | from .pointnet import Pointnet2MSG 2 | from .network import NETWORK_REGISTRY 3 | from .loss import LOSS_REGISTRY -------------------------------------------------------------------------------- /Pointnet2/tools/_init_path.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '../')) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 3 | build/ 4 | develop-eggs/ 5 | dist/ 6 | downloads/ 7 | eggs/ 8 | .eggs/ 9 | lib/ 10 | lib64/ 11 | parts/ 12 | sdist/ 13 | var/ 14 | wheels/ 15 | *.egg-info/ 16 | .installed.cfg 17 | *.egg 18 | 19 | *.pth 20 | 21 | data/ 22 | 23 | results 24 | runs -------------------------------------------------------------------------------- /network/encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import copy 7 | import math 8 | from mmengine import Registry 9 | from .basic import MLP 10 | 11 | ENCODER_REGISTRY = Registry("ENCODER") 12 | 13 | 
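A minimal sketch of the registry mechanism used throughout `network/` (illustrative only, assuming mmengine's `Registry` semantics; `ToyEncoder` is a hypothetical class, not part of the repo). Classes register themselves under a name, and configs such as `config/run.py` later instantiate them through the `type` field of a dict:

~~~
from mmengine import Registry
import torch.nn as nn

ENCODER_REGISTRY = Registry("ENCODER")

@ENCODER_REGISTRY.register_module()
class ToyEncoder(nn.Module):
    # hypothetical encoder used only to illustrate the pattern
    def __init__(self, input_channels=0):
        super().__init__()
        self.proj = nn.Linear(3 + input_channels, 64)

    def forward(self, pts):
        return self.proj(pts)

# A config dict picks the registered class by name, mirroring how
# MODEL['backbone'] in config/run.py selects 'Pointnet2MSG'.
cfg = dict(type='ToyEncoder', input_channels=0)
encoder = ENCODER_REGISTRY.build(cfg)  # -> ToyEncoder(input_channels=0)
~~~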
-------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | #define THREADS_PER_BLOCK 256 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | 10 | inline int opt_n_threads(int work_size) { 11 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 12 | 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /network/basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | class MLP(nn.Module): 4 | def __init__(self,input_dims,middle_dims,output_dims=None): 5 | super(MLP,self).__init__() 6 | if output_dims is None: 7 | output_dims=input_dims 8 | self.model=nn.Sequential( 9 | nn.Linear(input_dims,middle_dims), 10 | torch.nn.GELU(), 11 | nn.Linear(middle_dims,output_dims) 12 | ) 13 | 14 | def forward(self,inputs): 15 | return self.model(inputs) -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/ball_query_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_GPU_H 2 | #define _BALL_QUERY_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 10 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 11 | 12 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, 13 | const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /utils/align.py: -------------------------------------------------------------------------------- 1 | 2 | import time 3 | import numpy as np 4 | 5 | 6 | def backproject(depth, intrinsics, instance_mask): 7 | """ Back-projection, use opencv camera coordinate frame. 
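    `depth` is an (H, W) depth map, `intrinsics` a 3x3 camera matrix, and
    `instance_mask` an (H, W) mask. Returns `pts`, the (N, 3) back-projected
    points of the masked, non-zero-depth pixels, and `idxs`, the tuple of
    their (row, col) pixel indices from np.where.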
8 | 9 | """ 10 | cam_fx = intrinsics[0, 0] 11 | cam_fy = intrinsics[1, 1] 12 | cam_cx = intrinsics[0, 2] 13 | cam_cy = intrinsics[1, 2] 14 | 15 | non_zero_mask = (depth > 0) 16 | final_instance_mask = np.logical_and(instance_mask, non_zero_mask) 17 | idxs = np.where(final_instance_mask) 18 | 19 | z = depth[idxs[0], idxs[1]] 20 | x = (idxs[1] - cam_cx) * z / cam_fx 21 | y = (idxs[0] - cam_cy) * z / cam_fy 22 | pts = np.stack((x, y, z), axis=1) 23 | 24 | return pts, idxs 25 | 26 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='pointnet2', 6 | ext_modules=[ 7 | CUDAExtension('pointnet2_cuda', [ 8 | 'src/pointnet2_api.cpp', 9 | 10 | 'src/ball_query.cpp', 11 | 'src/ball_query_gpu.cu', 12 | 'src/group_points.cpp', 13 | 'src/group_points_gpu.cu', 14 | 'src/interpolate.cpp', 15 | 'src/interpolate_gpu.cu', 16 | 'src/sampling.cpp', 17 | 'src/sampling_gpu.cu', 18 | ], 19 | extra_compile_args={'cxx': ['-g'], 20 | 'nvcc': ['-O2']}) 21 | ], 22 | cmdclass={'build_ext': BuildExtension} 23 | ) 24 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/group_points_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUP_POINTS_GPU_H 2 | #define _GROUP_POINTS_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 12 | 13 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 14 | const float *points, const int *idx, float *out, cudaStream_t stream); 15 | 16 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /config/_base_.py: -------------------------------------------------------------------------------- 1 | CFG_NAME = '' 2 | OUTPUT_DIR = 'runs' 3 | RUN_NAME='' 4 | PRINT_FREQ = 40 5 | DIST_BACKEND = 'nccl' 6 | AUTO_RESUME = False 7 | VERBOSE = True 8 | DDP = True 9 | RESUME_FILE='' 10 | ONLY_MODEL=False 11 | CHANGE_SCHEDULE=False 12 | find_unused_parameters=False 13 | is_iter=False 14 | # Cudnn related params 15 | CUDNN=dict( 16 | BENCHMARK = True, 17 | DETERMINISTIC = False, 18 | ENABLED = True 19 | ) 20 | 21 | DATASET=dict( 22 | type='PoseDataset', 23 | source='Real', 24 | mode='train', 25 | data_dir='data', 26 | n_pts=1024 27 | ) 28 | 29 | DATALOADER=dict( 30 | type='DataLoader', 31 | batch_size=10, 32 | shuffle=False, 33 | num_workers=8, 34 | pin_memory=True, 35 | persistent_workers=True 36 | ) 37 | 38 | DATALOADER['persistent_workers']=DATALOADER['num_workers']>0 39 | 40 | OPTIMIZER=dict( 41 | type='AdamW', 42 | lr=2e-4, 43 | weight_decay=1e-7 44 | ) 45 | 46 | 47 | TRAIN=dict( 48 | BEGIN_EPOCH=0, 49 | END_EPOCH=75, 50 | SAVE_EPOCH_STEP=5, 51 | VIS=False 52 | ) 53 | 
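The `_base_` lists in the configs above rely on hierarchical config loading: a child file inherits every field of its `_base_` files and overrides only what it redeclares. A small usage sketch (assuming `mmengine.Config`, which the `mmengine` dependency and the `_base_` convention imply):

~~~
from mmengine import Config

# Inheritance chain: _base_.py <- run.py <- run_eval_real.py (child overrides base).
cfg = Config.fromfile('config/run_eval_real.py')

print(cfg.train)         # False - overridden in run_eval_real.py
print(cfg.DATA)          # 'real_test' - overridden in run_eval_real.py
print(cfg.RESUME_FILE)   # 'checkpoint_epoch_50.tar.pth'
print(cfg.DIST_BACKEND)  # 'nccl' - inherited from _base_.py
~~~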
-------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from .pose_dataset import DATASET_REGISTRY 4 | from mmengine import Registry 5 | from torch.utils.data import DataLoader 6 | import copy 7 | 8 | def build_dataloader(cfg, registry, *args, **kwargs): 9 | dataset=DATASET_REGISTRY.build(cfg.DATASET) 10 | loader_cfg=copy.deepcopy(cfg) 11 | loader_cfg=cfg.DATALOADER 12 | loader_cfg.dataset=dataset 13 | if 'TRAIN' in cfg.keys(): 14 | if cfg.DDP: 15 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) 16 | else: 17 | train_sampler = None 18 | 19 | loader_cfg.sampler=train_sampler 20 | data_loader = registry.get(cfg.DATALOADER.type) 21 | del loader_cfg['type'] 22 | data_loader=data_loader(**loader_cfg) 23 | return data_loader 24 | 25 | DATALOADER_REGISTRY = Registry("DATALODER",build_func=build_dataloader) 26 | DATALOADER_REGISTRY.register_module(module=DataLoader) 27 | 28 | def trivial_batch_collator(batch): 29 | return batch 30 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "ball_query_gpu.h" 8 | 9 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 10 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 11 | #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) 12 | 13 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 14 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) { 15 | CHECK_INPUT(new_xyz_tensor); 16 | CHECK_INPUT(xyz_tensor); 17 | const float *new_xyz = new_xyz_tensor.data(); 18 | const float *xyz = xyz_tensor.data(); 19 | int *idx = idx_tensor.data(); 20 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 21 | ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 22 | return 1; 23 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/sampling_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_GPU_H 2 | #define _SAMPLING_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 10 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 13 | const float *points, const int *idx, float *out, cudaStream_t stream); 14 | 15 | 16 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | 23 | int furthest_point_sampling_wrapper(int b, int n, int m, 24 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 25 | 26 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 27 | const float *dataset, float *temp, int *idxs, cudaStream_t stream); 28 | 29 | #endif 30 | 
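The wrapper functions declared in these headers are exposed to Python through the `pointnet2_cuda` extension built by `Pointnet2/pointnet2/setup.py`. A hedged usage sketch of the furthest-point-sampling entry point, following the signature declared in `sampling_gpu.h` (inside the repo this call is presumably wrapped by an autograd `Function` in `pointnet2_utils.py` rather than invoked directly):

~~~
import torch
import pointnet2_cuda  # built via 'python setup.py install' in Pointnet2/pointnet2

B, N, npoint = 2, 1024, 64
xyz = torch.randn(B, N, 3, device='cuda').contiguous()          # (B, N, 3) input points
temp = torch.full((B, N), 1e10, device='cuda')                  # per-point distance scratch buffer
idx = torch.zeros(B, npoint, dtype=torch.int32, device='cuda')  # (B, npoint) output indices

# furthest_point_sampling_wrapper(b, n, m, points, temp, idxs) selects npoint
# mutually distant points and writes their indices into idx in place.
pointnet2_cuda.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, idx)
print(idx.shape)  # torch.Size([2, 64])
~~~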
-------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/pointnet2_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ball_query_gpu.h" 5 | #include "group_points_gpu.h" 6 | #include "sampling_gpu.h" 7 | #include "interpolate_gpu.h" 8 | 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast"); 12 | 13 | m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast"); 14 | m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast"); 15 | 16 | m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast"); 17 | m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast"); 18 | 19 | m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); 20 | 21 | m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast"); 22 | m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast"); 23 | m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast"); 24 | } 25 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/interpolate_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATE_GPU_H 2 | #define _INTERPOLATE_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 11 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | 13 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 14 | const float *known, float *dist2, int *idx, cudaStream_t stream); 15 | 16 | 17 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, 18 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 19 | 20 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 21 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream); 22 | 23 | 24 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, 25 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 26 | 27 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 28 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "group_points_gpu.h" 8 | 9 | 10 | 11 | 12 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 13 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 14 | 15 | float *grad_points = grad_points_tensor.data(); 16 | const int *idx = idx_tensor.data(); 17 | const float *grad_out = grad_out_tensor.data(); 18 | 19 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 20 | 21 | 
group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream); 22 | return 1; 23 | } 24 | 25 | 26 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 27 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 28 | 29 | const float *points = points_tensor.data(); 30 | const int *idx = idx_tensor.data(); 31 | float *out = out_tensor.data(); 32 | 33 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 34 | 35 | group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream); 36 | return 1; 37 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "sampling_gpu.h" 8 | 9 | 10 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ 12 | const float *points = points_tensor.data(); 13 | const int *idx = idx_tensor.data(); 14 | float *out = out_tensor.data(); 15 | 16 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 17 | gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream); 18 | return 1; 19 | } 20 | 21 | 22 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 23 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 24 | 25 | const float *grad_out = grad_out_tensor.data(); 26 | const int *idx = idx_tensor.data(); 27 | float *grad_points = grad_points_tensor.data(); 28 | 29 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 30 | gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream); 31 | return 1; 32 | } 33 | 34 | 35 | int furthest_point_sampling_wrapper(int b, int n, int m, 36 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { 37 | 38 | const float *points = points_tensor.data(); 39 | float *temp = temp_tensor.data(); 40 | int *idx = idx_tensor.data(); 41 | 42 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 43 | furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); 44 | return 1; 45 | } 46 | -------------------------------------------------------------------------------- /utils/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import time 4 | from pathlib import Path 5 | 6 | def setup_logger(final_output_dir, rank, phase,vis=False): 7 | time_str = time.strftime('%Y-%m-%d-%H-%M') 8 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank) 9 | final_log_file = os.path.join(final_output_dir, 'log', log_file) 10 | head = '%(asctime)-15s %(message)s' 11 | if not vis: 12 | logging.basicConfig(filename=str(final_log_file), 13 | format=head) 14 | else: 15 | logging.basicConfig(filename=None, 16 | format=head) 17 | logger = logging.getLogger() 18 | logger.setLevel(logging.INFO) 19 | console = logging.StreamHandler() 20 | # if len(logging.getLogger('').handlers) < 2: 21 | logging.getLogger('').addHandler(console) 22 | 23 | return logger, time_str 24 | 25 | def create_checkpoint(cfg, phase='train'): 26 | root_output_dir = Path(cfg.OUTPUT_DIR) 27 | # set up logger 28 | if not root_output_dir.exists(): 29 | print('=> creating {}'.format(root_output_dir)) 30 | root_output_dir.mkdir() 31 | 32 | dataset = 
cfg.DATASET.source 33 | dataset = dataset.replace(':', '_') 34 | run_name = cfg.RUN_NAME 35 | 36 | final_output_dir = root_output_dir / dataset / run_name 37 | 38 | print('=> creating {}'.format(final_output_dir)) 39 | final_output_dir.mkdir(parents=True, exist_ok=True) 40 | 41 | log_dir = os.path.join(final_output_dir, 'log') 42 | if not os.path.exists(log_dir): 43 | print('=> creating log dir'.format(log_dir)) 44 | os.makedirs(log_dir) 45 | 46 | if phase == 'train': 47 | model_dir = os.path.join(final_output_dir, 'model') 48 | src_dir = os.path.join(final_output_dir, 'src') 49 | if not os.path.exists(model_dir): os.makedirs(model_dir) 50 | if not os.path.exists(src_dir): os.makedirs(src_dir) 51 | print('=> creating {}'.format(model_dir)) 52 | print('=> creating {}'.format(src_dir)) 53 | 54 | return str(final_output_dir) -------------------------------------------------------------------------------- /config/run.py: -------------------------------------------------------------------------------- 1 | _base_=['_base_.py'] 2 | CFG_NAME = '' 3 | OUTPUT_DIR = 'runs' 4 | RUN_NAME = 'run' 5 | PRINT_FREQ = 100 6 | DIST_BACKEND = 'nccl' 7 | AUTO_RESUME = True 8 | RESUME_FILE = '' 9 | ONLY_MODEL = False 10 | CHANGE_SCHEDULE = False 11 | find_unused_parameters = False 12 | VIS = False 13 | DATA='real_test' 14 | train=True 15 | 16 | is_iter=True 17 | 18 | MODEL = dict( 19 | type='Sparsenetv7', 20 | name='Sparsenetv7', 21 | n_pts=64, 22 | backbone=dict( 23 | type='Pointnet2MSG', 24 | input_channels=0, 25 | mlp=[[256, 256], [256, 256], [256, 256], [512, 512]]), 26 | decoder=dict( 27 | type='deep_prior_decoderv2_9', 28 | group=4, 29 | input_dim=256, 30 | middle_dim=1024, 31 | training=train, 32 | cat_num=6), 33 | pose_estimate=dict( 34 | type='pose_estimater', 35 | input_dim=512, 36 | middle_dim=256 37 | ), 38 | input_dim=256, 39 | cat_num=6, 40 | training=train, 41 | loss_name=['r','t','s','chamfer','nocs'], 42 | losses=[ 43 | dict(type='r_lossv2', weight=1.0,beta=0.001), 44 | dict(type='t_loss', weight=1,beta=0.005), 45 | dict(type='s_loss', weight=1.0,beta=0.005), 46 | dict(type='chamfer_lossv2',weight=3.0), 47 | dict(type='consistency_lossv2',weight=1.0,beta=0.1) 48 | ]) 49 | 50 | 51 | DATASET = dict( 52 | type='PoseDataset', 53 | source='CAMERA+Real', 54 | mode='train', 55 | data_dir='data', 56 | n_pts=1024, 57 | vis=False, 58 | img_size=192, 59 | use_cache=False) 60 | 61 | 62 | DATALOADER = dict( 63 | type='DataLoader', 64 | batch_size=15, 65 | shuffle=False, 66 | num_workers=4, 67 | pin_memory=True, 68 | persistent_workers=True, 69 | prefetch_factor=2, 70 | drop_last=True) 71 | 72 | DATALOADER['persistent_workers']=DATALOADER['num_workers']>0 73 | if VIS: 74 | DATALOADER['num_workers']=1 75 | 76 | 77 | OPTIMIZER = dict(type='AdamW', lr=0.0001, weight_decay=1e-4) 78 | SCHEDULER = dict(type='CosineAnnealingLR', T_max=422400, eta_min=1e-6, last_epoch=-1, verbose=False) 79 | TRAIN = dict(BEGIN_EPOCH=0, END_EPOCH=101, SAVE_EPOCH_STEP=10, VIS=False) -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "interpolate_gpu.h" 11 | 12 | 13 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 14 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { 15 | const float *unknown = 
unknown_tensor.data(); 16 | const float *known = known_tensor.data(); 17 | float *dist2 = dist2_tensor.data(); 18 | int *idx = idx_tensor.data(); 19 | 20 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 21 | three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream); 22 | } 23 | 24 | 25 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, 26 | at::Tensor points_tensor, 27 | at::Tensor idx_tensor, 28 | at::Tensor weight_tensor, 29 | at::Tensor out_tensor) { 30 | 31 | const float *points = points_tensor.data(); 32 | const float *weight = weight_tensor.data(); 33 | float *out = out_tensor.data(); 34 | const int *idx = idx_tensor.data(); 35 | 36 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 37 | three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream); 38 | } 39 | 40 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, 41 | at::Tensor grad_out_tensor, 42 | at::Tensor idx_tensor, 43 | at::Tensor weight_tensor, 44 | at::Tensor grad_points_tensor) { 45 | 46 | const float *grad_out = grad_out_tensor.data(); 47 | const float *weight = weight_tensor.data(); 48 | float *grad_points = grad_points_tensor.data(); 49 | const int *idx = idx_tensor.data(); 50 | 51 | cudaStream_t stream = c10::cuda::getCurrentCUDAStream(); 52 | three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream); 53 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ball_query_gpu.h" 6 | #include "cuda_utils.h" 7 | 8 | 9 | __global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, 10 | const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 11 | // new_xyz: (B, M, 3) 12 | // xyz: (B, N, 3) 13 | // output: 14 | // idx: (B, M, nsample) 15 | int bs_idx = blockIdx.y; 16 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 17 | if (bs_idx >= b || pt_idx >= m) return; 18 | 19 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 20 | xyz += bs_idx * n * 3; 21 | idx += bs_idx * m * nsample + pt_idx * nsample; 22 | 23 | float radius2 = radius * radius; 24 | float new_x = new_xyz[0]; 25 | float new_y = new_xyz[1]; 26 | float new_z = new_xyz[2]; 27 | 28 | int cnt = 0; 29 | for (int k = 0; k < n; ++k) { 30 | float x = xyz[k * 3 + 0]; 31 | float y = xyz[k * 3 + 1]; 32 | float z = xyz[k * 3 + 2]; 33 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 34 | if (d2 < radius2){ 35 | if (cnt == 0){ 36 | for (int l = 0; l < nsample; ++l) { 37 | idx[l] = k; 38 | } 39 | } 40 | idx[cnt] = k; 41 | ++cnt; 42 | if (cnt >= nsample) break; 43 | } 44 | } 45 | } 46 | 47 | 48 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \ 49 | const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { 50 | // new_xyz: (B, M, 3) 51 | // xyz: (B, N, 3) 52 | // output: 53 | // idx: (B, M, nsample) 54 | 55 | cudaError_t err; 56 | 57 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 58 | dim3 threads(THREADS_PER_BLOCK); 59 | 60 | ball_query_kernel_fast<<>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 61 | // cudaDeviceSynchronize(); // for using printf in kernel function 62 | err = cudaGetLastError(); 63 | if (cudaSuccess != err) { 64 | fprintf(stderr, 
"CUDA kernel failed : %s\n", cudaGetErrorString(err)); 65 | exit(-1); 66 | } 67 | } -------------------------------------------------------------------------------- /Pointnet2/tools/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG 4 | import pointnet2.pytorch_utils as pt_utils 5 | 6 | 7 | def get_model(input_channels=0): 8 | return Pointnet2MSG(input_channels=input_channels) 9 | 10 | 11 | NPOINTS = [4096, 1024, 256, 64] 12 | RADIUS = [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]] 13 | NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] 14 | MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]], 15 | [[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]] 16 | FP_MLPS = [[128, 128], [256, 256], [512, 512], [512, 512]] 17 | CLS_FC = [128] 18 | DP_RATIO = 0.5 19 | 20 | 21 | class Pointnet2MSG(nn.Module): 22 | def __init__(self, input_channels=6): 23 | super().__init__() 24 | 25 | self.SA_modules = nn.ModuleList() 26 | channel_in = input_channels 27 | 28 | skip_channel_list = [input_channels] 29 | for k in range(NPOINTS.__len__()): 30 | mlps = MLPS[k].copy() 31 | channel_out = 0 32 | for idx in range(mlps.__len__()): 33 | mlps[idx] = [channel_in] + mlps[idx] 34 | channel_out += mlps[idx][-1] 35 | 36 | self.SA_modules.append( 37 | PointnetSAModuleMSG( 38 | npoint=NPOINTS[k], 39 | radii=RADIUS[k], 40 | nsamples=NSAMPLE[k], 41 | mlps=mlps, 42 | use_xyz=True, 43 | bn=True 44 | ) 45 | ) 46 | skip_channel_list.append(channel_out) 47 | channel_in = channel_out 48 | 49 | self.FP_modules = nn.ModuleList() 50 | 51 | for k in range(FP_MLPS.__len__()): 52 | pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out 53 | self.FP_modules.append( 54 | PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k]) 55 | ) 56 | 57 | cls_layers = [] 58 | pre_channel = FP_MLPS[0][-1] 59 | for k in range(0, CLS_FC.__len__()): 60 | cls_layers.append(pt_utils.Conv1d(pre_channel, CLS_FC[k], bn=True)) 61 | pre_channel = CLS_FC[k] 62 | cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None)) 63 | cls_layers.insert(1, nn.Dropout(0.5)) 64 | self.cls_layer = nn.Sequential(*cls_layers) 65 | 66 | def _break_up_pc(self, pc): 67 | xyz = pc[..., 0:3].contiguous() 68 | features = ( 69 | pc[..., 3:].transpose(1, 2).contiguous() 70 | if pc.size(-1) > 3 else None 71 | ) 72 | 73 | return xyz, features 74 | 75 | def forward(self, pointcloud: torch.cuda.FloatTensor): 76 | xyz, features = self._break_up_pc(pointcloud) 77 | 78 | l_xyz, l_features = [xyz], [features] 79 | for i in range(len(self.SA_modules)): 80 | li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) 81 | l_xyz.append(li_xyz) 82 | l_features.append(li_features) 83 | 84 | for i in range(-1, -(len(self.FP_modules) + 1), -1): 85 | l_features[i - 1] = self.FP_modules[i]( 86 | l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] 87 | ) 88 | 89 | pred_cls = self.cls_layer(l_features[0]).transpose(1, 2).contiguous() # (B, N, 1) 90 | return pred_cls 91 | -------------------------------------------------------------------------------- /network/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import sys 4 | sys.path.append('..') 5 | from Pointnet2.pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG 6 | import 
Pointnet2.pointnet2.pytorch_utils as pt_utils 7 | from .encoder import ENCODER_REGISTRY 8 | 9 | def get_model(input_channels=0): 10 | return Pointnet2MSG(input_channels=input_channels) 11 | 12 | NPOINTS = [512, 256, 128, 64] 13 | RADIUS = [[0.01, 0.02], [0.02, 0.04], [0.04, 0.08], [0.08, 0.16]] 14 | NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] 15 | MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]], 16 | [[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]] 17 | FP_MLPS = [[128, 128], [128, 128], [256, 256], [512, 512]] 18 | CLS_FC = [128] 19 | DP_RATIO = 0.5 20 | 21 | @ENCODER_REGISTRY.register_module() 22 | class Pointnet2MSG(nn.Module): 23 | def __init__(self, input_channels=6,mlp=[],use_norm=False): 24 | super().__init__() 25 | if len(mlp)!=0: 26 | FP_MLPS=mlp 27 | else: 28 | FP_MLPS=[[128, 128], [128, 128], [256, 256], [512, 512]] 29 | self.SA_modules = nn.ModuleList() 30 | channel_in = input_channels 31 | 32 | skip_channel_list = [input_channels] 33 | for k in range(NPOINTS.__len__()): 34 | mlps = MLPS[k].copy() 35 | channel_out = 0 36 | for idx in range(mlps.__len__()): 37 | mlps[idx] = [channel_in] + mlps[idx] 38 | channel_out += mlps[idx][-1] 39 | 40 | self.SA_modules.append( 41 | PointnetSAModuleMSG( 42 | npoint=NPOINTS[k], 43 | radii=RADIUS[k], 44 | nsamples=NSAMPLE[k], 45 | mlps=mlps, 46 | use_xyz=True, 47 | bn=True, 48 | use_norm=use_norm 49 | ) 50 | ) 51 | skip_channel_list.append(channel_out) 52 | channel_in = channel_out 53 | 54 | self.FP_modules = nn.ModuleList() 55 | 56 | for k in range(FP_MLPS.__len__()): 57 | pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out 58 | self.FP_modules.append( 59 | PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k]) 60 | ) 61 | 62 | 63 | def _break_up_pc(self, pc): 64 | xyz = pc[..., 0:3].contiguous() 65 | features = ( 66 | pc[..., 3:].transpose(1, 2).contiguous() 67 | if pc.size(-1) > 3 else None 68 | ) 69 | 70 | return xyz, features 71 | 72 | def forward(self, pointcloud: torch.cuda.FloatTensor): 73 | xyz, features = self._break_up_pc(pointcloud) 74 | 75 | l_xyz, l_features = [xyz], [features] 76 | for i in range(len(self.SA_modules)): 77 | li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) 78 | 79 | l_xyz.append(li_xyz) 80 | l_features.append(li_features) 81 | 82 | 83 | for i in range(-1, -(len(self.FP_modules) + 1), -1): 84 | l_features[i - 1] = self.FP_modules[i]( 85 | l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] 86 | ) 87 | 88 | return l_features[0] 89 | 90 | if __name__ == '__main__': 91 | net = Pointnet2MSG(0).cuda() 92 | pts = torch.randn(2, 1024, 3).cuda() 93 | pre = net(pts) 94 | print(pre.shape) 95 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | #include "group_points_gpu.h" 6 | 7 | 8 | __global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample, 9 | const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) { 10 | // grad_out: (B, C, npoints, nsample) 11 | // idx: (B, npoints, nsample) 12 | // output: 13 | // grad_points: (B, C, N) 14 | int bs_idx = blockIdx.z; 15 | int c_idx = blockIdx.y; 16 | int index = blockIdx.x * blockDim.x + threadIdx.x; 17 | int pt_idx = index / nsample; 18 | if (bs_idx >= b || c_idx >= c || pt_idx >= 
npoints) return; 19 | 20 | int sample_idx = index % nsample; 21 | grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 22 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 23 | 24 | atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]); 25 | } 26 | 27 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 28 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { 29 | // grad_out: (B, C, npoints, nsample) 30 | // idx: (B, npoints, nsample) 31 | // output: 32 | // grad_points: (B, C, N) 33 | cudaError_t err; 34 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 35 | dim3 threads(THREADS_PER_BLOCK); 36 | 37 | group_points_grad_kernel_fast<<>>(b, c, n, npoints, nsample, grad_out, idx, grad_points); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | 47 | __global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample, 48 | const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 49 | // points: (B, C, N) 50 | // idx: (B, npoints, nsample) 51 | // output: 52 | // out: (B, C, npoints, nsample) 53 | int bs_idx = blockIdx.z; 54 | int c_idx = blockIdx.y; 55 | int index = blockIdx.x * blockDim.x + threadIdx.x; 56 | int pt_idx = index / nsample; 57 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 58 | 59 | int sample_idx = index % nsample; 60 | 61 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 62 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 63 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 64 | 65 | out[out_idx] = points[in_idx]; 66 | } 67 | 68 | 69 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 70 | const float *points, const int *idx, float *out, cudaStream_t stream) { 71 | // points: (B, C, N) 72 | // idx: (B, npoints, nsample) 73 | // output: 74 | // out: (B, C, npoints, nsample) 75 | cudaError_t err; 76 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 77 | dim3 threads(THREADS_PER_BLOCK); 78 | 79 | group_points_kernel_fast<<>>(b, c, n, npoints, nsample, points, idx, out); 80 | // cudaDeviceSynchronize(); // for using printf in kernel function 81 | err = cudaGetLastError(); 82 | if (cudaSuccess != err) { 83 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 84 | exit(-1); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /core/trainer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import os 4 | import torch 5 | 6 | class AverageMeter(object): 7 | """Computes and stores the average and current value""" 8 | def __init__(self,name=None): 9 | self.reset() 10 | self.name=name 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | self.avg = self.sum / self.count if self.count != 0 else 0 23 | 24 | class Trainer(object): 25 | def __init__(self, cfg, model, rank, output_dir,logger=None,lr_scheduler=None): 26 | self.model = 
model 27 | self.output_dir = output_dir 28 | self.rank = rank 29 | self.print_freq = cfg.PRINT_FREQ 30 | self.vis=cfg.VIS 31 | self.logger=logger 32 | self.loss_name=cfg.MODEL.loss_name 33 | self.meter={name:AverageMeter(name) for name in self.loss_name} 34 | self.lr_scheduler=lr_scheduler 35 | 36 | def train(self, epoch, data_loader, optimizer): 37 | for meter in self.meter: 38 | self.meter[meter].reset() 39 | batch_time = AverageMeter() 40 | data_time = AverageMeter() 41 | optimizer.zero_grad() 42 | self.model.train() 43 | if self.rank == 0: 44 | lr_msg='lr: {0}'.format(optimizer.state_dict()['param_groups'][0]['lr']) 45 | self.logger.info(lr_msg) 46 | end = time.time() 47 | 48 | for i,batched_inputs in enumerate(data_loader): 49 | 50 | data_time.update(time.time() - end) 51 | 52 | loss_dict = self.model(batched_inputs) 53 | loss = 0 54 | num_images = len(batched_inputs) 55 | 56 | for name in loss_dict: 57 | l=loss_dict[name] 58 | loss=loss+l 59 | self.meter[name].update(l.item(),num_images) 60 | 61 | 62 | if not self.vis: 63 | loss.backward() 64 | 65 | 66 | torch.nn.utils.clip_grad_norm_(self.model.parameters(), 4, norm_type=2) 67 | optimizer.step() 68 | optimizer.zero_grad() 69 | 70 | batch_time.update(time.time() - end) 71 | end = time.time() 72 | 73 | 74 | if i % self.print_freq == 0 and self.rank == 0 : 75 | msg = 'Epoch: [{0}][{1}/{2}] ' \ 76 | 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s) ' \ 77 | 'Speed: {speed:.1f} samples/s ' \ 78 | 'Data: {data_time_val:.3f}ms ({data_time_avg:.3f}ms)'.format( 79 | epoch, i, len(data_loader), 80 | batch_time=batch_time, 81 | speed=num_images / batch_time.val, 82 | data_time_val=(data_time.val)*1000, 83 | data_time_avg=(data_time.avg)*1000 84 | ) 85 | for name in self.meter: 86 | msg+='{l}'.format( 87 | l=_get_loss_info(self.meter[name],name) 88 | ) 89 | self.logger.info(msg) 90 | 91 | if self.lr_scheduler: 92 | self.lr_scheduler.step() 93 | 94 | 95 | 96 | 97 | 98 | def _get_loss_info(meter, loss_name): 99 | msg = '{name}: {meter.val:.3e} ({meter.avg:.3e})\t'.format(name=loss_name, meter=meter) 100 | return msg 101 | 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Query6DoF: Learning Sparse Queries as Implicit Shape Prior for Category-Level 6DoF Pose Estimation 2 | This is the PyTorch implemention of ICCV'23 paper Query6DoF: Learning Sparse Queries as Implicit Shape Prior for Category-Level 6DoF Pose Estimation 3 | 4 | # Abstract 5 | > Category-level 6DoF object pose estimation intends to estimate the rotation, translation, and size of unseen objects. Many previous works use point clouds as a pre-learned shape prior to overcome intra-category variability. The shape prior is deformed to reconstruct instances' point clouds in canonical space and to build dense 3D-3D correspondences between the observed and reconstructed point clouds. However, in these methods, the pre-learned shape prior is not jointly optimized with estimation networks, and they are trained with a surrogate objective. In this paper, we propose a novel 6D pose estimation network based on a series of category-specific sparse queries that serve as the representation of the shape prior. 
Each query represents a shape component, and these queries are learnable embeddings that can be optimized together with the estimation network according to the point cloud reconstruction loss, the normalized object coordinate loss, and the 6D pose estimation loss. Our proposed network adopts a deformation-and-matching paradigm with attention, where the queries dynamically extract features from regions of interest using the attention mechanism and then directly regress results. Furthermore, our method reduces computation overhead through the sparseness of the queries and the incorporation of a lightweight global information injection block. With the aforementioned design, our method achieves state-of-the-art (SOTA) pose estimation performance on the NOCS dataset. 6 | 7 | # Requirements 8 | - Linux (tested on Ubuntu 16.04) 9 | - Python 3.8 10 | - CUDA 11.1 11 | - PyTorch 1.10.2 12 | 13 | # Installation 14 | ~~~ 15 | conda create -n query6dof python=3.8 16 | 17 | conda activate query6dof 18 | 19 | pip install torch==1.10.2+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html 20 | 21 | pip install opencv-python mmengine numpy tqdm 22 | 23 | cd Pointnet2/pointnet2 24 | 25 | python setup.py install 26 | ~~~ 27 | 28 | # Dataset 29 | Download camera_train, camera_eval, real_test, real_train, the ground-truth annotations, and the mesh models provided by [NOCS](https://github.com/hughw19/NOCS_CVPR2019). 30 | Then process these files following [SPD](https://github.com/mentian/object-deformnet). Also download the Mask R-CNN segmentation results and the NOCS predictions from [SPD](https://github.com/mentian/object-deformnet). 31 | The dataset is organized as follows: 32 | ~~~ 33 | 34 | ── data 35 | ├── CAMERA 36 | ├── gts 37 | ├── obj_models 38 | ├── Real 39 | └── results 40 | └── mrcnn_results 41 | ── results 42 | └── nocs_results 43 | ~~~ 44 | 45 | # Evaluation 46 | Please download our pretrained model [here](https://drive.google.com/file/d/11DKVV6NCgecKoe6Pu9OIXWyiROXhuW3J/view?usp=drive_link), or the pretrained model without linear and non-linear shape augmentation [here](https://drive.google.com/file/d/1zJEK_ik8ZmaC25X3RFQkeLFFwHKbtU66/view?usp=drive_link), and put it in the 'runs/CAMERA+Real/run/model' directory. 47 | 48 | Then you can evaluate on REAL275 using the following command. 49 | ~~~ 50 | python tools/valid.py --cfg config/run_eval_real.py --gpus 0 51 | ~~~ 52 | You can evaluate on CAMERA25 using the following command. 53 | ~~~ 54 | python tools/valid.py --cfg config/run_eval_camera.py --gpus 0 55 | ~~~ 56 | 57 | The running speed is reported at the same time. 58 | 59 | # Train 60 | 'tools/train.py' is the main training script. You can train using the following command. 61 | 62 | ~~~ 63 | python tools/train.py --cfg config/run.py --gpus 0,1,2,3 64 | ~~~ 65 | This config trains on 4 GPUs with a batch size of 15 per GPU, for a total batch size of 60. 66 | 67 | # Acknowledgment 68 | The dataset is provided by [NOCS](https://github.com/hughw19/NOCS_CVPR2019). 
Our code is developed based on [Pointnet2.PyTorch](https://github.com/sshaoshuai/Pointnet2.PyTorch) and [SPD](https://github.com/mentian/object-deformnet) -------------------------------------------------------------------------------- /network/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch 4 | from mmengine import Registry 5 | import torch.distributed as dist 6 | 7 | LOSS_REGISTRY = Registry("LOSS") 8 | 9 | def is_dist_avail_and_initialized(): 10 | if not dist.is_available(): 11 | return False 12 | if not dist.is_initialized(): 13 | return False 14 | return True 15 | 16 | def get_world_size(): 17 | if not is_dist_avail_and_initialized(): 18 | return 1 19 | return dist.get_world_size() 20 | 21 | @LOSS_REGISTRY.register_module() 22 | class consistency_loss(nn.Module): 23 | def __init__(self,weight,eta=1e-2): 24 | super().__init__() 25 | self.weight=weight 26 | self.eta=eta 27 | 28 | def forward(self,coord,nocs,R,t,s): 29 | response=torch.bmm(R.transpose(1,2)/s,(coord-t.unsqueeze(dim=1)).transpose(1,2)).transpose(1,2) 30 | loss=nn.functional.smooth_l1_loss(nocs,response,beta=0.5,reduction='none').flatten(1).mean(-1) #B 31 | mask=loss>(self.eta) 32 | valid=mask.float().sum() 33 | if is_dist_avail_and_initialized(): 34 | torch.distributed.all_reduce(valid) 35 | world_size=dist.get_world_size() 36 | valid=(valid/world_size).clamp(min=1) 37 | loss[~mask]=0 38 | loss=(loss.sum())/valid 39 | return self.weight*(loss) 40 | 41 | @LOSS_REGISTRY.register_module() 42 | class consistency_lossv2(consistency_loss): 43 | def __init__(self,weight=1.0,beta=0.1,loss='smooth') -> None: 44 | super().__init__(weight) 45 | self.beta=beta 46 | if loss=='smooth': 47 | self.loss_f=torch.nn.SmoothL1Loss(beta=beta) 48 | else: 49 | self.loss_f=torch.nn.MSELoss() 50 | def forward(self,coord,nocs): 51 | return self.weight*(self.loss_f(coord,nocs)) 52 | 53 | 54 | 55 | @LOSS_REGISTRY.register_module() 56 | class chamfer_lossv2(nn.Module): 57 | def __init__(self,weight,threshold=1.2) -> None: 58 | super().__init__() 59 | self.threshold=threshold 60 | self.weight=weight 61 | 62 | def forward(self,coord,gt): 63 | gt=gt.transpose(2,1) 64 | coord=coord.transpose(2,1) 65 | dis=torch.pow(gt.unsqueeze(dim=-1)-coord.unsqueeze(dim=-2),2).sum(dim=1) 66 | match_gt=torch.amin(dis,dim=-1) 67 | match_coord=torch.amin(dis,dim=-2) 68 | res=(match_coord.mean()+match_gt.mean()) 69 | 70 | return self.weight*(res) 71 | 72 | 73 | @LOSS_REGISTRY.register_module() 74 | class r_lossv2(nn.Module): 75 | def __init__(self,weight=1.0,beta=0.001,loss='smooth') -> None: 76 | super().__init__() 77 | self.weight=weight 78 | self.beta=beta 79 | self.loss_f=loss 80 | def forward(self,pred_r,gt_red,gt_green,sym): 81 | pred_green=pred_r[:,:,1:2] #B,3,1 82 | pred_red=pred_r[:,:,0:1] #B,3,1 83 | if self.loss_f=='smooth': 84 | green_loss=nn.functional.smooth_l1_loss(gt_green,pred_green,beta=self.beta) 85 | else: 86 | green_loss=nn.functional.mse_loss(gt_green,pred_green) 87 | 88 | mask=(sym[:,0]==1) #B 89 | B=mask.shape[0] 90 | valid=B-(mask).float().sum() 91 | b=valid.item()==0 92 | world_size=1 93 | if is_dist_avail_and_initialized(): 94 | torch.distributed.all_reduce(valid) 95 | world_size=dist.get_world_size() 96 | valid=valid/world_size 97 | if b: 98 | red_loss=0 99 | else: 100 | if self.loss_f=='smooth': 101 | red_loss=nn.functional.smooth_l1_loss(gt_red,pred_red,reduction='none',beta=self.beta)[:,:,0].mean(-1) #B 102 | else: 103 | 
red_loss=nn.functional.mse_loss(gt_red,pred_red,reduction='none')[:,:,0].mean(-1) #B 104 | red_loss[mask]=0 105 | red_loss=red_loss.sum()/(valid) 106 | return self.weight*(green_loss+red_loss) 107 | 108 | 109 | @LOSS_REGISTRY.register_module() 110 | class t_loss(nn.Module): 111 | def __init__(self,weight=1.0,beta=0.005,loss='smooth') -> None: 112 | super().__init__() 113 | self.weight=weight 114 | self.beta=beta 115 | if loss=='smooth': 116 | self.loss_f=torch.nn.SmoothL1Loss(beta=beta) 117 | else: 118 | self.loss_f=torch.nn.MSELoss() 119 | def forward(self,pred_t,t): 120 | return self.weight*self.loss_f(pred_t,t) 121 | 122 | @LOSS_REGISTRY.register_module() 123 | class s_loss(nn.Module): 124 | def __init__(self,weight=1.0,beta=0.005,loss='smooth') -> None: 125 | super().__init__() 126 | self.weight=weight 127 | self.beta=beta 128 | if loss=='smooth': 129 | self.loss_f=torch.nn.SmoothL1Loss(beta=beta) 130 | else: 131 | self.loss_f=torch.nn.MSELoss() 132 | def forward(self,pred_s,s): 133 | return self.weight*self.loss_f(pred_s,s) 134 | -------------------------------------------------------------------------------- /network/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import cv2 5 | import math 6 | import numpy as np 7 | from .decoder import DECODER_REGISTRY 8 | from .encoder import ENCODER_REGISTRY 9 | from .loss import LOSS_REGISTRY 10 | from mmengine import Registry 11 | 12 | NETWORK_REGISTRY = Registry("NETWORK") 13 | 14 | 15 | @NETWORK_REGISTRY.register_module() 16 | class Sparsenetv7(nn.Module): 17 | def __init__(self, 18 | backbone=dict( 19 | type='PointNetfeat' 20 | ), 21 | decoder=dict( 22 | type='sparse_decoder' 23 | ), 24 | pose_estimate=dict( 25 | type='pose_estimater' 26 | ), 27 | name='Posenet', 28 | training=False, 29 | input_dim=256, 30 | n_pts=128, 31 | cat_num=6, 32 | losses=[], 33 | loss_name=[], 34 | vis=False 35 | ) -> None: 36 | super().__init__() 37 | self.backbone=ENCODER_REGISTRY.build(backbone) 38 | self.pose_estimater=DECODER_REGISTRY.build(pose_estimate) 39 | self.decoder=DECODER_REGISTRY.build(decoder) 40 | prior_feat=(2*torch.rand((cat_num,n_pts,input_dim))-1)/input_dim 41 | self.prior_feat=torch.nn.parameter.Parameter(data=prior_feat,requires_grad=True) 42 | self.cat_num=cat_num 43 | self.training=training 44 | self.sym_id=torch.Tensor([0,1,3]) 45 | self.count=0 46 | if training: 47 | self.losses=[LOSS_REGISTRY.build(loss) for loss in losses] 48 | self.loss_name=loss_name 49 | self.vis=vis 50 | 51 | def forward(self,batched_inputs): 52 | points=batched_inputs['points'] 53 | category=batched_inputs['cat_id'] #B 54 | if self.training: 55 | nocs=batched_inputs['nocs'] 56 | model=batched_inputs['model'] 57 | R=batched_inputs['R'] 58 | s=batched_inputs['s'] 59 | gt_green=batched_inputs['gt_green'] 60 | gt_red=batched_inputs['gt_red'] 61 | mean_shape=batched_inputs['mean_shape'] 62 | t=batched_inputs['t'] 63 | s_delta=batched_inputs['dimension_delta'] 64 | mean_t=points.mean(dim=1) 65 | encoder_input=points-mean_t.unsqueeze(dim=1) 66 | encoder_out=self.backbone(encoder_input) 67 | inst_feat=encoder_out.transpose(1,2) 68 | prior_feat=self.prior_feat[category,...] 
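        # self.prior_feat has shape (cat_num, n_pts, input_dim); indexing with the
        # per-instance category ids gives the learned category-specific query
        # embeddings for this batch, shape (B, n_pts, input_dim).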
69 | 70 | device=torch.cuda.current_device() 71 | index=category+torch.arange(encoder_input.shape[0],dtype=torch.long,device=device)*self.cat_num 72 | 73 | sym=batched_inputs['sym'] 74 | 75 | if self.training: 76 | inst_feat,coord,response_coord=self.decoder(prior_feat,inst_feat,index,encoder_input) 77 | pred_r,pred_t,pred_s=self.pose_estimater(inst_feat,index) 78 | pred_t=pred_t+mean_t 79 | loss_dict=self.train_forward(pred_r,pred_t,pred_s,gt_green,gt_red,t,s_delta,sym,coord,model,nocs,response_coord,prior_feat,points,R,s,mean_shape) 80 | 81 | return loss_dict 82 | 83 | else: 84 | mean_shape=batched_inputs['mean_shape'] 85 | if not self.vis: 86 | inst_feat=self.decoder(prior_feat,inst_feat,index,encoder_input) 87 | else: 88 | inst_feat,coord,response_coord,iam1,iam2=self.decoder(prior_feat,inst_feat,index,encoder_input) 89 | B,N=iam1.shape[0],iam1.shape[1] 90 | iam1=iam1.view(B,N,-1,4) 91 | M=iam2.shape[1] 92 | iam2=iam2.view(B,M,-1,4) 93 | pred_r,pred_t,pred_s=self.pose_estimater(inst_feat,index) 94 | #pred_r:B,3,3 pred_t:B,3 pred_s:B,3 95 | pred_s=pred_s+mean_shape 96 | pred_t=pred_t+mean_t 97 | B=pred_r.shape[0] 98 | trans=torch.zeros((B,4,4),device=device) 99 | nocs_scale=torch.linalg.norm(pred_s,dim=-1,keepdim=True) #B,1 100 | trans[:,3,3]=1 101 | 102 | theta_x_=pred_r[:,0,0]-pred_r[:,2,2]#B 103 | theta_y_=pred_r[:,0,2]-pred_r[:,2,0]#B 104 | r_norm_=(theta_x_**2+theta_y_**2)**0.5 #B 105 | theta_x_=theta_x_/r_norm_ 106 | theta_y_=theta_y_/r_norm_ 107 | s_map_=torch.zeros((B,3,3),device=device) #B,3,3 108 | s_map_[:,1,1]=1 109 | s_map_[:,0,0],s_map_[:,0,2],s_map_[:,2,0],s_map_[:,2,2]=theta_x_,-theta_y_,theta_y_,theta_x_ 110 | delta_r=torch.bmm(pred_r,s_map_) #B,3,3 111 | 112 | mask=torch.isin(category,self.sym_id.to(device)) 113 | pred_r[mask,...]=delta_r[mask,...] 
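            # For categories listed in self.sym_id (presumably the rotationally
            # symmetric ones), the rotation about the symmetry axis is ambiguous,
            # so pred_r is replaced by pred_r @ s_map_, i.e. canonicalized by an
            # extra rotation about the y axis before the output transform is built.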
114 | 115 | trans[:,:3,:3]=pred_r*(nocs_scale.unsqueeze(dim=-1)) 116 | trans[:,:3,3:]=pred_t.unsqueeze(dim=-1) 117 | size=pred_s/nocs_scale 118 | trans=trans.cpu().numpy() 119 | size=size.cpu().numpy() 120 | if not self.vis: 121 | return trans,size 122 | else: 123 | return trans,size,coord.cpu(),response_coord.cpu(),iam1.cpu(),iam2.cpu(),pred_r.cpu().numpy(),pred_t.cpu().numpy(),nocs_scale.cpu().numpy() 124 | 125 | 126 | def train_forward(self,pred_r,pred_t,pred_s,gt_green,gt_red,t,s_delta,sym,coord,model,nocs,response_coord,prior_feat,points,R,s,mean_shape): 127 | ''' 128 | 129 | ''' 130 | paras={ 131 | 'chamfer':(coord,model), 132 | 'r':(pred_r,gt_red,gt_green,sym), 133 | 't':(pred_t,t), 134 | 's':(pred_s,s_delta), 135 | 'nocs':(response_coord,nocs), 136 | 'consistency':(points,response_coord,pred_r,pred_t,torch.linalg.norm(pred_s+mean_shape,dim=-1,keepdim=True).unsqueeze(dim=-1)), 137 | } 138 | return {name:loss(*(paras[name])) for loss,name in zip(self.losses,self.loss_name)} 139 | 140 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cuda_utils.h" 6 | #include "interpolate_gpu.h" 7 | 8 | 9 | __global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, 10 | const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { 11 | // unknown: (B, N, 3) 12 | // known: (B, M, 3) 13 | // output: 14 | // dist2: (B, N, 3) 15 | // idx: (B, N, 3) 16 | 17 | int bs_idx = blockIdx.y; 18 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (bs_idx >= b || pt_idx >= n) return; 20 | 21 | unknown += bs_idx * n * 3 + pt_idx * 3; 22 | known += bs_idx * m * 3; 23 | dist2 += bs_idx * n * 3 + pt_idx * 3; 24 | idx += bs_idx * n * 3 + pt_idx * 3; 25 | 26 | float ux = unknown[0]; 27 | float uy = unknown[1]; 28 | float uz = unknown[2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; besti3 = besti2; 39 | best2 = best1; besti2 = besti1; 40 | best1 = d; besti1 = k; 41 | } 42 | else if (d < best2) { 43 | best3 = best2; besti3 = besti2; 44 | best2 = d; besti2 = k; 45 | } 46 | else if (d < best3) { 47 | best3 = d; besti3 = k; 48 | } 49 | } 50 | dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; 51 | idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; 52 | } 53 | 54 | 55 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 56 | const float *known, float *dist2, int *idx, cudaStream_t stream) { 57 | // unknown: (B, N, 3) 58 | // known: (B, M, 3) 59 | // output: 60 | // dist2: (B, N, 3) 61 | // idx: (B, N, 3) 62 | 63 | cudaError_t err; 64 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 65 | dim3 threads(THREADS_PER_BLOCK); 66 | 67 | three_nn_kernel_fast<<>>(b, n, m, unknown, known, dist2, idx); 68 | 69 | err = cudaGetLastError(); 70 | if (cudaSuccess != err) { 71 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 72 | exit(-1); 73 | } 74 | } 75 | 76 | 77 | __global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ 
points, 78 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { 79 | // points: (B, C, M) 80 | // idx: (B, N, 3) 81 | // weight: (B, N, 3) 82 | // output: 83 | // out: (B, C, N) 84 | 85 | int bs_idx = blockIdx.z; 86 | int c_idx = blockIdx.y; 87 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 88 | 89 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 90 | 91 | weight += bs_idx * n * 3 + pt_idx * 3; 92 | points += bs_idx * c * m + c_idx * m; 93 | idx += bs_idx * n * 3 + pt_idx * 3; 94 | out += bs_idx * c * n + c_idx * n; 95 | 96 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; 97 | } 98 | 99 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 100 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) { 101 | // points: (B, C, M) 102 | // idx: (B, N, 3) 103 | // weight: (B, N, 3) 104 | // output: 105 | // out: (B, C, N) 106 | 107 | cudaError_t err; 108 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 109 | dim3 threads(THREADS_PER_BLOCK); 110 | three_interpolate_kernel_fast<<>>(b, c, m, n, points, idx, weight, out); 111 | 112 | err = cudaGetLastError(); 113 | if (cudaSuccess != err) { 114 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 115 | exit(-1); 116 | } 117 | } 118 | 119 | 120 | __global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) { 122 | // grad_out: (B, C, N) 123 | // weight: (B, N, 3) 124 | // output: 125 | // grad_points: (B, C, M) 126 | 127 | int bs_idx = blockIdx.z; 128 | int c_idx = blockIdx.y; 129 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 130 | 131 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 132 | 133 | grad_out += bs_idx * c * n + c_idx * n + pt_idx; 134 | weight += bs_idx * n * 3 + pt_idx * 3; 135 | grad_points += bs_idx * c * m + c_idx * m; 136 | idx += bs_idx * n * 3 + pt_idx * 3; 137 | 138 | 139 | atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); 140 | atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); 141 | atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); 142 | } 143 | 144 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 145 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream) { 146 | // grad_out: (B, C, N) 147 | // weight: (B, N, 3) 148 | // output: 149 | // grad_points: (B, C, M) 150 | 151 | cudaError_t err; 152 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 153 | dim3 threads(THREADS_PER_BLOCK); 154 | three_interpolate_grad_kernel_fast<<>>(b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | err = cudaGetLastError(); 157 | if (cudaSuccess != err) { 158 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 159 | exit(-1); 160 | } 161 | } -------------------------------------------------------------------------------- /Pointnet2/pointnet2/pointnet2_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from . import pointnet2_utils 6 | from . 
import pytorch_utils as pt_utils 7 | from typing import List 8 | 9 | 10 | class _PointnetSAModuleBase(nn.Module): 11 | 12 | def __init__(self): 13 | super().__init__() 14 | self.npoint = None 15 | self.groupers = None 16 | self.mlps = None 17 | self.pool_method = 'max_pool' 18 | 19 | def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor): 20 | """ 21 | :param xyz: (B, N, 3) tensor of the xyz coordinates of the features 22 | :param features: (B, N, C) tensor of the descriptors of the the features 23 | :param new_xyz: 24 | :return: 25 | new_xyz: (B, npoint, 3) tensor of the new features' xyz 26 | new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors 27 | """ 28 | new_features_list = [] 29 | 30 | xyz_flipped = xyz.transpose(1, 2).contiguous() 31 | if new_xyz is None: 32 | new_xyz = pointnet2_utils.gather_operation( 33 | xyz_flipped, 34 | pointnet2_utils.furthest_point_sample(xyz, self.npoint) 35 | ).transpose(1, 2).contiguous() if self.npoint is not None else None 36 | 37 | for i in range(len(self.groupers)): 38 | new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample) 39 | 40 | new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample) 41 | if self.pool_method == 'max_pool': 42 | new_features = F.max_pool2d( 43 | new_features, kernel_size=[1, new_features.size(3)] 44 | ) # (B, mlp[-1], npoint, 1) 45 | elif self.pool_method == 'avg_pool': 46 | new_features = F.avg_pool2d( 47 | new_features, kernel_size=[1, new_features.size(3)] 48 | ) # (B, mlp[-1], npoint, 1) 49 | else: 50 | raise NotImplementedError 51 | 52 | new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint) 53 | new_features_list.append(new_features) 54 | 55 | return new_xyz, torch.cat(new_features_list, dim=1) 56 | 57 | 58 | class PointnetSAModuleMSG(_PointnetSAModuleBase): 59 | """Pointnet set abstraction layer with multiscale grouping""" 60 | 61 | def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True, 62 | use_xyz: bool = True, pool_method='max_pool', instance_norm=False,use_norm=False): 63 | """ 64 | :param npoint: int 65 | :param radii: list of float, list of radii to group with 66 | :param nsamples: list of int, number of samples in each ball query 67 | :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale 68 | :param bn: whether to use batchnorm 69 | :param use_xyz: 70 | :param pool_method: max_pool / avg_pool 71 | :param instance_norm: whether to use instance_norm 72 | """ 73 | super().__init__() 74 | 75 | assert len(radii) == len(nsamples) == len(mlps) 76 | 77 | self.npoint = npoint 78 | self.groupers = nn.ModuleList() 79 | self.mlps = nn.ModuleList() 80 | for i in range(len(radii)): 81 | radius = radii[i] 82 | nsample = nsamples[i] 83 | self.groupers.append( 84 | pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,norm=use_norm) 85 | if npoint is not None else pointnet2_utils.GroupAll(use_xyz) 86 | ) 87 | mlp_spec = mlps[i] 88 | if use_xyz: 89 | mlp_spec[0] += 3 90 | 91 | self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm)) 92 | self.pool_method = pool_method 93 | 94 | 95 | class PointnetSAModule(PointnetSAModuleMSG): 96 | """Pointnet set abstraction layer""" 97 | 98 | def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None, 99 | bn: bool = True, use_xyz: bool = True, pool_method='max_pool', 
instance_norm=False): 100 | """ 101 | :param mlp: list of int, spec of the pointnet before the global max_pool 102 | :param npoint: int, number of features 103 | :param radius: float, radius of ball 104 | :param nsample: int, number of samples in the ball query 105 | :param bn: whether to use batchnorm 106 | :param use_xyz: 107 | :param pool_method: max_pool / avg_pool 108 | :param instance_norm: whether to use instance_norm 109 | """ 110 | super().__init__( 111 | mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, 112 | pool_method=pool_method, instance_norm=instance_norm 113 | ) 114 | 115 | 116 | class PointnetFPModule(nn.Module): 117 | r"""Propigates the features of one set to another""" 118 | 119 | def __init__(self, *, mlp: List[int], bn: bool = True): 120 | """ 121 | :param mlp: list of int 122 | :param bn: whether to use batchnorm 123 | """ 124 | super().__init__() 125 | self.mlp = pt_utils.SharedMLP(mlp, bn=bn) 126 | 127 | def forward( 128 | self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor 129 | ) -> torch.Tensor: 130 | """ 131 | :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features 132 | :param known: (B, m, 3) tensor of the xyz positions of the known features 133 | :param unknow_feats: (B, C1, n) tensor of the features to be propigated to 134 | :param known_feats: (B, C2, m) tensor of features to be propigated 135 | :return: 136 | new_features: (B, mlp[-1], n) tensor of the features of the unknown features 137 | """ 138 | if known is not None: 139 | dist, idx = pointnet2_utils.three_nn(unknown, known) 140 | dist_recip = 1.0 / (dist + 1e-8) 141 | norm = torch.sum(dist_recip, dim=2, keepdim=True) 142 | weight = dist_recip / norm 143 | 144 | interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight) 145 | else: 146 | interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1)) 147 | 148 | if unknow_feats is not None: 149 | new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n) 150 | else: 151 | new_features = interpolated_feats 152 | 153 | new_features = new_features.unsqueeze(-1) 154 | new_features = self.mlp(new_features) 155 | 156 | return new_features.squeeze(-1) 157 | 158 | 159 | if __name__ == "__main__": 160 | pass 161 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from typing import List, Tuple 3 | 4 | 5 | class SharedMLP(nn.Sequential): 6 | 7 | def __init__( 8 | self, 9 | args: List[int], 10 | *, 11 | bn: bool = False, 12 | activation=nn.ReLU(inplace=True), 13 | preact: bool = False, 14 | first: bool = False, 15 | name: str = "", 16 | instance_norm: bool = False, 17 | ): 18 | super().__init__() 19 | 20 | for i in range(len(args) - 1): 21 | self.add_module( 22 | name + 'layer{}'.format(i), 23 | Conv2d( 24 | args[i], 25 | args[i + 1], 26 | bn=(not first or not preact or (i != 0)) and bn, 27 | activation=activation 28 | if (not first or not preact or (i != 0)) else None, 29 | preact=preact, 30 | instance_norm=instance_norm 31 | ) 32 | ) 33 | 34 | 35 | class _ConvBase(nn.Sequential): 36 | 37 | def __init__( 38 | self, 39 | in_size, 40 | out_size, 41 | kernel_size, 42 | stride, 43 | padding, 44 | activation, 45 | bn, 46 | init, 47 | conv=None, 48 | batch_norm=None, 49 | bias=True, 50 | preact=False, 51 | 
name="", 52 | instance_norm=False, 53 | instance_norm_func=None 54 | ): 55 | super().__init__() 56 | 57 | bias = bias and (not bn) 58 | conv_unit = conv( 59 | in_size, 60 | out_size, 61 | kernel_size=kernel_size, 62 | stride=stride, 63 | padding=padding, 64 | bias=bias 65 | ) 66 | init(conv_unit.weight) 67 | if bias: 68 | nn.init.constant_(conv_unit.bias, 0) 69 | 70 | if bn: 71 | if not preact: 72 | bn_unit = batch_norm(out_size) 73 | else: 74 | bn_unit = batch_norm(in_size) 75 | if instance_norm: 76 | if not preact: 77 | in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False) 78 | else: 79 | in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False) 80 | 81 | if preact: 82 | if bn: 83 | self.add_module(name + 'bn', bn_unit) 84 | 85 | if activation is not None: 86 | self.add_module(name + 'activation', activation) 87 | 88 | if not bn and instance_norm: 89 | self.add_module(name + 'in', in_unit) 90 | 91 | self.add_module(name + 'conv', conv_unit) 92 | 93 | if not preact: 94 | if bn: 95 | self.add_module(name + 'bn', bn_unit) 96 | 97 | if activation is not None: 98 | self.add_module(name + 'activation', activation) 99 | 100 | if not bn and instance_norm: 101 | self.add_module(name + 'in', in_unit) 102 | 103 | 104 | class _BNBase(nn.Sequential): 105 | 106 | def __init__(self, in_size, batch_norm=None, name=""): 107 | super().__init__() 108 | self.add_module(name + "bn", batch_norm(in_size)) 109 | 110 | nn.init.constant_(self[0].weight, 1.0) 111 | nn.init.constant_(self[0].bias, 0) 112 | 113 | 114 | class BatchNorm1d(_BNBase): 115 | 116 | def __init__(self, in_size: int, *, name: str = ""): 117 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 118 | 119 | 120 | class BatchNorm2d(_BNBase): 121 | 122 | def __init__(self, in_size: int, name: str = ""): 123 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 124 | 125 | 126 | class Conv1d(_ConvBase): 127 | 128 | def __init__( 129 | self, 130 | in_size: int, 131 | out_size: int, 132 | *, 133 | kernel_size: int = 1, 134 | stride: int = 1, 135 | padding: int = 0, 136 | activation=nn.ReLU(inplace=True), 137 | bn: bool = False, 138 | init=nn.init.kaiming_normal_, 139 | bias: bool = True, 140 | preact: bool = False, 141 | name: str = "", 142 | instance_norm=False 143 | ): 144 | super().__init__( 145 | in_size, 146 | out_size, 147 | kernel_size, 148 | stride, 149 | padding, 150 | activation, 151 | bn, 152 | init, 153 | conv=nn.Conv1d, 154 | batch_norm=BatchNorm1d, 155 | bias=bias, 156 | preact=preact, 157 | name=name, 158 | instance_norm=instance_norm, 159 | instance_norm_func=nn.InstanceNorm1d 160 | ) 161 | 162 | 163 | class Conv2d(_ConvBase): 164 | 165 | def __init__( 166 | self, 167 | in_size: int, 168 | out_size: int, 169 | *, 170 | kernel_size: Tuple[int, int] = (1, 1), 171 | stride: Tuple[int, int] = (1, 1), 172 | padding: Tuple[int, int] = (0, 0), 173 | activation=nn.ReLU(inplace=True), 174 | bn: bool = False, 175 | init=nn.init.kaiming_normal_, 176 | bias: bool = True, 177 | preact: bool = False, 178 | name: str = "", 179 | instance_norm=False 180 | ): 181 | super().__init__( 182 | in_size, 183 | out_size, 184 | kernel_size, 185 | stride, 186 | padding, 187 | activation, 188 | bn, 189 | init, 190 | conv=nn.Conv2d, 191 | batch_norm=BatchNorm2d, 192 | bias=bias, 193 | preact=preact, 194 | name=name, 195 | instance_norm=instance_norm, 196 | instance_norm_func=nn.InstanceNorm2d 197 | ) 198 | 199 | 200 | class FC(nn.Sequential): 201 | 202 | def __init__( 203 | self, 204 | 
in_size: int, 205 | out_size: int, 206 | *, 207 | activation=nn.ReLU(inplace=True), 208 | bn: bool = False, 209 | init=None, 210 | preact: bool = False, 211 | name: str = "" 212 | ): 213 | super().__init__() 214 | 215 | fc = nn.Linear(in_size, out_size, bias=not bn) 216 | if init is not None: 217 | init(fc.weight) 218 | if not bn: 219 | nn.init.constant(fc.bias, 0) 220 | 221 | if preact: 222 | if bn: 223 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 224 | 225 | if activation is not None: 226 | self.add_module(name + 'activation', activation) 227 | 228 | self.add_module(name + 'fc', fc) 229 | 230 | if not preact: 231 | if bn: 232 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 233 | 234 | if activation is not None: 235 | self.add_module(name + 'activation', activation) 236 | 237 | -------------------------------------------------------------------------------- /network/decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .basic import MLP 5 | from mmengine import Registry 6 | import math 7 | 8 | DECODER_REGISTRY = Registry("DECODER") 9 | 10 | 11 | @DECODER_REGISTRY.register_module() 12 | class deep_prior_decoderv2_9(nn.Module): 13 | def __init__(self,input_dim=256,group=4,cat_num=6,middle_dim=1024,training=False,vis=False) -> None: 14 | super().__init__() 15 | self.input_dim=input_dim 16 | self.group=group 17 | self.mlp1=MLP(input_dim,middle_dim,group*input_dim*cat_num) 18 | self.fc1=torch.nn.Linear(group*input_dim,input_dim) 19 | self.fc2=torch.nn.Linear(group*input_dim,input_dim) 20 | self.mlp2=MLP(input_dim,middle_dim,input_dim) 21 | self.mlp3=MLP(input_dim,middle_dim,group*input_dim*cat_num) 22 | self.mlp4=MLP(input_dim+64,middle_dim,input_dim+64) 23 | self.mlp5=MLP(input_dim+64,middle_dim,input_dim*2) 24 | self.mlp6=MLP(input_dim,middle_dim,input_dim) 25 | self.training=training 26 | self.chamfer=MLP(input_dim,128,3) 27 | self.glo_enhance1=global_enhancev2() 28 | self.glo_enhance2=global_enhancev2() 29 | self.glo_enhance3=global_enhancev2() 30 | self.glo_enhance4=global_enhancev2(input_dim+64) 31 | self.nocs_mlp=MLP(input_dim+64,128,3) 32 | self.coord_mlp=MLP(3,32,64) 33 | self.alpha1=torch.nn.parameter.Parameter(data=torch.tensor([1],dtype=torch.float32),requires_grad=True) 34 | self.alpha2=torch.nn.parameter.Parameter(data=torch.tensor([1],dtype=torch.float32),requires_grad=True) 35 | self.vis=vis 36 | 37 | 38 | def forward(self,prior_feat,inst_feat,index,encoder_input): 39 | ''' 40 | inst:B,N,D 41 | prior:B,M,D 42 | ''' 43 | B,N,D=inst_feat.shape 44 | M=prior_feat.shape[1] 45 | 46 | prior_feat=self.glo_enhance1(prior_feat) 47 | inst_feat=self.glo_enhance2(inst_feat) 48 | 49 | conv_para1=self.mlp1(prior_feat) #B,M,6*D*4 50 | conv_para1=conv_para1.transpose(1,2).contiguous().view(-1,self.group*D,M) 51 | conv_para1=torch.index_select(conv_para1,0,index) #B,4*D,M 52 | conv_para1=(conv_para1.view(B,D,-1)) #B,D,4*M 53 | 54 | iam1=torch.bmm(inst_feat,conv_para1) #B,N,4*M 55 | iam_prob=(iam1.sigmoid_())/(self.alpha1.clamp(min=1e-5)) 56 | 57 | aggre_inst=torch.bmm(iam_prob.transpose(1,2),inst_feat) #B,4*M,D 58 | normalizer=iam_prob.sum(1,keepdim=True).transpose(1,2).clamp(min=1e-6) #B,4*M,1 59 | if not self.training and self.vis: 60 | iam1=iam_prob/(normalizer.transpose(1,2))#B,N,4*M 61 | aggre_inst=aggre_inst/normalizer 62 | aggre_inst=aggre_inst.view(B,M,-1) #B,M,4*D 63 | aggre_inst=self.fc1(aggre_inst) #B,M,D 64 | 65 | 
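# The block above is the prior-to-instance aggregation step of this decoder:
# mlp1 predicts `group` dynamic kernels for every prior query, the bmm with the instance
# features yields instance activation maps (iam1, shape B x N x group*M), and the
# sigmoid-scaled maps softly pool instance features into each query (aggre_inst);
# fc1 then merges the groups before the residual update of prior_feat just below.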
prior_feat=prior_feat+aggre_inst 66 | res_prior_feat=self.mlp2(prior_feat)#B,M,D 67 | prior_feat=res_prior_feat+prior_feat 68 | 69 | prior_feat=self.glo_enhance3(prior_feat) 70 | 71 | res_prior_feat=self.mlp6(prior_feat)#B,M,D 72 | prior_feat=res_prior_feat+prior_feat 73 | 74 | if self.training or self.vis: 75 | coord=self.chamfer(prior_feat) 76 | 77 | conv_para2=self.mlp3(inst_feat) #B,N,6*D*4 78 | conv_para2=conv_para2.transpose(1,2).contiguous().view(-1,self.group*D,N) 79 | conv_para2=torch.index_select(conv_para2,0,index) #B,4*D,N 80 | conv_para2=conv_para2.view(B,D,-1) #B,D,4*N 81 | 82 | iam2=torch.bmm(prior_feat,conv_para2) #B,M,4*N 83 | iam_prob=(iam2.sigmoid_())/(self.alpha2.clamp(min=1e-5)) #B,M,4*N 84 | 85 | aggre_prior=torch.bmm(iam_prob.transpose(1,2),prior_feat) #B,4*N,D 86 | normalizer=iam_prob.sum(1,keepdim=True).transpose(1,2).clamp(min=1e-6) #B,4*N,1 87 | if not self.training and self.vis: 88 | iam2=iam_prob/(normalizer.transpose(1,2))#B,M,4*N 89 | aggre_prior=aggre_prior/(normalizer)#B,N,4*M 90 | aggre_prior=aggre_prior.view(B,N,-1) #B,N,4*D 91 | aggre_prior=self.fc2(aggre_prior) #B,N,D 92 | 93 | 94 | inst_feat=inst_feat+aggre_prior#B,N,D 95 | 96 | coord_feat=self.coord_mlp(encoder_input) #B,N,64 97 | inst_feat=torch.cat((inst_feat,coord_feat),dim=-1) 98 | 99 | res_inst_feat=self.mlp4(inst_feat) #B,N,D 100 | inst_feat=res_inst_feat+inst_feat 101 | 102 | inst_feat=self.glo_enhance4(inst_feat) 103 | 104 | 105 | if self.training or self.vis: 106 | response_coord=self.nocs_mlp(inst_feat) 107 | response_coord=response_coord.sigmoid_()-0.5 108 | 109 | 110 | inst_feat=self.mlp5(inst_feat) 111 | 112 | inst_feat=torch.nn.functional.adaptive_avg_pool1d(inst_feat.transpose(1,2),1).transpose(1,2) #B,1,D 113 | if self.training: 114 | return inst_feat,coord,response_coord 115 | elif not self.vis: 116 | return inst_feat 117 | else: 118 | return inst_feat,coord,response_coord,iam1,iam2 119 | 120 | 121 | 122 | 123 | 124 | 125 | class global_enhancev2(nn.Module): 126 | def __init__(self,input_dim=256) -> None: 127 | super().__init__() 128 | alpha=torch.tensor([1],dtype=torch.float32) 129 | self.alpha=torch.nn.parameter.Parameter(data=alpha,requires_grad=True) 130 | beta=torch.tensor([0],dtype=torch.float32) 131 | self.beta=torch.nn.parameter.Parameter(data=beta,requires_grad=True) 132 | self.linear=torch.nn.Conv1d(input_dim,input_dim,1,bias=False) 133 | 134 | def forward(self,feat): 135 | ''' 136 | feat:B,N,D 137 | ''' 138 | global_feat=torch.nn.functional.adaptive_avg_pool1d(feat.transpose(1,2),1) #B,D,1 139 | global_feat=self.linear(global_feat) 140 | atten=torch.bmm(feat,global_feat) #B,N,1 141 | mean=atten.squeeze(-1).mean(-1) #B 142 | std=torch.std(atten.squeeze(-1), dim=-1, unbiased=False) 143 | atten=self.alpha*(atten-mean.view(-1,1,1))/(std.view(-1,1,1)+1e-5)+self.beta 144 | global_feat=torch.bmm(atten,global_feat.transpose(1,2)) #B,N,D 145 | feat=feat+global_feat #B,N,D 146 | return feat 147 | 148 | 149 | 150 | 151 | @DECODER_REGISTRY.register_module() 152 | class pose_estimater(nn.Module): 153 | def __init__(self,input_dim=512,middle_dim=256,cat_num=6) -> None: 154 | super().__init__() 155 | self.mlp_r=MLP(input_dim,middle_dim,6) 156 | self.mlp_t=MLP(input_dim,middle_dim,3) 157 | self.mlp_s=MLP(input_dim,middle_dim,3) 158 | 159 | def forward(self,inst_feat,index): 160 | ''' 161 | inst_faet:B,1,2*D 162 | ''' 163 | inst_feat=inst_feat.squeeze(dim=1) 164 | r=self.mlp_r(inst_feat) #B,6 165 | t=self.mlp_t(inst_feat) #B,3 166 | s=self.mlp_s(inst_feat) #B,3 167 | 168 | 
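# mlp_r outputs a continuous 6D rotation parameterization (cf. Zhou et al., CVPR 2019):
# the first three values act as a raw x-axis and the last three as a raw y-axis.
# Ortho6d2Mat below orthonormalizes them via normalization and cross products, so the
# returned matrix R satisfies R @ R.transpose(1, 2) ~ I with det(R) = 1.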
r=self.Ortho6d2Mat(r[:,0:3],r[:,3:]) 169 | 170 | return r,t,s 171 | def Ortho6d2Mat(self,x_raw, y_raw): 172 | y = self.normalize_vector(y_raw) 173 | z = self.cross_product(x_raw, y) #B,3 174 | z = self.normalize_vector(z)#B,3 175 | x = self.cross_product(y,z)#B,3 176 | 177 | x = x.unsqueeze(2) 178 | y = y.unsqueeze(2) 179 | z = z.unsqueeze(2) 180 | matrix = torch.cat((x,y,z),dim=2) #batch*3*3 181 | return matrix 182 | def normalize_vector(self, v, dim =1, return_mag =False): 183 | return torch.nn.functional.normalize(v,dim=dim) 184 | 185 | def cross_product(self,u, v): 186 | return torch.cross(u,v,dim=-1) 187 | -------------------------------------------------------------------------------- /Pointnet2/tools/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch.utils.data as torch_data 4 | import kitti_utils 5 | import cv2 6 | from PIL import Image 7 | 8 | 9 | USE_INTENSITY = False 10 | 11 | 12 | class KittiDataset(torch_data.Dataset): 13 | def __init__(self, root_dir, split='train', mode='TRAIN'): 14 | self.split = split 15 | self.mode = mode 16 | self.classes = ['Car'] 17 | is_test = self.split == 'test' 18 | self.imageset_dir = os.path.join(root_dir, 'KITTI', 'object', 'testing' if is_test else 'training') 19 | 20 | split_dir = os.path.join(root_dir, 'KITTI', 'ImageSets', split + '.txt') 21 | self.image_idx_list = [x.strip() for x in open(split_dir).readlines()] 22 | self.sample_id_list = [int(sample_id) for sample_id in self.image_idx_list] 23 | self.num_sample = self.image_idx_list.__len__() 24 | 25 | self.npoints = 16384 26 | 27 | self.image_dir = os.path.join(self.imageset_dir, 'image_2') 28 | self.lidar_dir = os.path.join(self.imageset_dir, 'velodyne') 29 | self.calib_dir = os.path.join(self.imageset_dir, 'calib') 30 | self.label_dir = os.path.join(self.imageset_dir, 'label_2') 31 | self.plane_dir = os.path.join(self.imageset_dir, 'planes') 32 | 33 | def get_image(self, idx): 34 | img_file = os.path.join(self.image_dir, '%06d.png' % idx) 35 | assert os.path.exists(img_file) 36 | return cv2.imread(img_file) # (H, W, 3) BGR mode 37 | 38 | def get_image_shape(self, idx): 39 | img_file = os.path.join(self.image_dir, '%06d.png' % idx) 40 | assert os.path.exists(img_file) 41 | im = Image.open(img_file) 42 | width, height = im.size 43 | return height, width, 3 44 | 45 | def get_lidar(self, idx): 46 | lidar_file = os.path.join(self.lidar_dir, '%06d.bin' % idx) 47 | assert os.path.exists(lidar_file) 48 | return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) 49 | 50 | def get_calib(self, idx): 51 | calib_file = os.path.join(self.calib_dir, '%06d.txt' % idx) 52 | assert os.path.exists(calib_file) 53 | return kitti_utils.Calibration(calib_file) 54 | 55 | def get_label(self, idx): 56 | label_file = os.path.join(self.label_dir, '%06d.txt' % idx) 57 | assert os.path.exists(label_file) 58 | return kitti_utils.get_objects_from_label(label_file) 59 | 60 | @staticmethod 61 | def get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape): 62 | val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) 63 | val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) 64 | val_flag_merge = np.logical_and(val_flag_1, val_flag_2) 65 | pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) 66 | return pts_valid_flag 67 | 68 | def filtrate_objects(self, obj_list): 69 | type_whitelist = self.classes 70 | if self.mode == 'TRAIN': 71 | type_whitelist = 
list(self.classes) 72 | if 'Car' in self.classes: 73 | type_whitelist.append('Van') 74 | 75 | valid_obj_list = [] 76 | for obj in obj_list: 77 | if obj.cls_type not in type_whitelist: 78 | continue 79 | 80 | valid_obj_list.append(obj) 81 | return valid_obj_list 82 | 83 | def __len__(self): 84 | return len(self.sample_id_list) 85 | 86 | def __getitem__(self, index): 87 | sample_id = int(self.sample_id_list[index]) 88 | calib = self.get_calib(sample_id) 89 | img_shape = self.get_image_shape(sample_id) 90 | pts_lidar = self.get_lidar(sample_id) 91 | 92 | # get valid point (projected points should be in image) 93 | pts_rect = calib.lidar_to_rect(pts_lidar[:, 0:3]) 94 | pts_intensity = pts_lidar[:, 3] 95 | 96 | pts_img, pts_rect_depth = calib.rect_to_img(pts_rect) 97 | pts_valid_flag = self.get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape) 98 | 99 | pts_rect = pts_rect[pts_valid_flag][:, 0:3] 100 | pts_intensity = pts_intensity[pts_valid_flag] 101 | 102 | if self.npoints < len(pts_rect): 103 | pts_depth = pts_rect[:, 2] 104 | pts_near_flag = pts_depth < 40.0 105 | far_idxs_choice = np.where(pts_near_flag == 0)[0] 106 | near_idxs = np.where(pts_near_flag == 1)[0] 107 | near_idxs_choice = np.random.choice(near_idxs, self.npoints - len(far_idxs_choice), replace=False) 108 | 109 | choice = np.concatenate((near_idxs_choice, far_idxs_choice), axis=0) \ 110 | if len(far_idxs_choice) > 0 else near_idxs_choice 111 | np.random.shuffle(choice) 112 | else: 113 | choice = np.arange(0, len(pts_rect), dtype=np.int32) 114 | if self.npoints > len(pts_rect): 115 | extra_choice = np.random.choice(choice, self.npoints - len(pts_rect), replace=False) 116 | choice = np.concatenate((choice, extra_choice), axis=0) 117 | np.random.shuffle(choice) 118 | 119 | ret_pts_rect = pts_rect[choice, :] 120 | ret_pts_intensity = pts_intensity[choice] - 0.5 # translate intensity to [-0.5, 0.5] 121 | 122 | pts_features = [ret_pts_intensity.reshape(-1, 1)] 123 | ret_pts_features = np.concatenate(pts_features, axis=1) if pts_features.__len__() > 1 else pts_features[0] 124 | 125 | sample_info = {'sample_id': sample_id} 126 | 127 | if self.mode == 'TEST': 128 | if USE_INTENSITY: 129 | pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) 130 | else: 131 | pts_input = ret_pts_rect 132 | sample_info['pts_input'] = pts_input 133 | sample_info['pts_rect'] = ret_pts_rect 134 | sample_info['pts_features'] = ret_pts_features 135 | return sample_info 136 | 137 | gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) 138 | 139 | gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) 140 | 141 | # prepare input 142 | if USE_INTENSITY: 143 | pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) 144 | else: 145 | pts_input = ret_pts_rect 146 | 147 | # generate training labels 148 | cls_labels = self.generate_training_labels(ret_pts_rect, gt_boxes3d) 149 | sample_info['pts_input'] = pts_input 150 | sample_info['pts_rect'] = ret_pts_rect 151 | sample_info['cls_labels'] = cls_labels 152 | return sample_info 153 | 154 | @staticmethod 155 | def generate_training_labels(pts_rect, gt_boxes3d): 156 | cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32) 157 | gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True) 158 | extend_gt_boxes3d = kitti_utils.enlarge_box3d(gt_boxes3d, extra_width=0.2) 159 | extend_gt_corners = kitti_utils.boxes3d_to_corners3d(extend_gt_boxes3d, rotate=True) 160 | for k in range(gt_boxes3d.shape[0]): 161 | box_corners = gt_corners[k] 162 | 
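# Per-point segmentation labels: points inside a GT box hull become foreground (1),
# while points that fall only inside the slightly enlarged box (extra_width=0.2) are
# set to -1 so the boundary region is ignored by DiceLoss(ignore_target=-1).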
fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners) 163 | cls_label[fg_pt_flag] = 1 164 | 165 | # enlarge the bbox3d, ignore nearby points 166 | extend_box_corners = extend_gt_corners[k] 167 | fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners) 168 | ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag) 169 | cls_label[ignore_flag] = -1 170 | 171 | return cls_label 172 | 173 | def collate_batch(self, batch): 174 | batch_size = batch.__len__() 175 | ans_dict = {} 176 | 177 | for key in batch[0].keys(): 178 | if isinstance(batch[0][key], np.ndarray): 179 | ans_dict[key] = np.concatenate([batch[k][key][np.newaxis, ...] for k in range(batch_size)], axis=0) 180 | 181 | else: 182 | ans_dict[key] = [batch[k][key] for k in range(batch_size)] 183 | if isinstance(batch[0][key], int): 184 | ans_dict[key] = np.array(ans_dict[key], dtype=np.int32) 185 | elif isinstance(batch[0][key], float): 186 | ans_dict[key] = np.array(ans_dict[key], dtype=np.float32) 187 | 188 | return ans_dict 189 | -------------------------------------------------------------------------------- /Pointnet2/tools/train_and_eval.py: -------------------------------------------------------------------------------- 1 | import _init_path 2 | import numpy as np 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | import torch.optim.lr_scheduler as lr_sched 8 | from torch.nn.utils import clip_grad_norm_ 9 | from torch.utils.data import DataLoader 10 | import tensorboard_logger as tb_log 11 | from dataset import KittiDataset 12 | import argparse 13 | import importlib 14 | 15 | parser = argparse.ArgumentParser(description="Arg parser") 16 | parser.add_argument("--batch_size", type=int, default=8) 17 | parser.add_argument("--epochs", type=int, default=100) 18 | parser.add_argument("--ckpt_save_interval", type=int, default=5) 19 | parser.add_argument('--workers', type=int, default=4) 20 | parser.add_argument("--mode", type=str, default='train') 21 | parser.add_argument("--ckpt", type=str, default='None') 22 | 23 | parser.add_argument("--net", type=str, default='pointnet2_msg') 24 | 25 | parser.add_argument('--lr', type=float, default=0.002) 26 | parser.add_argument('--lr_decay', type=float, default=0.2) 27 | parser.add_argument('--lr_clip', type=float, default=0.000001) 28 | parser.add_argument('--decay_step_list', type=list, default=[50, 70, 80, 90]) 29 | parser.add_argument('--weight_decay', type=float, default=0.001) 30 | 31 | parser.add_argument("--output_dir", type=str, default='output') 32 | parser.add_argument("--extra_tag", type=str, default='default') 33 | 34 | args = parser.parse_args() 35 | 36 | FG_THRESH = 0.3 37 | 38 | 39 | def log_print(info, log_f=None): 40 | print(info) 41 | if log_f is not None: 42 | print(info, file=log_f) 43 | 44 | 45 | class DiceLoss(nn.Module): 46 | def __init__(self, ignore_target=-1): 47 | super().__init__() 48 | self.ignore_target = ignore_target 49 | 50 | def forward(self, input, target): 51 | """ 52 | :param input: (N), logit 53 | :param target: (N), {0, 1} 54 | :return: 55 | """ 56 | input = torch.sigmoid(input.view(-1)) 57 | target = target.float().view(-1) 58 | mask = (target != self.ignore_target).float() 59 | return 1.0 - (torch.min(input, target) * mask).sum() / torch.clamp((torch.max(input, target) * mask).sum(), min=1.0) 60 | 61 | 62 | def train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f): 63 | model.train() 64 | log_print('===============TRAIN EPOCH %d================' % epoch, 
log_f=log_f) 65 | loss_func = DiceLoss(ignore_target=-1) 66 | 67 | for it, batch in enumerate(train_loader): 68 | optimizer.zero_grad() 69 | 70 | pts_input, cls_labels = batch['pts_input'], batch['cls_labels'] 71 | pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float() 72 | cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1) 73 | 74 | pred_cls = model(pts_input) 75 | pred_cls = pred_cls.view(-1) 76 | 77 | loss = loss_func(pred_cls, cls_labels) 78 | loss.backward() 79 | clip_grad_norm_(model.parameters(), 1.0) 80 | optimizer.step() 81 | 82 | total_it += 1 83 | 84 | pred_class = (torch.sigmoid(pred_cls) > FG_THRESH) 85 | fg_mask = cls_labels > 0 86 | correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum() 87 | union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct 88 | iou = correct / torch.clamp(union, min=1.0) 89 | 90 | cur_lr = lr_scheduler.get_lr()[0] 91 | tb_log.log_value('learning_rate', cur_lr, epoch) 92 | if tb_log is not None: 93 | tb_log.log_value('train_loss', loss, total_it) 94 | tb_log.log_value('train_fg_iou', iou, total_it) 95 | 96 | log_print('training epoch %d: it=%d/%d, total_it=%d, loss=%.5f, fg_iou=%.3f, lr=%f' % 97 | (epoch, it, len(train_loader), total_it, loss.item(), iou.item(), cur_lr), log_f=log_f) 98 | 99 | return total_it 100 | 101 | 102 | def eval_one_epoch(model, eval_loader, epoch, tb_log=None, log_f=None): 103 | model.train() 104 | log_print('===============EVAL EPOCH %d================' % epoch, log_f=log_f) 105 | 106 | iou_list = [] 107 | for it, batch in enumerate(eval_loader): 108 | pts_input, cls_labels = batch['pts_input'], batch['cls_labels'] 109 | pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float() 110 | cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1) 111 | 112 | pred_cls = model(pts_input) 113 | pred_cls = pred_cls.view(-1) 114 | 115 | pred_class = (torch.sigmoid(pred_cls) > FG_THRESH) 116 | fg_mask = cls_labels > 0 117 | correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum() 118 | union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct 119 | iou = correct / torch.clamp(union, min=1.0) 120 | 121 | iou_list.append(iou.item()) 122 | log_print('EVAL: it=%d/%d, iou=%.3f' % (it, len(eval_loader), iou), log_f=log_f) 123 | 124 | iou_list = np.array(iou_list) 125 | avg_iou = iou_list.mean() 126 | if tb_log is not None: 127 | tb_log.log_value('eval_fg_iou', avg_iou, epoch) 128 | 129 | log_print('\nEpoch %d: Average IoU (samples=%d): %.6f' % (epoch, iou_list.__len__(), avg_iou), log_f=log_f) 130 | return avg_iou 131 | 132 | 133 | def save_checkpoint(model, epoch, ckpt_name): 134 | if isinstance(model, torch.nn.DataParallel): 135 | model_state = model.module.state_dict() 136 | else: 137 | model_state = model.state_dict() 138 | 139 | state = {'epoch': epoch, 'model_state': model_state} 140 | ckpt_name = '{}.pth'.format(ckpt_name) 141 | torch.save(state, ckpt_name) 142 | 143 | 144 | def load_checkpoint(model, filename): 145 | if os.path.isfile(filename): 146 | log_print("==> Loading from checkpoint %s" % filename) 147 | checkpoint = torch.load(filename) 148 | epoch = checkpoint['epoch'] 149 | model.load_state_dict(checkpoint['model_state']) 150 | log_print("==> Done") 151 | else: 152 | raise FileNotFoundError 153 | 154 | return epoch 155 | 156 | 157 | def train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f): 158 | model.cuda() 159 | optimizer = optim.Adam(model.parameters(), 
lr=args.lr, weight_decay=args.weight_decay) 160 | 161 | def lr_lbmd(cur_epoch): 162 | cur_decay = 1 163 | for decay_step in args.decay_step_list: 164 | if cur_epoch >= decay_step: 165 | cur_decay = cur_decay * args.lr_decay 166 | return max(cur_decay, args.lr_clip / args.lr) 167 | 168 | lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd) 169 | 170 | total_it = 0 171 | for epoch in range(1, args.epochs + 1): 172 | lr_scheduler.step(epoch) 173 | total_it = train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f) 174 | 175 | if epoch % args.ckpt_save_interval == 0: 176 | with torch.no_grad(): 177 | avg_iou = eval_one_epoch(model, eval_loader, epoch, tb_log, log_f) 178 | ckpt_name = os.path.join(ckpt_dir, 'checkpoint_epoch_%d' % epoch) 179 | save_checkpoint(model, epoch, ckpt_name) 180 | 181 | 182 | if __name__ == '__main__': 183 | MODEL = importlib.import_module(args.net) # import network module 184 | model = MODEL.get_model(input_channels=0) 185 | 186 | eval_set = KittiDataset(root_dir='./data', mode='EVAL', split='val') 187 | eval_loader = DataLoader(eval_set, batch_size=args.batch_size, shuffle=False, pin_memory=True, 188 | num_workers=args.workers, collate_fn=eval_set.collate_batch) 189 | 190 | if args.mode == 'train': 191 | train_set = KittiDataset(root_dir='./data', mode='TRAIN', split='train') 192 | train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, pin_memory=True, 193 | num_workers=args.workers, collate_fn=train_set.collate_batch) 194 | # output dir config 195 | output_dir = os.path.join(args.output_dir, args.extra_tag) 196 | os.makedirs(output_dir, exist_ok=True) 197 | tb_log.configure(os.path.join(output_dir, 'tensorboard')) 198 | ckpt_dir = os.path.join(output_dir, 'ckpt') 199 | os.makedirs(ckpt_dir, exist_ok=True) 200 | 201 | log_file = os.path.join(output_dir, 'log.txt') 202 | log_f = open(log_file, 'w') 203 | 204 | for key, val in vars(args).items(): 205 | log_print("{:16} {}".format(key, val), log_f=log_f) 206 | 207 | # train and eval 208 | train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f) 209 | log_f.close() 210 | elif args.mode == 'eval': 211 | epoch = load_checkpoint(model, args.ckpt) 212 | model.cuda() 213 | with torch.no_grad(): 214 | avg_iou = eval_one_epoch(model, eval_loader, epoch) 215 | else: 216 | raise NotImplementedError 217 | 218 | -------------------------------------------------------------------------------- /Pointnet2/pointnet2/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "cuda_utils.h" 5 | #include "sampling_gpu.h" 6 | 7 | 8 | __global__ void gather_points_kernel_fast(int b, int c, int n, int m, 9 | const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 10 | // points: (B, C, N) 11 | // idx: (B, M) 12 | // output: 13 | // out: (B, C, M) 14 | 15 | int bs_idx = blockIdx.z; 16 | int c_idx = blockIdx.y; 17 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 18 | if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; 19 | 20 | out += bs_idx * c * m + c_idx * m + pt_idx; 21 | idx += bs_idx * m + pt_idx; 22 | points += bs_idx * c * n + c_idx * n; 23 | out[0] = points[idx[0]]; 24 | } 25 | 26 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 27 | const float *points, const int *idx, float *out, cudaStream_t stream) { 28 | // points: (B, C, N) 29 | // idx: (B, npoints) 30 | // output: 31 | // out: (B, C, npoints) 32 | 33 
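    // Launch layout shared by these wrappers: a 3-D grid of
    // (ceil(npoints / THREADS_PER_BLOCK), C, B) blocks with THREADS_PER_BLOCK threads
    // each, i.e. one thread per output element out[b][c][m].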
| cudaError_t err; 34 | dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 35 | dim3 threads(THREADS_PER_BLOCK); 36 | 37 | gather_points_kernel_fast<<>>(b, c, n, npoints, points, idx, out); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | __global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 47 | const int *__restrict__ idx, float *__restrict__ grad_points) { 48 | // grad_out: (B, C, M) 49 | // idx: (B, M) 50 | // output: 51 | // grad_points: (B, C, N) 52 | 53 | int bs_idx = blockIdx.z; 54 | int c_idx = blockIdx.y; 55 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 56 | if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; 57 | 58 | grad_out += bs_idx * c * m + c_idx * m + pt_idx; 59 | idx += bs_idx * m + pt_idx; 60 | grad_points += bs_idx * c * n + c_idx * n; 61 | 62 | atomicAdd(grad_points + idx[0], grad_out[0]); 63 | } 64 | 65 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 66 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { 67 | // grad_out: (B, C, npoints) 68 | // idx: (B, npoints) 69 | // output: 70 | // grad_points: (B, C, N) 71 | 72 | cudaError_t err; 73 | dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 74 | dim3 threads(THREADS_PER_BLOCK); 75 | 76 | gather_points_grad_kernel_fast<<>>(b, c, n, npoints, grad_out, idx, grad_points); 77 | 78 | err = cudaGetLastError(); 79 | if (cudaSuccess != err) { 80 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 81 | exit(-1); 82 | } 83 | } 84 | 85 | 86 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){ 87 | const float v1 = dists[idx1], v2 = dists[idx2]; 88 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 89 | dists[idx1] = max(v1, v2); 90 | dists_i[idx1] = v2 > v1 ? i2 : i1; 91 | } 92 | 93 | template 94 | __global__ void furthest_point_sampling_kernel(int b, int n, int m, 95 | const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) { 96 | // dataset: (B, N, 3) 97 | // tmp: (B, N) 98 | // output: 99 | // idx: (B, M) 100 | 101 | if (m <= 0) return; 102 | __shared__ float dists[block_size]; 103 | __shared__ int dists_i[block_size]; 104 | 105 | int batch_index = blockIdx.x; 106 | dataset += batch_index * n * 3; 107 | temp += batch_index * n; 108 | idxs += batch_index * m; 109 | 110 | int tid = threadIdx.x; 111 | const int stride = block_size; 112 | 113 | int old = 0; 114 | if (threadIdx.x == 0) 115 | idxs[0] = old; 116 | 117 | __syncthreads(); 118 | for (int j = 1; j < m; j++) { 119 | int besti = 0; 120 | float best = -1; 121 | float x1 = dataset[old * 3 + 0]; 122 | float y1 = dataset[old * 3 + 1]; 123 | float z1 = dataset[old * 3 + 2]; 124 | for (int k = tid; k < n; k += stride) { 125 | float x2, y2, z2; 126 | x2 = dataset[k * 3 + 0]; 127 | y2 = dataset[k * 3 + 1]; 128 | z2 = dataset[k * 3 + 2]; 129 | // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 130 | // if (mag <= 1e-3) 131 | // continue; 132 | 133 | float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 134 | float d2 = min(d, temp[k]); 135 | temp[k] = d2; 136 | besti = d2 > best ? k : besti; 137 | best = d2 > best ? 
d2 : best; 138 | } 139 | dists[tid] = best; 140 | dists_i[tid] = besti; 141 | __syncthreads(); 142 | 143 | if (block_size >= 1024) { 144 | if (tid < 512) { 145 | __update(dists, dists_i, tid, tid + 512); 146 | } 147 | __syncthreads(); 148 | } 149 | 150 | if (block_size >= 512) { 151 | if (tid < 256) { 152 | __update(dists, dists_i, tid, tid + 256); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 256) { 157 | if (tid < 128) { 158 | __update(dists, dists_i, tid, tid + 128); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 128) { 163 | if (tid < 64) { 164 | __update(dists, dists_i, tid, tid + 64); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 64) { 169 | if (tid < 32) { 170 | __update(dists, dists_i, tid, tid + 32); 171 | } 172 | __syncthreads(); 173 | } 174 | if (block_size >= 32) { 175 | if (tid < 16) { 176 | __update(dists, dists_i, tid, tid + 16); 177 | } 178 | __syncthreads(); 179 | } 180 | if (block_size >= 16) { 181 | if (tid < 8) { 182 | __update(dists, dists_i, tid, tid + 8); 183 | } 184 | __syncthreads(); 185 | } 186 | if (block_size >= 8) { 187 | if (tid < 4) { 188 | __update(dists, dists_i, tid, tid + 4); 189 | } 190 | __syncthreads(); 191 | } 192 | if (block_size >= 4) { 193 | if (tid < 2) { 194 | __update(dists, dists_i, tid, tid + 2); 195 | } 196 | __syncthreads(); 197 | } 198 | if (block_size >= 2) { 199 | if (tid < 1) { 200 | __update(dists, dists_i, tid, tid + 1); 201 | } 202 | __syncthreads(); 203 | } 204 | 205 | old = dists_i[0]; 206 | if (tid == 0) 207 | idxs[j] = old; 208 | } 209 | } 210 | 211 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 212 | const float *dataset, float *temp, int *idxs, cudaStream_t stream) { 213 | // dataset: (B, N, 3) 214 | // tmp: (B, N) 215 | // output: 216 | // idx: (B, M) 217 | 218 | cudaError_t err; 219 | unsigned int n_threads = opt_n_threads(n); 220 | 221 | switch (n_threads) { 222 | case 1024: 223 | furthest_point_sampling_kernel<1024><<>>(b, n, m, dataset, temp, idxs); break; 224 | case 512: 225 | furthest_point_sampling_kernel<512><<>>(b, n, m, dataset, temp, idxs); break; 226 | case 256: 227 | furthest_point_sampling_kernel<256><<>>(b, n, m, dataset, temp, idxs); break; 228 | case 128: 229 | furthest_point_sampling_kernel<128><<>>(b, n, m, dataset, temp, idxs); break; 230 | case 64: 231 | furthest_point_sampling_kernel<64><<>>(b, n, m, dataset, temp, idxs); break; 232 | case 32: 233 | furthest_point_sampling_kernel<32><<>>(b, n, m, dataset, temp, idxs); break; 234 | case 16: 235 | furthest_point_sampling_kernel<16><<>>(b, n, m, dataset, temp, idxs); break; 236 | case 8: 237 | furthest_point_sampling_kernel<8><<>>(b, n, m, dataset, temp, idxs); break; 238 | case 4: 239 | furthest_point_sampling_kernel<4><<>>(b, n, m, dataset, temp, idxs); break; 240 | case 2: 241 | furthest_point_sampling_kernel<2><<>>(b, n, m, dataset, temp, idxs); break; 242 | case 1: 243 | furthest_point_sampling_kernel<1><<>>(b, n, m, dataset, temp, idxs); break; 244 | default: 245 | furthest_point_sampling_kernel<512><<>>(b, n, m, dataset, temp, idxs); 246 | } 247 | 248 | err = cudaGetLastError(); 249 | if (cudaSuccess != err) { 250 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 251 | exit(-1); 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /Pointnet2/tools/kitti_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial import Delaunay 3 | 
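# kitti_utils: KITTI label parsing (Object3d), calibration transforms
# (lidar -> rect -> image) and 3D box helpers (corners, enlargement, point-in-hull)
# used by tools/dataset.py to build per-point segmentation labels.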
import scipy 4 | 5 | 6 | def cls_type_to_id(cls_type): 7 | type_to_id = {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3, 'Van': 4} 8 | if cls_type not in type_to_id.keys(): 9 | return -1 10 | return type_to_id[cls_type] 11 | 12 | 13 | class Object3d(object): 14 | def __init__(self, line): 15 | label = line.strip().split(' ') 16 | self.src = line 17 | self.cls_type = label[0] 18 | self.cls_id = cls_type_to_id(self.cls_type) 19 | self.trucation = float(label[1]) 20 | self.occlusion = float(label[2]) # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown 21 | self.alpha = float(label[3]) 22 | self.box2d = np.array((float(label[4]), float(label[5]), float(label[6]), float(label[7])), dtype=np.float32) 23 | self.h = float(label[8]) 24 | self.w = float(label[9]) 25 | self.l = float(label[10]) 26 | self.pos = np.array((float(label[11]), float(label[12]), float(label[13])), dtype=np.float32) 27 | self.dis_to_cam = np.linalg.norm(self.pos) 28 | self.ry = float(label[14]) 29 | self.score = float(label[15]) if label.__len__() == 16 else -1.0 30 | self.level_str = None 31 | self.level = self.get_obj_level() 32 | 33 | def get_obj_level(self): 34 | height = float(self.box2d[3]) - float(self.box2d[1]) + 1 35 | 36 | if height >= 40 and self.trucation <= 0.15 and self.occlusion <= 0: 37 | self.level_str = 'Easy' 38 | return 1 # Easy 39 | elif height >= 25 and self.trucation <= 0.3 and self.occlusion <= 1: 40 | self.level_str = 'Moderate' 41 | return 2 # Moderate 42 | elif height >= 25 and self.trucation <= 0.5 and self.occlusion <= 2: 43 | self.level_str = 'Hard' 44 | return 3 # Hard 45 | else: 46 | self.level_str = 'UnKnown' 47 | return 4 48 | 49 | def generate_corners3d(self): 50 | """ 51 | generate corners3d representation for this object 52 | :return corners_3d: (8, 3) corners of box3d in camera coord 53 | """ 54 | l, h, w = self.l, self.h, self.w 55 | x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] 56 | y_corners = [0, 0, 0, 0, -h, -h, -h, -h] 57 | z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] 58 | 59 | R = np.array([[np.cos(self.ry), 0, np.sin(self.ry)], 60 | [0, 1, 0], 61 | [-np.sin(self.ry), 0, np.cos(self.ry)]]) 62 | corners3d = np.vstack([x_corners, y_corners, z_corners]) # (3, 8) 63 | corners3d = np.dot(R, corners3d).T 64 | corners3d = corners3d + self.pos 65 | return corners3d 66 | 67 | def to_str(self): 68 | print_str = '%s %.3f %.3f %.3f box2d: %s hwl: [%.3f %.3f %.3f] pos: %s ry: %.3f' \ 69 | % (self.cls_type, self.trucation, self.occlusion, self.alpha, self.box2d, self.h, self.w, self.l, 70 | self.pos, self.ry) 71 | return print_str 72 | 73 | def to_kitti_format(self): 74 | kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ 75 | % (self.cls_type, self.trucation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], 76 | self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.pos[0], self.pos[1], self.pos[2], 77 | self.ry) 78 | return kitti_str 79 | 80 | 81 | def get_calib_from_file(calib_file): 82 | with open(calib_file) as f: 83 | lines = f.readlines() 84 | 85 | obj = lines[2].strip().split(' ')[1:] 86 | P2 = np.array(obj, dtype=np.float32) 87 | obj = lines[3].strip().split(' ')[1:] 88 | P3 = np.array(obj, dtype=np.float32) 89 | obj = lines[4].strip().split(' ')[1:] 90 | R0 = np.array(obj, dtype=np.float32) 91 | obj = lines[5].strip().split(' ')[1:] 92 | Tr_velo_to_cam = np.array(obj, dtype=np.float32) 93 | 94 | return {'P2': P2.reshape(3, 4), 95 | 'P3': P3.reshape(3, 4), 96 | 'R0': 
R0.reshape(3, 3), 97 | 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)} 98 | 99 | 100 | class Calibration(object): 101 | def __init__(self, calib_file): 102 | if isinstance(calib_file, str): 103 | calib = get_calib_from_file(calib_file) 104 | else: 105 | calib = calib_file 106 | 107 | self.P2 = calib['P2'] # 3 x 4 108 | self.R0 = calib['R0'] # 3 x 3 109 | self.V2C = calib['Tr_velo2cam'] # 3 x 4 110 | 111 | def cart_to_hom(self, pts): 112 | """ 113 | :param pts: (N, 3 or 2) 114 | :return pts_hom: (N, 4 or 3) 115 | """ 116 | pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32))) 117 | return pts_hom 118 | 119 | def lidar_to_rect(self, pts_lidar): 120 | """ 121 | :param pts_lidar: (N, 3) 122 | :return pts_rect: (N, 3) 123 | """ 124 | pts_lidar_hom = self.cart_to_hom(pts_lidar) 125 | pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T)) 126 | return pts_rect 127 | 128 | def rect_to_img(self, pts_rect): 129 | """ 130 | :param pts_rect: (N, 3) 131 | :return pts_img: (N, 2) 132 | """ 133 | pts_rect_hom = self.cart_to_hom(pts_rect) 134 | pts_2d_hom = np.dot(pts_rect_hom, self.P2.T) 135 | pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) 136 | pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord 137 | return pts_img, pts_rect_depth 138 | 139 | def lidar_to_img(self, pts_lidar): 140 | """ 141 | :param pts_lidar: (N, 3) 142 | :return pts_img: (N, 2) 143 | """ 144 | pts_rect = self.lidar_to_rect(pts_lidar) 145 | pts_img, pts_depth = self.rect_to_img(pts_rect) 146 | return pts_img, pts_depth 147 | 148 | 149 | def get_objects_from_label(label_file): 150 | with open(label_file, 'r') as f: 151 | lines = f.readlines() 152 | objects = [Object3d(line) for line in lines] 153 | return objects 154 | 155 | 156 | def objs_to_boxes3d(obj_list): 157 | boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) 158 | for k, obj in enumerate(obj_list): 159 | boxes3d[k, 0:3], boxes3d[k, 3], boxes3d[k, 4], boxes3d[k, 5], boxes3d[k, 6] \ 160 | = obj.pos, obj.h, obj.w, obj.l, obj.ry 161 | return boxes3d 162 | 163 | 164 | def boxes3d_to_corners3d(boxes3d, rotate=True): 165 | """ 166 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 167 | :param rotate: 168 | :return: corners3d: (N, 8, 3) 169 | """ 170 | boxes_num = boxes3d.shape[0] 171 | h, w, l = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] 172 | x_corners = np.array([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2.], dtype=np.float32).T # (N, 8) 173 | z_corners = np.array([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dtype=np.float32).T # (N, 8) 174 | 175 | y_corners = np.zeros((boxes_num, 8), dtype=np.float32) 176 | y_corners[:, 4:8] = -h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8) 177 | 178 | if rotate: 179 | ry = boxes3d[:, 6] 180 | zeros, ones = np.zeros(ry.size, dtype=np.float32), np.ones(ry.size, dtype=np.float32) 181 | rot_list = np.array([[np.cos(ry), zeros, -np.sin(ry)], 182 | [zeros, ones, zeros], 183 | [np.sin(ry), zeros, np.cos(ry)]]) # (3, 3, N) 184 | R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3) 185 | 186 | temp_corners = np.concatenate((x_corners.reshape(-1, 8, 1), y_corners.reshape(-1, 8, 1), 187 | z_corners.reshape(-1, 8, 1)), axis=2) # (N, 8, 3) 188 | rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3) 189 | x_corners, y_corners, z_corners = rotated_corners[:, :, 0], rotated_corners[:, :, 1], rotated_corners[:, :, 2] 190 | 191 | x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2] 192 | 193 | x = 
x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8) 194 | y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8) 195 | z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8) 196 | 197 | corners = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)), axis=2) 198 | 199 | return corners.astype(np.float32) 200 | 201 | 202 | def enlarge_box3d(boxes3d, extra_width): 203 | """ 204 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 205 | """ 206 | if isinstance(boxes3d, np.ndarray): 207 | large_boxes3d = boxes3d.copy() 208 | else: 209 | large_boxes3d = boxes3d.clone() 210 | large_boxes3d[:, 3:6] += extra_width * 2 211 | large_boxes3d[:, 1] += extra_width 212 | return large_boxes3d 213 | 214 | 215 | def in_hull(p, hull): 216 | """ 217 | :param p: (N, K) test points 218 | :param hull: (M, K) M corners of a box 219 | :return (N) bool 220 | """ 221 | try: 222 | if not isinstance(hull, Delaunay): 223 | hull = Delaunay(hull) 224 | flag = hull.find_simplex(p) >= 0 225 | except scipy.spatial.qhull.QhullError: 226 | print('Warning: not a hull %s' % str(hull)) 227 | flag = np.zeros(p.shape[0], dtype=np.bool) 228 | 229 | return flag 230 | -------------------------------------------------------------------------------- /dataset/data_augmentation.py: -------------------------------------------------------------------------------- 1 | # introduced from fs-net 2 | import numpy as np 3 | import cv2 4 | import torch 5 | import math 6 | 7 | 8 | 9 | # add noise to mask 10 | def defor_2D(roi_mask, rand_r=2, rand_pro=0.3): 11 | ''' 12 | :param roi_mask: 256 x 256 13 | :param rand_r: randomly expand or shrink the mask iter rand_r 14 | :return: 15 | ''' 16 | roi_mask = roi_mask.copy().squeeze() 17 | if np.random.rand() > rand_pro: 18 | return roi_mask 19 | mask = roi_mask.copy() 20 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2)) 21 | mask_erode = cv2.erode(mask, kernel, rand_r) # rand_r 22 | mask_dilate = cv2.dilate(mask, kernel, rand_r) 23 | change_list = roi_mask[mask_erode != mask_dilate] 24 | l_list = change_list.size 25 | if l_list < 1.0: 26 | return roi_mask 27 | choose = np.random.choice(l_list, l_list // 2, replace=False) 28 | change_list = np.ones_like(change_list) 29 | change_list[choose] = 0.0 30 | roi_mask[mask_erode != mask_dilate] = change_list 31 | roi_mask[roi_mask > 0.0] = 1.0 32 | return roi_mask 33 | 34 | 35 | # point cloud based data augmentation 36 | # augment based on bounding box 37 | def defor_3D_bb(pc, R, t, s, nocs, model, sym=None, aug_bb=None): 38 | # pc n x 3, here s must be the original s 39 | pc_reproj = torch.mm(R.T, (pc - t.view(1, 3)).T).T # nn x 3 40 | if sym[0] == 1: # y axis symmetry 41 | ex = aug_bb[0] 42 | ey = aug_bb[1] 43 | ez = aug_bb[2] 44 | 45 | exz = (ex + ez) / 2 46 | pc_reproj[:, (0, 2)] = pc_reproj[:, (0, 2)] * exz 47 | pc_reproj[:, 1] = pc_reproj[:, 1] * ey 48 | nocs_scale_aug = torch.norm(torch.tensor([s[0] * exz, s[1] * ey, s[2] * exz])) / torch.norm(s) 49 | s[0] = s[0] * exz 50 | s[1] = s[1] * ey 51 | s[2] = s[2] * exz 52 | nocs[:, 0] = nocs[:, 0] * exz / nocs_scale_aug 53 | nocs[:, 1] = nocs[:, 1] * ey / nocs_scale_aug 54 | nocs[:, 2] = nocs[:, 2] * exz / nocs_scale_aug 55 | model[:, 0] = model[:, 0] * exz / nocs_scale_aug 56 | model[:, 1] = model[:, 1] * ey / nocs_scale_aug 57 | model[:, 2] = model[:, 2] * exz / nocs_scale_aug 58 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 59 | pc_new = pc_new.T 60 | else: 61 | ex = aug_bb[0] 62 | ey = aug_bb[1] 63 | ez = aug_bb[2] 64 | nocs_scale_aug = torch.norm(torch.tensor([s[0] * ex, s[1] * 
ey, s[2] * ez])) / torch.norm(s) 65 | pc_reproj[:, 0] = pc_reproj[:, 0] * ex 66 | pc_reproj[:, 1] = pc_reproj[:, 1] * ey 67 | pc_reproj[:, 2] = pc_reproj[:, 2] * ez 68 | s[0] = s[0] * ex 69 | s[1] = s[1] * ey 70 | s[2] = s[2] * ez 71 | nocs[:, 0] = nocs[:, 0] * ex / nocs_scale_aug 72 | nocs[:, 1] = nocs[:, 1] * ey / nocs_scale_aug 73 | nocs[:, 2] = nocs[:, 2] * ez / nocs_scale_aug 74 | model[:, 0] = model[:, 0] * ex / nocs_scale_aug 75 | model[:, 1] = model[:, 1] * ey / nocs_scale_aug 76 | model[:, 2] = model[:, 2] * ez / nocs_scale_aug 77 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 78 | pc_new = pc_new.T 79 | return pc_new, s, nocs, model ,nocs_scale_aug 80 | 81 | 82 | def defor_3D_bc(pc, R, t, s, model_point, nocs_scale, nocs): 83 | # resize box cage along y axis, the size s is modified 84 | ey_up = torch.rand(1, device=pc.device) * (1.2 - 0.8) + 0.8 85 | ey_down = torch.rand(1, device=pc.device) * (1.2 - 0.8) + 0.8 86 | # for each point, resize its x and z linealy 87 | pc_reproj = torch.mm(R.T, (pc - t.view(1, 3)).T).T # nn x 3 88 | per_point_resize = (pc_reproj[:, 1] + s[1] / 2) / s[1] * (ey_up - ey_down) + ey_down 89 | pc_reproj[:, 0] = pc_reproj[:, 0] * per_point_resize 90 | pc_reproj[:, 2] = pc_reproj[:, 2] * per_point_resize 91 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 92 | pc_new = pc_new.T 93 | 94 | norm_s = s / torch.norm(s) 95 | model_point_resize = (model_point[:, 1] + norm_s[1] / 2) / norm_s[1] * (ey_up - ey_down) + ey_down 96 | model_point[:, 0] = model_point[:, 0] * model_point_resize 97 | model_point[:, 2] = model_point[:, 2] * model_point_resize 98 | 99 | lx = 2 * max(max(model_point[:, 0]), -min(model_point[:, 0])) 100 | ly = max(model_point[:, 1]) - min(model_point[:, 1]) 101 | lz = max(model_point[:, 2]) - min(model_point[:, 2]) 102 | 103 | lx_t = lx * torch.norm(s) 104 | ly_t = ly * torch.norm(s) 105 | lz_t = lz * torch.norm(s) 106 | size_new = torch.tensor([lx_t, ly_t, lz_t], device=pc.device) 107 | 108 | nocs_scale_aug = torch.norm(torch.tensor([lx, ly, lz])) 109 | model_point = model_point / nocs_scale_aug 110 | 111 | nocs_resize = (nocs[:, 1] + norm_s[1] / 2) / norm_s[1] * (ey_up - ey_down) + ey_down 112 | nocs[:, 0] = nocs[:, 0] * nocs_resize 113 | nocs[:, 2] = nocs[:, 2] * nocs_resize 114 | nocs = nocs / nocs_scale_aug 115 | 116 | return pc_new, size_new, model_point, nocs,nocs_scale_aug 117 | 118 | 119 | # point cloud based data augmentation 120 | # augment based on bounding box 121 | def deform_non_linear(pc, R, t, s, nocs, model_point, axis=0): 122 | # pc n x 3, here s must be the original s 123 | assert axis in [0, 1] 124 | r_max = torch.rand(1, device=pc.device) * 0.2 + 1.1 125 | r_min = -torch.rand(1, device=pc.device) * 0.2 + 0.9 126 | # for each point, resize its x and z 127 | pc_reproj = torch.mm(R.T, (pc - t.view(1, 3)).T).T # nn x 3 128 | per_point_resize = r_min + 4 * (pc_reproj[:, axis] * pc_reproj[:, axis]) / (s[axis] ** 2) * (r_max - r_min) 129 | pc_reproj[:, axis] = pc_reproj[:, axis] * per_point_resize 130 | pc_new = torch.mm(R, pc_reproj.T) + t.view(3, 1) 131 | pc_new = pc_new.T 132 | 133 | norm_s = s / torch.norm(s) 134 | model_point_resize = r_min + 4 * (model_point[:, axis] * model_point[:, axis]) / (norm_s[axis] ** 2) * (r_max - r_min) 135 | model_point[:, axis] = model_point[:, axis] * model_point_resize 136 | 137 | lx = 2 * max(max(model_point[:, 0]), -min(model_point[:, 0])) 138 | ly = max(model_point[:, 1]) - min(model_point[:, 1]) 139 | lz = max(model_point[:, 2]) - min(model_point[:, 2]) 140 | 141 | lx_t = lx * 
torch.norm(s) 142 | ly_t = ly * torch.norm(s) 143 | lz_t = lz * torch.norm(s) 144 | size_new = torch.tensor([lx_t, ly_t, lz_t], device=pc.device) 145 | 146 | nocs_scale_aug = torch.norm(torch.tensor([lx, ly, lz])) 147 | model_point = model_point / nocs_scale_aug 148 | 149 | nocs_resize = r_min + 4 * (nocs[:, axis] * nocs[:, axis]) / (norm_s[axis] ** 2) * (r_max - r_min) 150 | nocs[:, axis] = nocs[:, axis] * nocs_resize 151 | nocs = nocs / nocs_scale_aug 152 | return pc_new, size_new, model_point, nocs ,nocs_scale_aug 153 | 154 | 155 | def defor_3D_pc(pc, r): 156 | points_defor = torch.clip(r*torch.randn(pc.shape).to(pc.device),min=-0.005,max=0.005) 157 | # points_defor=0.01*torch.randn(pc.shape).to(pc.device) 158 | # points_defor=torch.clamp(points_defor,min=-(pc*0.02),max=(pc*0.02)) 159 | pc = pc + points_defor 160 | return pc 161 | 162 | 163 | # point cloud based data augmentation 164 | # random rotation and translation 165 | def defor_3D_rt(pc, R, t, aug_rt_t, aug_rt_r): 166 | # add_t 167 | dx = aug_rt_t[0] 168 | dy = aug_rt_t[1] 169 | dz = aug_rt_t[2] 170 | 171 | pc[:, 0] = pc[:, 0] + dx 172 | pc[:, 1] = pc[:, 1] + dy 173 | pc[:, 2] = pc[:, 2] + dz 174 | t[0] = t[0] + dx 175 | t[1] = t[1] + dy 176 | t[2] = t[2] + dz 177 | 178 | # add r 179 | ''' 180 | Rm = get_rotation(np.random.uniform(-a, a), np.random.uniform(-a, a), np.random.uniform(-a, a)) 181 | Rm_tensor = torch.tensor(Rm, device=pc.device) 182 | pc_new = torch.mm(Rm_tensor, pc.T).T 183 | pc = pc_new 184 | R_new = torch.mm(Rm_tensor, R) 185 | R = R_new 186 | ''' 187 | ''' 188 | x_rot = torch.rand(1, dtype=torch.float32, device=pc.device) * 2 * a - a 189 | y_rot = torch.rand(1, dtype=torch.float32, device=pc.device) * 2 * a - a 190 | z_rot = torch.rand(1, dtype=torch.float32, device=pc.device) * 2 * a - a 191 | Rm = get_rotation_torch(x_rot, y_rot, z_rot) 192 | ''' 193 | Rm = aug_rt_r 194 | pc_new = torch.mm(Rm, pc.T).T 195 | pc = pc_new 196 | R_new = torch.mm(Rm, R) 197 | R = R_new 198 | T_new = torch.mm(Rm, t.view(3, 1)) 199 | t = T_new 200 | 201 | return pc, R, t 202 | 203 | 204 | def get_rotation(x_, y_, z_): 205 | # print(math.cos(math.pi/2)) 206 | x = float(x_ / 180) * math.pi 207 | y = float(y_ / 180) * math.pi 208 | z = float(z_ / 180) * math.pi 209 | R_x = np.array([[1, 0, 0], 210 | [0, math.cos(x), -math.sin(x)], 211 | [0, math.sin(x), math.cos(x)]]) 212 | 213 | R_y = np.array([[math.cos(y), 0, math.sin(y)], 214 | [0, 1, 0], 215 | [-math.sin(y), 0, math.cos(y)]]) 216 | 217 | R_z = np.array([[math.cos(z), -math.sin(z), 0], 218 | [math.sin(z), math.cos(z), 0], 219 | [0, 0, 1]]) 220 | return np.dot(R_z, np.dot(R_y, R_x)).astype(np.float32) 221 | 222 | def get_rotation_torch(x_, y_, z_): 223 | x = (x_ / 180) * math.pi 224 | y = (y_ / 180) * math.pi 225 | z = (z_ / 180) * math.pi 226 | R_x = torch.tensor([[1, 0, 0], 227 | [0, math.cos(x), -math.sin(x)], 228 | [0, math.sin(x), math.cos(x)]], device=x_.device) 229 | 230 | R_y = torch.tensor([[math.cos(y), 0, math.sin(y)], 231 | [0, 1, 0], 232 | [-math.sin(y), 0, math.cos(y)]], device=y_.device) 233 | 234 | R_z = torch.tensor([[math.cos(z), -math.sin(z), 0], 235 | [math.sin(z), math.cos(z), 0], 236 | [0, 0, 1]], device=z_.device) 237 | return torch.mm(R_z, torch.mm(R_y, R_x)) 238 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | import argparse 4 | import os 5 | import shutil 6 | 7 | import torch 8 | 
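# Training entry point: loads an mmengine Config (--cfg / --cfg-options), snapshots the
# config and source tree into the checkpoint directory, then either runs main_worker
# directly or spawns one process per GPU when cfg.DDP is set.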
import torch.backends.cudnn as cudnn 9 | import torch.distributed as dist 10 | import torch.multiprocessing as mp 11 | import torch.nn.parallel 12 | import torch.optim 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | from collections import Counter 16 | 17 | import random 18 | import numpy as np 19 | from network import NETWORK_REGISTRY 20 | from mmengine import Config,DictAction 21 | from core.trainer import Trainer 22 | from dataset import DATALOADER_REGISTRY 23 | from utils.logging import create_checkpoint, setup_logger 24 | from utils.utils import OPTIMIZER_REGISTRY, save_checkpoint, SCHEDULER_REGISTRY,farthest_point_sample,index_points 25 | 26 | def set_random_seed(seed, deterministic=False): 27 | random.seed(seed) 28 | np.random.seed(seed) 29 | torch.manual_seed(seed) 30 | torch.cuda.manual_seed_all(seed) 31 | if deterministic: 32 | torch.backends.cudnn.deterministic = True 33 | torch.backends.cudnn.benchmark = False 34 | 35 | def parse_args(): 36 | parser = argparse.ArgumentParser(description='Train') 37 | # general 38 | parser.add_argument('--cfg', 39 | help='experiment configure file name', 40 | required=True, 41 | type=str) 42 | parser.add_argument('--cfg-options', 43 | nargs='+', 44 | action=DictAction, 45 | help='override some settings in the used config, the key-value pair ' 46 | 'in xxx=yyy format will be merged into config file. If the value to ' 47 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 48 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 49 | 'Note that the quotation marks are necessary and that no white space ' 50 | 'is allowed.') 51 | # distributed training 52 | parser.add_argument('--gpus', 53 | help='gpu ids for ddp training', 54 | type=str) 55 | parser.add_argument('--port', 56 | default='23459', 57 | type=str, 58 | help='port used to set up distributed training') 59 | parser.add_argument('--dist-url', 60 | default='tcp://127.0.0.1', 61 | type=str, 62 | help='url used to set up distributed training') 63 | args = parser.parse_args() 64 | 65 | return args 66 | 67 | def main(): 68 | args = parse_args() 69 | cfg = Config.fromfile(args.cfg) 70 | if args.cfg_options is not None: 71 | cfg.merge_from_dict(args.cfg_options) 72 | cfg.MODEL.vis=cfg.VIS 73 | assert cfg.train 74 | print(cfg.pretty_text) 75 | 76 | final_output_dir = create_checkpoint(cfg, 'train') 77 | 78 | if args.gpus is not None: 79 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 80 | dist_url = args.dist_url + ':{}'.format(args.port) 81 | # save config file 82 | if not cfg.VIS: 83 | print('save cfg and source') 84 | src_folder = os.path.join(final_output_dir, 'src') 85 | if os.path.exists(os.path.join(src_folder, 'network')): 86 | shutil.rmtree(os.path.join(src_folder, 'network')) 87 | shutil.copytree('network', os.path.join(src_folder, 'network')) 88 | if os.path.exists(os.path.join(src_folder, 'tools')): 89 | shutil.rmtree(os.path.join(src_folder, 'tools')) 90 | shutil.copytree('tools', os.path.join(src_folder, 'tools')) 91 | if os.path.exists(os.path.join(src_folder, 'cfg.py')): 92 | os.remove(os.path.join(src_folder, 'cfg.py')) 93 | cfg.dump(os.path.join(src_folder, 'cfg.py')) 94 | 95 | ngpus_per_node = torch.cuda.device_count() 96 | 97 | set_random_seed(123) 98 | 99 | if cfg.DDP: 100 | world_size = ngpus_per_node 101 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(world_size, dist_url, final_output_dir, cfg)) 102 | else: 103 | main_worker(0, 1, dist_url, final_output_dir, cfg) 104 | 105 | def main_worker(rank, world_size, 
dist_url, final_output_dir, cfg): 106 | set_random_seed(42) 107 | if rank==0: 108 | logger, _ = setup_logger(final_output_dir, rank, 'train',cfg.VIS) 109 | else: 110 | logger=None 111 | 112 | cudnn.benchmark = cfg.CUDNN.BENCHMARK 113 | torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC 114 | torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED 115 | 116 | print("Use GPU: {} for training".format(rank)) 117 | if cfg.DDP: 118 | print('Init process group: dist_url: {}, world_size: {}, rank: {}'.format(dist_url, world_size, rank)) 119 | dist.init_process_group( 120 | backend=cfg.DIST_BACKEND, 121 | init_method=dist_url, 122 | world_size=world_size, 123 | rank=rank 124 | ) 125 | 126 | # Data loading code 127 | train_loader = DATALOADER_REGISTRY.build(cfg) 128 | 129 | model = NETWORK_REGISTRY.build(cfg.MODEL) 130 | 131 | def count_parameters(model): 132 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 133 | if rank==0: 134 | logger.info("Total params: {num:.3f}M".format(num=count_parameters(model)/1e6)) 135 | if cfg.DDP: 136 | print(rank) 137 | torch.cuda.set_device(rank) 138 | model.cuda(rank) 139 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[rank],find_unused_parameters=cfg.find_unused_parameters) 140 | else: 141 | model = torch.nn.DataParallel(model).cuda() 142 | 143 | 144 | 145 | best_perf = -1 146 | last_epoch = -1 147 | optimizer = OPTIMIZER_REGISTRY.build(cfg=cfg.OPTIMIZER, parameters=model.parameters()) 148 | lr_scheduler = SCHEDULER_REGISTRY.build(cfg=cfg.SCHEDULER, optimizer=optimizer) 149 | 150 | begin_epoch = cfg.TRAIN.BEGIN_EPOCH 151 | if cfg.AUTO_RESUME: 152 | if cfg.RESUME_FILE != '': 153 | checkpoint_file = os.path.join( 154 | final_output_dir, 'model', cfg.RESUME_FILE) 155 | else: 156 | checkpoint_file = os.path.join( 157 | final_output_dir, 'model', 'checkpoint.pth.tar') 158 | print(checkpoint_file) 159 | if os.path.exists(checkpoint_file): 160 | if rank==0: 161 | logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) 162 | checkpoint = torch.load(checkpoint_file, map_location=lambda storage, loc: storage) 163 | model.load_state_dict(checkpoint['state_dict']) 164 | if not cfg.ONLY_MODEL: 165 | begin_epoch = checkpoint['epoch'] 166 | best_perf = checkpoint['perf'] 167 | last_epoch = checkpoint['epoch'] 168 | optimizer.load_state_dict(checkpoint['optimizer']) 169 | if cfg.CHANGE_SCHEDULE: 170 | milestones=cfg.SCHEDULER.milestones 171 | new_counter={stones:1 for stones in milestones} 172 | new_counter=Counter(new_counter) 173 | checkpoint['scheduler']['milestones']=new_counter 174 | checkpoint['scheduler']['gamma']=cfg.SCHEDULER.gamma 175 | if 'scheduler' in checkpoint.keys(): 176 | lr_scheduler.load_state_dict(checkpoint['scheduler']) 177 | 178 | # if cfg.CHANGE_SCHEDULE: 179 | # lr_scheduler.step() 180 | # print('lr',optimizer.state_dict()['param_groups'][0]['lr']) 181 | if rank==0: 182 | logger.info("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch'])) 183 | 184 | 185 | is_iter=cfg.is_iter 186 | if is_iter: 187 | trainer = Trainer(cfg, model, rank, final_output_dir,logger=logger,lr_scheduler=lr_scheduler) 188 | else: 189 | trainer = Trainer(cfg, model, rank, final_output_dir,logger=logger) 190 | 191 | for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): 192 | train_loader.dataset.set_epoch(epoch) 193 | if cfg.DDP: 194 | train_loader.sampler.set_epoch(epoch) 195 | 196 | 197 | trainer.train(epoch, train_loader, optimizer) 198 | 199 | if not is_iter: 200 | lr_scheduler.step() 201 | 202 | 
perf_indicator = epoch 203 | if perf_indicator >= best_perf: 204 | 205 | best_perf = perf_indicator 206 | best_model = True 207 | else: 208 | best_model = False 209 | 210 | if not cfg.DDP or (cfg.DDP and rank == 0 and epoch%(cfg.TRAIN.SAVE_EPOCH_STEP)==0) and not cfg.VIS: 211 | file_name='checkpoint_epoch_'+str(epoch)+'.tar.pth' 212 | if rank==0: 213 | logger.info('=> saving checkpoint to {}'.format(final_output_dir)) 214 | save_checkpoint({ 215 | 'epoch': epoch + 1, 216 | 'model': cfg.MODEL.type, 217 | 'state_dict': model.state_dict(), 218 | 'best_state_dict': model.module.state_dict(), 219 | 'perf': perf_indicator, 220 | 'optimizer': optimizer.state_dict(), 221 | 'scheduler':lr_scheduler.state_dict() 222 | }, best_model,final_output_dir, filename=file_name) 223 | if best_model: 224 | torch.save( 225 | model.module.state_dict(), 226 | os.path.join(final_output_dir, 'model_best.pth.tar') 227 | ) 228 | 229 | final_model_state_file = os.path.join( 230 | final_output_dir, 'model', 'final_state{}.pth.tar'.format(rank) 231 | ) 232 | if rank==0: 233 | logger.info('saving final model state to {}'.format(final_model_state_file)) 234 | torch.save(model.module.state_dict(), final_model_state_file) 235 | 236 | if __name__ == '__main__': 237 | 238 | main() -------------------------------------------------------------------------------- /Pointnet2/pointnet2/pointnet2_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.autograd import Function 4 | import torch.nn as nn 5 | from typing import Tuple 6 | 7 | import pointnet2_cuda as pointnet2 8 | 9 | 10 | class FurthestPointSampling(Function): 11 | @staticmethod 12 | def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor: 13 | """ 14 | Uses iterative furthest point sampling to select a set of npoint features that have the largest 15 | minimum distance 16 | :param ctx: 17 | :param xyz: (B, N, 3) where N > npoint 18 | :param npoint: int, number of features in the sampled set 19 | :return: 20 | output: (B, npoint) tensor containing the set 21 | """ 22 | assert xyz.is_contiguous() 23 | 24 | B, N, _ = xyz.size() 25 | output = torch.cuda.IntTensor(B, npoint) 26 | temp = torch.cuda.FloatTensor(B, N).fill_(1e10) 27 | 28 | pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output) 29 | return output 30 | 31 | @staticmethod 32 | def backward(xyz, a=None): 33 | return None, None 34 | 35 | 36 | furthest_point_sample = FurthestPointSampling.apply 37 | 38 | 39 | class GatherOperation(Function): 40 | 41 | @staticmethod 42 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: 43 | """ 44 | :param ctx: 45 | :param features: (B, C, N) 46 | :param idx: (B, npoint) index tensor of the features to gather 47 | :return: 48 | output: (B, C, npoint) 49 | """ 50 | assert features.is_contiguous() 51 | assert idx.is_contiguous() 52 | 53 | B, npoint = idx.size() 54 | _, C, N = features.size() 55 | output = torch.cuda.FloatTensor(B, C, npoint) 56 | 57 | pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output) 58 | 59 | ctx.for_backwards = (idx, C, N) 60 | return output 61 | 62 | @staticmethod 63 | def backward(ctx, grad_out): 64 | idx, C, N = ctx.for_backwards 65 | B, npoint = idx.size() 66 | 67 | grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) 68 | grad_out_data = grad_out.data.contiguous() 69 | pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data) 70 | 
return grad_features, None 71 | 72 | 73 | gather_operation = GatherOperation.apply 74 | 75 | 76 | class ThreeNN(Function): 77 | 78 | @staticmethod 79 | def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 80 | """ 81 | Find the three nearest neighbors of unknown in known 82 | :param ctx: 83 | :param unknown: (B, N, 3) 84 | :param known: (B, M, 3) 85 | :return: 86 | dist: (B, N, 3) l2 distance to the three nearest neighbors 87 | idx: (B, N, 3) index of 3 nearest neighbors 88 | """ 89 | assert unknown.is_contiguous() 90 | assert known.is_contiguous() 91 | 92 | B, N, _ = unknown.size() 93 | m = known.size(1) 94 | dist2 = torch.cuda.FloatTensor(B, N, 3) 95 | idx = torch.cuda.IntTensor(B, N, 3) 96 | 97 | pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx) 98 | return torch.sqrt(dist2), idx 99 | 100 | @staticmethod 101 | def backward(ctx, a=None, b=None): 102 | return None, None 103 | 104 | 105 | three_nn = ThreeNN.apply 106 | 107 | 108 | class ThreeInterpolate(Function): 109 | 110 | @staticmethod 111 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: 112 | """ 113 | Performs weight linear interpolation on 3 features 114 | :param ctx: 115 | :param features: (B, C, M) Features descriptors to be interpolated from 116 | :param idx: (B, n, 3) three nearest neighbors of the target features in features 117 | :param weight: (B, n, 3) weights 118 | :return: 119 | output: (B, C, N) tensor of the interpolated features 120 | """ 121 | assert features.is_contiguous() 122 | assert idx.is_contiguous() 123 | assert weight.is_contiguous() 124 | 125 | B, c, m = features.size() 126 | n = idx.size(1) 127 | ctx.three_interpolate_for_backward = (idx, weight, m) 128 | output = torch.cuda.FloatTensor(B, c, n) 129 | 130 | pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output) 131 | return output 132 | 133 | @staticmethod 134 | def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 135 | """ 136 | :param ctx: 137 | :param grad_out: (B, C, N) tensor with gradients of outputs 138 | :return: 139 | grad_features: (B, C, M) tensor with gradients of features 140 | None: 141 | None: 142 | """ 143 | idx, weight, m = ctx.three_interpolate_for_backward 144 | B, c, n = grad_out.size() 145 | 146 | grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_()) 147 | grad_out_data = grad_out.data.contiguous() 148 | 149 | pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data) 150 | return grad_features, None, None 151 | 152 | 153 | three_interpolate = ThreeInterpolate.apply 154 | 155 | 156 | class GroupingOperation(Function): 157 | 158 | @staticmethod 159 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: 160 | """ 161 | :param ctx: 162 | :param features: (B, C, N) tensor of features to group 163 | :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with 164 | :return: 165 | output: (B, C, npoint, nsample) tensor 166 | """ 167 | assert features.is_contiguous() 168 | assert idx.is_contiguous() 169 | 170 | B, nfeatures, nsample = idx.size() 171 | _, C, N = features.size() 172 | output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) 173 | 174 | pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output) 175 | 176 | ctx.for_backwards = (idx, N) 177 | return output 178 | 179 | @staticmethod 180 | def backward(ctx, grad_out: torch.Tensor) 
-> Tuple[torch.Tensor, torch.Tensor]: 181 | """ 182 | :param ctx: 183 | :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward 184 | :return: 185 | grad_features: (B, C, N) gradient of the features 186 | """ 187 | idx, N = ctx.for_backwards 188 | 189 | B, C, npoint, nsample = grad_out.size() 190 | grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) 191 | 192 | grad_out_data = grad_out.data.contiguous() 193 | pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) 194 | return grad_features, None 195 | 196 | 197 | grouping_operation = GroupingOperation.apply 198 | 199 | 200 | class BallQuery(Function): 201 | 202 | @staticmethod 203 | def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor: 204 | """ 205 | :param ctx: 206 | :param radius: float, radius of the balls 207 | :param nsample: int, maximum number of features in the balls 208 | :param xyz: (B, N, 3) xyz coordinates of the features 209 | :param new_xyz: (B, npoint, 3) centers of the ball query 210 | :return: 211 | idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls 212 | """ 213 | assert new_xyz.is_contiguous() 214 | assert xyz.is_contiguous() 215 | 216 | B, N, _ = xyz.size() 217 | npoint = new_xyz.size(1) 218 | idx = torch.cuda.IntTensor(B, npoint, nsample).zero_() 219 | 220 | pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx) 221 | return idx 222 | 223 | @staticmethod 224 | def backward(ctx, a=None): 225 | return None, None, None, None 226 | 227 | 228 | ball_query = BallQuery.apply 229 | 230 | 231 | class QueryAndGroup(nn.Module): 232 | def __init__(self, radius: float, nsample: int, use_xyz: bool = True,norm=False): 233 | """ 234 | :param radius: float, radius of ball 235 | :param nsample: int, maximum number of features to gather in the ball 236 | :param use_xyz: 237 | """ 238 | super().__init__() 239 | self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz 240 | self.norm=norm 241 | 242 | def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]: 243 | """ 244 | :param xyz: (B, N, 3) xyz coordinates of the features 245 | :param new_xyz: (B, npoint, 3) centroids 246 | :param features: (B, C, N) descriptors of the features 247 | :return: 248 | new_features: (B, 3 + C, npoint, nsample) 249 | """ 250 | idx = ball_query(self.radius, self.nsample, xyz, new_xyz) 251 | xyz_trans = xyz.transpose(1, 2).contiguous() 252 | grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) 253 | grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1) 254 | if self.norm: 255 | grouped_xyz/=self.radius 256 | 257 | if features is not None: 258 | grouped_features = grouping_operation(features, idx) 259 | if self.use_xyz: 260 | new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, C + 3, npoint, nsample) 261 | else: 262 | new_features = grouped_features 263 | else: 264 | assert self.use_xyz, "Cannot have not features and not use xyz as a feature!" 
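# When no per-point feature tensor is supplied, the grouped local xyz offsets themselves serve as the features (hence the assert above).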
265 | new_features = grouped_xyz 266 | 267 | return new_features 268 | 269 | 270 | class GroupAll(nn.Module): 271 | def __init__(self, use_xyz: bool = True): 272 | super().__init__() 273 | self.use_xyz = use_xyz 274 | 275 | def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): 276 | """ 277 | :param xyz: (B, N, 3) xyz coordinates of the features 278 | :param new_xyz: ignored 279 | :param features: (B, C, N) descriptors of the features 280 | :return: 281 | new_features: (B, C + 3, 1, N) 282 | """ 283 | grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) 284 | if features is not None: 285 | grouped_features = features.unsqueeze(2) 286 | if self.use_xyz: 287 | new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, 3 + C, 1, N) 288 | else: 289 | new_features = grouped_features 290 | else: 291 | new_features = grouped_xyz 292 | 293 | return new_features 294 | -------------------------------------------------------------------------------- /dataset/pose_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import math 5 | import random 6 | import numpy as np 7 | import time 8 | import _pickle as cPickle 9 | # from PIL import Image 10 | from tqdm import tqdm 11 | import torch.utils.data as data 12 | import torch 13 | from utils import load_depth, get_bbox 14 | from mmengine import Registry 15 | from .data_augmentation import defor_3D_pc, defor_3D_bb, defor_3D_rt, defor_3D_bc, deform_non_linear,get_rotation 16 | 17 | DATASET_REGISTRY = Registry("DATASET") 18 | 19 | @DATASET_REGISTRY.register_module() 20 | class PoseDataset(data.Dataset): 21 | def __init__(self, source, mode, data_dir, n_pts, vis,img_size=192, per_obj=None,use_cache=False,use_augment=True): 22 | """ 23 | Args: 24 | source: 'CAMERA', 'Real' or 'CAMERA+Real' 25 | mode: 'train' or 'test' 26 | data_dir: 27 | n_pts: number of selected foreground points 28 | """ 29 | 30 | self.source = source 31 | self.mode = mode 32 | self.data_dir = data_dir 33 | self.n_pts = n_pts 34 | self.vis=vis 35 | self.per_obj=per_obj 36 | self.img_size=img_size 37 | self.use_augment=use_augment 38 | 39 | 40 | assert source in ['CAMERA', 'Real', 'CAMERA+Real'] 41 | assert mode in ['train', 'test'] 42 | img_list_path = ['CAMERA/train_list.txt', 'Real/train_list.txt', 43 | 'CAMERA/val_list.txt', 'Real/test_list.txt'] 44 | model_file_path = ['obj_models/camera_train.pkl', 'obj_models/real_train.pkl', 45 | 'obj_models/camera_val.pkl', 'obj_models/real_test.pkl'] 46 | if mode == 'train': 47 | del img_list_path[2:] 48 | del model_file_path[2:] 49 | else: 50 | del img_list_path[:2] 51 | del model_file_path[:2] 52 | if source == 'CAMERA': 53 | del img_list_path[-1] 54 | del model_file_path[-1] 55 | elif source == 'Real': 56 | del img_list_path[0] 57 | del model_file_path[0] 58 | elif source=='CAMERA+Real': 59 | del img_list_path[2:] 60 | 61 | img_list = [] 62 | subset_len = [] 63 | for path in img_list_path: 64 | img_list += [os.path.join(path.split('/')[0], line.rstrip('\n')) 65 | for line in open(os.path.join(data_dir, path))] 66 | subset_len.append(len(img_list)) 67 | if len(subset_len) == 2: 68 | self.subset_len = [subset_len[0], subset_len[1]-subset_len[0]] 69 | 70 | if per_obj is not None: 71 | self.img_list=[] 72 | for img in img_list: 73 | img_path = os.path.join(self.data_dir, img) 74 | with open(img_path + '_label.pkl', 'rb') as f: 75 | gts = cPickle.load(f) 76 | b=False 77 | for i in range(len(gts['instance_ids'])): 78 | if 
gts['class_ids'][i]-1==self.per_obj: 79 | b=True 80 | break 81 | if b: 82 | self.img_list.append(img) 83 | else: 84 | self.img_list = img_list 85 | self.length = len(self.img_list) 86 | 87 | 88 | self.random=list(range(self.length)) 89 | random.seed(1002) 90 | random.shuffle(self.random) 91 | 92 | # meta info for re-label mug category 93 | with open(os.path.join(data_dir, 'obj_models/mug_meta.pkl'), 'rb') as f: 94 | self.mug_meta = cPickle.load(f) 95 | 96 | self.mean_shapes = np.load('assets/mean_points_emb.npy') 97 | self.cat_names = ['bottle', 'bowl', 'camera', 'can', 'laptop', 'mug'] 98 | self.camera_intrinsics = [577.5, 577.5, 319.5, 239.5] # [fx, fy, cx, cy] 99 | self.real_intrinsics = [591.0125, 590.16775, 322.525, 244.11084] 100 | self.sym_ids = [0, 1, 3] # 0-indexed 101 | self.norm_scale = 1000.0 # normalization scale 102 | self.shift_range = 0.01 103 | 104 | models = {} 105 | for path in model_file_path: 106 | with open(os.path.join(data_dir, path), 'rb') as f: 107 | models.update(cPickle.load(f)) 108 | self.models = models 109 | 110 | self.xmap = np.array([[i for i in range(640)] for j in range(480)]) 111 | self.ymap = np.array([[j for i in range(640)] for j in range(480)]) 112 | 113 | print('{} images found.'.format(self.length)) 114 | 115 | def __len__(self): 116 | return self.length 117 | 118 | def set_epoch(self,epoch): 119 | random.seed(1234+epoch) 120 | 121 | def __getitem__(self, index): 122 | index=self.random[index] 123 | img_path = os.path.join(self.data_dir, self.img_list[index]) 124 | 125 | id=self.img_list[index].split('/')[-1] 126 | 127 | 128 | 129 | if self.vis: 130 | rgb = cv2.imread(img_path + '_color.png')[:, :, :3] 131 | image=rgb.copy() 132 | 133 | # rgb = rgb[:, :, ::-1] 134 | 135 | depth = load_depth(img_path) 136 | 137 | mask = cv2.imread(img_path + '_mask.png')[:, :, 2] 138 | 139 | coord = cv2.imread(img_path + '_coord.png')[:, :, :3] 140 | coord = coord[:, :, (2, 1, 0)] 141 | coord = np.array(coord, dtype=np.float32) / 255 142 | coord[:, :, 2] = 1 - coord[:, :, 2] 143 | 144 | with open(img_path + '_label.pkl', 'rb') as f: 145 | gts = cPickle.load(f) 146 | if 'CAMERA' in img_path.split('/'): 147 | cam_fx, cam_fy, cam_cx, cam_cy = self.camera_intrinsics 148 | else: 149 | cam_fx, cam_fy, cam_cx, cam_cy = self.real_intrinsics 150 | 151 | cam_K=np.identity(3, dtype=np.float32) 152 | cam_K[0,0],cam_K[1,1],cam_K[0,2],cam_K[1,2]=cam_fx, cam_fy, cam_cx, cam_cy 153 | 154 | # select one foreground object 155 | '''''' 156 | idx = random.randint(0, len(gts['instance_ids'])-1) 157 | if self.per_obj is not None: 158 | for i in range(len(gts['instance_ids'])): 159 | if gts['class_ids'][i]-1==self.per_obj: 160 | idx=i 161 | 162 | cat_id=gts['class_ids'][idx]-1 163 | inst_id = gts['instance_ids'][idx] 164 | rmin, rmax, cmin, cmax = get_bbox(gts['bboxes'][idx]) 165 | # sample points from mask 166 | mask = np.equal(mask, inst_id) 167 | mask = np.logical_and(mask, depth > 0) 168 | mask = mask.flatten() 169 | 170 | depth_masked=(depth.flatten())[mask] #N 171 | xmap_masked=(self.xmap.flatten())[mask] 172 | ymap_masked=(self.ymap.flatten())[mask] 173 | 174 | pt2=depth_masked/self.norm_scale 175 | pt0 = (xmap_masked - cam_cx) * pt2 / cam_fx 176 | pt1 = (ymap_masked - cam_cy) * pt2 / cam_fy 177 | points=np.stack((pt0,pt1,pt2),axis=1) #N,3 178 | 179 | l_all=points.shape[0] 180 | 181 | if l_all>=self.n_pts: 182 | choose=np.random.choice(l_all,self.n_pts,replace=False) 183 | else: 184 | choose=np.random.choice(l_all,self.n_pts,replace=True) 185 | 186 | nocs = 
coord.reshape(-1,3)[mask,...][choose, :] - 0.5 187 | 188 | points=points[choose,...] 189 | 190 | 191 | 192 | crop_w = rmax - rmin 193 | ratio = self.img_size / crop_w 194 | col_idx = choose % crop_w 195 | row_idx = choose // crop_w 196 | choose = (np.floor(row_idx * ratio) * self.img_size + np.floor(col_idx * ratio)).astype(np.int64) 197 | 198 | 199 | scale = gts['scales'][idx] 200 | rotation = gts['rotations'][idx] 201 | translation = gts['translations'][idx] 202 | prior = self.mean_shapes[cat_id].astype(np.float32) 203 | 204 | # adjust nocs coords for mug category 205 | if cat_id==5: 206 | T0 = self.mug_meta[gts['model_list'][idx]][0] 207 | s0 = self.mug_meta[gts['model_list'][idx]][1] 208 | nocs = s0 * (nocs + T0) 209 | 210 | # map ambiguous rotation to canonical rotation 211 | if cat_id in self.sym_ids: 212 | rotation = gts['rotations'][idx] 213 | # assume continuous axis rotation symmetry 214 | theta_x = rotation[0, 0] + rotation[2, 2] 215 | theta_y = rotation[0, 2] - rotation[2, 0] 216 | r_norm = math.sqrt(theta_x**2 + theta_y**2) 217 | s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm], 218 | [0.0, 1.0, 0.0 ], 219 | [theta_y/r_norm, 0.0, theta_x/r_norm]]) 220 | rotation = rotation @ s_map 221 | nocs = nocs @ s_map 222 | 223 | sRT = np.identity(4, dtype=np.float32) 224 | RT=np.identity(4,dtype=np.float32) 225 | sRT[:3, :3] = scale * rotation 226 | sRT[:3, 3] = translation 227 | RT[:3,:3]=rotation 228 | RT[:3,3]=translation 229 | 230 | model = self.models[gts['model_list'][idx]].astype(np.float32) 231 | 232 | 233 | model=torch.as_tensor(model.astype(np.float32)) 234 | points=torch.as_tensor(points.astype(np.float32)) 235 | R=torch.as_tensor(rotation.astype(np.float32)) 236 | t=torch.as_tensor(translation.astype(np.float32)) 237 | s=torch.as_tensor(scale.astype(np.float32)) 238 | nocs=torch.as_tensor(nocs.astype(np.float32)) 239 | 240 | sym_info = self.get_sym_info(cat_id, mug_handle=1) 241 | bb_aug, rt_aug_t, rt_aug_R = self.generate_aug_parameters() 242 | dimension_delta,mean_shape=self.get_fs_net_scale( model, s,cat_id) 243 | 244 | sym_info=torch.as_tensor(sym_info.astype(np.float32)).contiguous() 245 | bb_aug, rt_aug_t, rt_aug_R=torch.as_tensor(bb_aug, dtype=torch.float32).contiguous(),torch.as_tensor(rt_aug_t, dtype=torch.float32).contiguous(),torch.as_tensor(rt_aug_R, dtype=torch.float32).contiguous() 246 | dimension_delta=torch.as_tensor(dimension_delta,dtype=torch.float32).contiguous() 247 | mean_shape=torch.as_tensor(mean_shape,dtype=torch.float32).contiguous() 248 | 249 | if self.use_augment: 250 | points, R, t, dimension, model, nocs,s=self.data_augment(points,R,t,dimension_delta+mean_shape,sym_info,bb_aug,rt_aug_t,rt_aug_R,model,s,nocs,cat_id) 251 | 252 | dimension_delta=dimension-mean_shape 253 | 254 | if cat_id in self.sym_ids: 255 | # assume continuous axis rotation symmetry 256 | R=R.numpy() 257 | nocs=nocs.numpy() 258 | theta_x = R[0, 0] + R[2, 2] 259 | theta_y = R[0, 2] - R[2, 0] 260 | r_norm = math.sqrt(theta_x**2 + theta_y**2) 261 | s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm], 262 | [0.0, 1.0, 0.0 ], 263 | [theta_y/r_norm, 0.0, theta_x/r_norm]]) 264 | R = R @ s_map 265 | nocs = nocs @ s_map 266 | R=torch.as_tensor(R.astype(np.float32)) 267 | nocs=torch.as_tensor(nocs.astype(np.float32)) 268 | 269 | gt_green,gt_red=self.get_gt_v(R) 270 | 271 | 272 | #data=data[choose,...] 
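# The dict assembled below is the per-sample payload: observed depth points, their NOCS coordinates, the category shape prior, the category id, GT rotation/translation/size, the green/red GT rotation axes, the size residual w.r.t. the category mean shape, and the symmetry flags.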
273 | data_dict={} 274 | if self.mode=='test': 275 | data_dict['handle_visiblity']=gts['handle_visibility'][idx] 276 | data_dict['points']=points.contiguous() 277 | data_dict['nocs']=nocs.contiguous() 278 | data_dict['prior']=torch.as_tensor(prior).contiguous() 279 | data_dict['cat_id']=torch.as_tensor(cat_id) 280 | data_dict['R']=R.contiguous() 281 | data_dict['t']=t.contiguous() 282 | data_dict['s']=s.contiguous() 283 | data_dict['gt_green']=gt_green.contiguous() 284 | data_dict['gt_red']=gt_red.contiguous() 285 | data_dict['dimension_delta']=dimension_delta.contiguous() 286 | data_dict['mean_shape']=mean_shape.contiguous() 287 | data_dict['sym']=sym_info.contiguous() 288 | if self.vis: 289 | data_dict['RT']=torch.as_tensor(RT.astype(np.float32)).contiguous() 290 | data_dict['id']=id 291 | data_dict['sRT']=torch.as_tensor(sRT.astype(np.float32)).contiguous() 292 | data_dict['image']=image 293 | data_dict['cam_K']=cam_K 294 | data_dict['model']=model.contiguous() 295 | data_dict['img_path']=img_path 296 | 297 | return data_dict 298 | 299 | @torch.no_grad() 300 | def get_gt_v(self,Rs, axis=2): 301 | # TODO use 3 axis, the order remains: do we need to change order? 302 | if axis == 3: 303 | raise NotImplementedError 304 | else: 305 | assert axis == 2 306 | gt_green = Rs[:,1:2] 307 | gt_red = Rs[:,0:1] 308 | return gt_green, gt_red 309 | 310 | 311 | @torch.no_grad() 312 | def data_augment(self,PC,gt_R,gt_t,gt_s,sym,aug_bb,aug_rt_t,aug_rt_r,model_point,nocs_scale,PC_nocs,obj_id): 313 | prop_bb = torch.rand(1) 314 | if prop_bb < 0.3: 315 | # R, t, s, s_x=(0.9, 1.1), s_y=(0.9, 1.1), s_z=(0.9, 1.1), sym=None 316 | PC_new, gt_s_new, nocs_new, model_new,nocs_scale_aug = defor_3D_bb(PC, gt_R, 317 | gt_t, gt_s, PC_nocs, model_point, 318 | sym=sym, aug_bb=aug_bb) 319 | PC = PC_new 320 | gt_s = gt_s_new 321 | PC_nocs = nocs_new 322 | model_point = model_new 323 | nocs_scale=nocs_scale/nocs_scale_aug 324 | 325 | 326 | prop_rt = torch.rand(1) 327 | if prop_rt < 0.3: 328 | PC_new, gt_R_new, gt_t_new = defor_3D_rt(PC, gt_R, 329 | gt_t, aug_rt_t, aug_rt_r) 330 | PC = PC_new 331 | gt_R = gt_R_new 332 | gt_t = gt_t_new.view(-1) 333 | 334 | prop_bc = torch.rand(1) 335 | # only do bc for mug and bowl 336 | b=False 337 | if prop_bc < 0.3 and (obj_id in [1,5]): 338 | b=True 339 | PC_new, gt_s_new, model_point_new, nocs_new,nocs_scale_aug = defor_3D_bc(PC, gt_R, gt_t,gt_s,model_point, nocs_scale, PC_nocs) 340 | PC = PC_new 341 | gt_s = gt_s_new 342 | model_point = model_point_new 343 | PC_nocs = nocs_new 344 | nocs_scale=nocs_scale/nocs_scale_aug 345 | 346 | prop_nl = torch.rand(1) 347 | if not b and prop_nl < 0.3 and (obj_id in [0,1,2,3,5]): 348 | if obj_id in [0,1,3,5]: 349 | sel_axis = 1 350 | elif obj_id in [2]: 351 | sel_axis = 0 352 | else: 353 | sel_axis = None 354 | 355 | PC_new, gt_s_new, model_point_new, nocs_new,nocs_scale_aug = deform_non_linear(PC, gt_R, gt_t,gt_s,PC_nocs, model_point, sel_axis) 356 | 357 | PC = PC_new 358 | gt_s = gt_s_new 359 | model_point = model_point_new 360 | PC_nocs = nocs_new 361 | nocs_scale=nocs_scale/nocs_scale_aug 362 | 363 | 364 | prop_pc = torch.rand(1) 365 | if prop_pc < 0.3: 366 | PC_new = defor_3D_pc(PC, 0.001) 367 | PC = PC_new 368 | 369 | pro_aug=torch.rand(1) 370 | if pro_aug<0.1: 371 | num=random.randint(1,10) 372 | position=list(range(1024)) 373 | position=random.sample(position,num) 374 | position=torch.tensor(position,dtype=torch.long) 375 | PC[position,...]=torch.rand((num,3))*gt_s*0.6+gt_t 376 | 377 | # augmentation finish 378 | return PC, gt_R, gt_t, gt_s, 
model_point, PC_nocs,nocs_scale 379 | 380 | 381 | def get_sym_info(self, c, mug_handle=1): 382 | # sym_info c0 : face classfication c1, c2, c3:Three view symmetry, correspond to xy, xz, yz respectively 383 | # c0: 0 no symmetry 1 axis symmetry 2 two reflection planes 3 unimplemented type 384 | # Y axis points upwards, x axis pass through the handle, z axis otherwise 385 | # 386 | # for specific defination, see sketch_loss 387 | if c == 0:#'bottle' 388 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 389 | elif c == 1:#'bowl' 390 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 391 | elif c == 2:#'camera' 392 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 393 | elif c == 3:#'can' 394 | sym = np.array([1, 1, 1, 1], dtype=np.int32) 395 | elif c == 4:#'laptop' 396 | sym = np.array([0, 1, 0, 0], dtype=np.int32) 397 | elif c == 5 and mug_handle == 1:#'mug' 398 | sym = np.array([0, 1, 0, 0], dtype=np.int32) # for mug, we currently mark it as no symmetry 399 | elif c == 5 and mug_handle == 0:#'mug' 400 | sym = np.array([1, 0, 0, 0], dtype=np.int32) 401 | else: 402 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 403 | return sym 404 | 405 | 406 | def generate_aug_parameters(self, s_x=(0.8, 1.2), s_y=(0.8, 1.2), s_z=(0.8, 1.2), ax=50, ay=50, az=50, a=15): 407 | # for bb aug 408 | ex, ey, ez = np.random.rand(3) 409 | ex = ex * (s_x[1] - s_x[0]) + s_x[0] 410 | ey = ey * (s_y[1] - s_y[0]) + s_y[0] 411 | ez = ez * (s_z[1] - s_z[0]) + s_z[0] 412 | # for R, t aug 413 | Rm = get_rotation(np.random.uniform(-a, a), np.random.uniform(-a, a), np.random.uniform(-a, a)) 414 | dx = np.random.rand() * 2 * ax - ax 415 | dy = np.random.rand() * 2 * ay - ay 416 | dz = np.random.rand() * 2 * az - az 417 | return np.array([ex, ey, ez], dtype=np.float32), np.array([dx, dy, dz], dtype=np.float32) / 1000.0, Rm 418 | 419 | 420 | def get_fs_net_scale(self, model, nocs_scale,c): 421 | # model pc x 3 422 | lx = 2 * max(max(model[:, 0]), -min(model[:, 0])) 423 | ly = max(model[:, 1]) - min(model[:, 1]) 424 | lz = max(model[:, 2]) - min(model[:, 2]) 425 | 426 | # real scale 427 | lx_t = lx * nocs_scale * 1000 428 | ly_t = ly * nocs_scale * 1000 429 | lz_t = lz * nocs_scale * 1000 430 | 431 | if c == 0:#'bottle' 432 | unitx = 87 433 | unity = 220 434 | unitz = 89 435 | elif c == 1:#'bowl' 436 | unitx = 165 437 | unity = 80 438 | unitz = 165 439 | elif c == 2:#'camera' 440 | unitx = 88 441 | unity = 128 442 | unitz = 156 443 | elif c == 3:#'can' 444 | unitx = 68 445 | unity = 146 446 | unitz = 72 447 | elif c == 4:#'laptop' 448 | unitx = 346 449 | unity = 200 450 | unitz = 335 451 | elif c == 5:#'mug' 452 | unitx = 146 453 | unity = 83 454 | unitz = 114 455 | elif c == '02876657': 456 | unitx = 324 / 4 457 | unity = 874 / 4 458 | unitz = 321 / 4 459 | elif c == '02880940': 460 | unitx = 675 / 4 461 | unity = 271 / 4 462 | unitz = 675 / 4 463 | elif c == '02942699': 464 | unitx = 464 / 4 465 | unity = 487 / 4 466 | unitz = 702 / 4 467 | elif c == '02946921': 468 | unitx = 450 / 4 469 | unity = 753 / 4 470 | unitz = 460 / 4 471 | elif c == '03642806': 472 | unitx = 581 / 4 473 | unity = 445 / 4 474 | unitz = 672 / 4 475 | elif c == '03797390': 476 | unitx = 670 / 4 477 | unity = 540 / 4 478 | unitz = 497 / 4 479 | else: 480 | unitx = 0 481 | unity = 0 482 | unitz = 0 483 | print('This category is not recorded in my little brain.') 484 | raise NotImplementedError 485 | # scale residual 486 | return np.array([lx_t - unitx, ly_t - unity, lz_t - unitz])/1000.0, np.array([unitx, unity, unitz])/1000.0 487 | 488 | 
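# Illustrative usage sketch (not part of the original file): a minimal way to
# exercise PoseDataset on its own, assuming the NOCS-style data layout under
# ./data and that this is run from the repository root. Batch size and worker
# count below are arbitrary.
if __name__ == '__main__':
    from torch.utils.data import DataLoader
    dataset = PoseDataset(source='CAMERA+Real', mode='train', data_dir='data',
                          n_pts=1024, vis=False, img_size=192, use_augment=True)
    loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
    batch = next(iter(loader))
    print(batch['points'].shape)    # (16, 1024, 3) observed depth points
    print(batch['gt_green'].shape)  # GT y-axis (green) column of the rotation
    print(batch['mean_shape'])      # per-category mean box size in meters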
-------------------------------------------------------------------------------- /tools/valid.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | import os 4 | import time 5 | import argparse 6 | import cv2 7 | import math 8 | import glob 9 | import numpy as np 10 | from tqdm import tqdm 11 | import _pickle as cPickle 12 | import torch 13 | import torch.nn.functional as F 14 | from mmengine import Config,DictAction 15 | from network import NETWORK_REGISTRY 16 | from utils import load_depth, get_bbox, compute_mAP, plot_mAP 17 | from utils.logging import create_checkpoint 18 | import random 19 | from utils.utils import farthest_point_sample,index_points 20 | 21 | def set_random_seed(seed, deterministic=False): 22 | random.seed(seed) 23 | np.random.seed(seed) 24 | torch.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | if deterministic: 27 | torch.backends.cudnn.deterministic = True 28 | torch.backends.cudnn.benchmark = False 29 | 30 | 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument('--data', type=str, default='real_test', help='val, real_test') 33 | parser.add_argument('--data_dir', type=str, default='data', help='data directory') 34 | parser.add_argument('--n_cat', type=int, default=6, help='number of object categories') 35 | parser.add_argument('--nv_prior', type=int, default=1024, help='number of vertices in shape priors') 36 | parser.add_argument('--model', type=str, default='results/camera/model_50.pth', help='resume from saved model') 37 | parser.add_argument('--n_pts', type=int, default=1024, help='number of foreground points') 38 | parser.add_argument('--img_size', type=int, default=192, help='cropped image size') 39 | parser.add_argument('--gpus', type=str, default='1', help='GPU to use') 40 | parser.add_argument('--cfg', 41 | help='experiment configure file name', 42 | required=True, 43 | type=str) 44 | parser.add_argument('--cfg-options', 45 | nargs='+', 46 | action=DictAction, 47 | help='override some settings in the used config, the key-value pair ' 48 | 'in xxx=yyy format will be merged into config file. If the value to ' 49 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 50 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 51 | 'Note that the quotation marks are necessary and that no white space ' 52 | 'is allowed.') 53 | opt = parser.parse_args() 54 | 55 | per_obj=None 56 | use_gt_mask=False 57 | mean_shapes = np.load('assets/mean_points_emb.npy') 58 | 59 | 60 | result_dir='results/eval_real' 61 | 62 | 63 | xmap = np.array([[i for i in range(640)] for j in range(480)]) 64 | ymap = np.array([[j for i in range(640)] for j in range(480)]) 65 | norm_scale = 1000.0 66 | 67 | @torch.inference_mode() 68 | def detect(): 69 | # resume model 70 | print('use_gt_mask: ',use_gt_mask) 71 | global opt 72 | global result_dir 73 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus 74 | opt = Config.fromfile(opt.cfg) 75 | assert not opt.train 76 | assert opt.DATA in ['val', 'real_test'] 77 | opt.MODEL.decoder.training=False 78 | opt.MODEL.training=False 79 | if opt.DATA == 'val': 80 | result_dir = 'results/eval_camera' 81 | file_path = 'CAMERA/val_list.txt' 82 | cam_fx, cam_fy, cam_cx, cam_cy = 577.5, 577.5, 319.5, 239.5 83 | else: 84 | result_dir = 'results/eval_real' 85 | file_path = 'Real/test_list.txt' 86 | cam_fx, cam_fy, cam_cx, cam_cy = 591.0125, 590.16775, 322.525, 244.11084 87 | cam_K=np.identity(3, dtype=np.float32) 88 | cam_K[0,0],cam_K[1,1],cam_K[0,2],cam_K[1,2]=cam_fx, cam_fy, cam_cx, cam_cy 89 | 90 | if not os.path.exists(result_dir): 91 | os.makedirs(result_dir) 92 | set_random_seed(123) 93 | 94 | mean_shapes = np.load('assets/mean_points_emb.npy') 95 | 96 | model = NETWORK_REGISTRY.build(opt.MODEL) 97 | #model.init_para(0) 98 | model = torch.nn.DataParallel(model).cuda() 99 | final_output_dir = create_checkpoint(opt,None) 100 | checkpoint_file = os.path.join( 101 | final_output_dir, 'model', opt.RESUME_FILE) 102 | 103 | checkpoint = torch.load(checkpoint_file, map_location=lambda storage, loc: storage) 104 | model.load_state_dict(checkpoint['state_dict']) 105 | model.eval() 106 | 107 | 108 | # get test data list 109 | img_list = [os.path.join(file_path.split('/')[0], line.rstrip('\n')) 110 | for line in open(os.path.join(opt.DATASET.data_dir, file_path))] 111 | # frame by frame test 112 | times=[] 113 | t_inference = 0.0 114 | t_umeyama = 0.0 115 | inst_count = 0 116 | img_count = 0 117 | t_start = time.time() 118 | 119 | # img_list=img_list[:300] 120 | cam_K=np.identity(3, dtype=np.float32) 121 | cam_K[0,0],cam_K[1,1],cam_K[0,2],cam_K[1,2]=cam_fx, cam_fy, cam_cx, cam_cy 122 | for path in tqdm(img_list): 123 | img_path = os.path.join(opt.DATASET.data_dir, path) 124 | raw_depth = load_depth(img_path) 125 | # load mask-rcnn detection results 126 | img_path_parsing = img_path.split('/') 127 | 128 | if use_gt_mask: 129 | gt_mask=cv2.imread(img_path + '_mask.png')[:, :, 2] 130 | with open(img_path + '_label.pkl', 'rb') as f: 131 | gts = cPickle.load(f) 132 | num_insts=len(gts['instance_ids']) 133 | else: 134 | 135 | mrcnn_path = os.path.join('data/results/mrcnn_results', opt.DATA, 'results_{}_{}_{}.pkl'.format( 136 | opt.DATA.split('_')[-1], img_path_parsing[-2], img_path_parsing[-1])) 137 | with open(mrcnn_path, 'rb') as f: 138 | mrcnn_result = cPickle.load(f) 139 | num_insts = len(mrcnn_result['class_ids']) 140 | 141 | 142 | f_sRT = np.zeros((num_insts, 4, 4), dtype=float) 143 | f_size = np.zeros((num_insts, 3), dtype=float) 144 | 145 | # prepare frame data 146 | f_points, f_catId,f_rgb,f_mask,f_choose,f_prior,f_sym,f_mean_shape= [], [],[],[],[],[],[],[] 147 | valid_inst = [] 148 | 149 | for i in range(num_insts): 150 | if use_gt_mask: 151 | new_gt_mask=gt_mask.copy() 152 | 
new_gt_mask=np.equal(new_gt_mask,gts['instance_ids'][i]) 153 | new_gt_mask=np.logical_and(new_gt_mask,raw_depth>0) 154 | mask=new_gt_mask 155 | cat_id=gts['class_ids'][i]-1 156 | else: 157 | cat_id = mrcnn_result['class_ids'][i] - 1 158 | mask = np.logical_and(mrcnn_result['masks'][:, :, i], raw_depth > 0) 159 | if per_obj is not None and cat_id not in per_obj: 160 | continue 161 | # raw_rgb[mask,:]=255 162 | 163 | # if cat_id==1: 164 | # raw_rgb[mask,:]=255 165 | # cv2.imwrite(os.path.join('imgs','pred_masks',img_path_parsing[-1]+'_1.png'),raw_rgb) 166 | 167 | mask = mask.flatten() 168 | 169 | depth_masked=(raw_depth.flatten())[mask] #N 170 | xmap_masked=(xmap.flatten())[mask] 171 | ymap_masked=(ymap.flatten())[mask] 172 | 173 | pt2 = depth_masked / norm_scale 174 | pt0 = (xmap_masked - cam_cx) * pt2 / cam_fx 175 | pt1 = (ymap_masked - cam_cy) * pt2 / cam_fy 176 | points = np.stack((pt0, pt1, pt2), axis=1) 177 | 178 | l_all=points.shape[0] 179 | 180 | if l_all < 32: 181 | f_sRT[i] = np.identity(4, dtype=float) 182 | prior = mean_shapes[cat_id].astype(np.float32) 183 | f_size[i] = 2 * np.amax(np.abs(prior), axis=0) 184 | continue 185 | else: 186 | valid_inst.append(i) 187 | #prior = mean_shapes[cat_id].astype(np.float32) 188 | if use_gt_mask: 189 | rmin, rmax, cmin, cmax = get_bbox(gts['bboxes'][i]) 190 | else: 191 | rmin, rmax, cmin, cmax = get_bbox(mrcnn_result['rois'][i]) 192 | 193 | 194 | # process objects with valid depth observation 195 | if l_all >= opt.DATASET.n_pts: 196 | choose=np.random.choice(l_all,opt.DATASET.n_pts,replace=False) 197 | else: 198 | choose=np.random.choice(l_all,opt.DATASET.n_pts,replace=True) 199 | 200 | points=points[choose,...] 201 | 202 | 203 | sym_info = get_sym_info(cat_id, mug_handle=1) 204 | mean_shape=get_fs_net_scale(cat_id) 205 | 206 | # concatenate instances 207 | f_points.append(points) 208 | f_catId.append(cat_id) 209 | # f_rgb.append(rgb) 210 | f_mask.append(mask) 211 | # f_choose.append(choose) 212 | f_sym.append(sym_info) 213 | f_mean_shape.append(mean_shape) 214 | #f_prior.append(prior) 215 | if len(valid_inst): 216 | f_points = torch.cuda.FloatTensor(np.array(f_points)).contiguous() 217 | f_catId = torch.cuda.LongTensor(np.array(f_catId)).contiguous() 218 | # f_rgb=torch.cuda.FloatTensor(np.array(f_rgb)).contiguous() 219 | f_choose=torch.cuda.LongTensor(np.array(f_choose)).contiguous() 220 | f_sym=torch.cuda.LongTensor(np.array(f_sym)).contiguous() 221 | f_mean_shape=torch.cuda.FloatTensor(np.array(f_mean_shape)).contiguous() 222 | batched_input={ 223 | 'points':f_points, 224 | 'cat_id':f_catId, 225 | 'sym':f_sym, 226 | 'mean_shape':f_mean_shape 227 | #'prior':f_prior 228 | } 229 | # inference 230 | torch.cuda.synchronize() 231 | t_now = time.time() 232 | pred_sRT, size = model(batched_input) 233 | for i in range(len(valid_inst)): 234 | inst_idx = valid_inst[i] 235 | f_sRT[inst_idx] = pred_sRT[i] 236 | f_size[inst_idx]=size[i] 237 | torch.cuda.synchronize() 238 | inference=time.time() - t_now 239 | times.append(inference) 240 | t_inference += (inference) 241 | img_count += 1 242 | inst_count += len(valid_inst) 243 | 244 | 245 | # save results 246 | result = {} 247 | if not use_gt_mask: 248 | with open(img_path + '_label.pkl', 'rb') as f: 249 | gts = cPickle.load(f) 250 | result['gt_class_ids'] = gts['class_ids'] 251 | result['gt_bboxes'] = gts['bboxes'] 252 | for idx,cat_id in enumerate(gts['class_ids']): 253 | cat_id=cat_id-1 254 | assert cat_id>=0 255 | rotation = gts['rotations'][idx] 256 | scale = gts['scales'][idx] 257 | translation = 
gts['translations'][idx] 258 | if cat_id in [0, 1, 3]: 259 | # assume continuous axis rotation symmetry 260 | theta_x = rotation[0, 0] + rotation[2, 2] 261 | theta_y = rotation[0, 2] - rotation[2, 0] 262 | r_norm = math.sqrt(theta_x**2 + theta_y**2) 263 | s_map = np.array([[theta_x/r_norm, 0.0, -theta_y/r_norm], 264 | [0.0, 1.0, 0.0 ], 265 | [theta_y/r_norm, 0.0, theta_x/r_norm]]) 266 | rotation = rotation @ s_map 267 | sRT = np.identity(4, dtype=np.float32) 268 | sRT[:3, :3] = scale * rotation 269 | sRT[:3, 3] = translation 270 | gts['poses'][idx]=sRT 271 | result['gt_RTs'] = gts['poses'] 272 | result['gt_scales'] = gts['size'] 273 | result['gt_handle_visibility'] = gts['handle_visibility'] 274 | 275 | if use_gt_mask: 276 | result['pred_class_ids'] = gts['class_ids'] 277 | result['pred_bboxes'] = gts['bboxes'] 278 | result['pred_scores'] = np.ones((num_insts,)) 279 | else: 280 | result['pred_class_ids'] = mrcnn_result['class_ids'] 281 | result['pred_bboxes'] = mrcnn_result['rois'] 282 | result['pred_scores'] = mrcnn_result['scores'] 283 | 284 | 285 | result['pred_RTs'] = f_sRT 286 | result['pred_scales']=f_size 287 | 288 | image_short_path = '_'.join(img_path_parsing[-3:]) 289 | save_path = os.path.join(result_dir, 'results_{}.pkl'.format(image_short_path)) 290 | with open(save_path, 'wb') as f: 291 | cPickle.dump(result, f) 292 | 293 | 294 | 295 | # write statistics 296 | total_time=0.0 297 | times=times[100:] 298 | for t in times: 299 | total_time+=t 300 | fw = open('{0}/eval_logs.txt'.format(result_dir), 'w') 301 | messages = [] 302 | messages.append("Total images: {}".format(len(img_list))) 303 | messages.append("Valid images: {}, Total instances: {}, Average: {:.2f}/image".format( 304 | img_count, inst_count, inst_count/img_count)) 305 | messages.append("Inference time: {:06f} Average: {:06f}/image fps:{:06f}".format(t_inference, total_time/(img_count-100),(img_count-100)/total_time)) 306 | messages.append("Total time: {:06f}".format(time.time() - t_start)) 307 | for msg in messages: 308 | print(msg) 309 | fw.write(msg + '\n') 310 | fw.close() 311 | del model 312 | 313 | 314 | def evaluate(): 315 | degree_thres_list = list(range(0, 61, 1)) 316 | shift_thres_list = [i / 2 for i in range(21)] 317 | iou_thres_list = [i / 100 for i in range(101)] 318 | # predictions 319 | result_pkl_list = glob.glob(os.path.join(result_dir, 'results_*.pkl')) 320 | result_pkl_list = sorted(result_pkl_list) 321 | 322 | # result_pkl_list=result_pkl_list[:100] 323 | assert len(result_pkl_list) 324 | pred_results = [] 325 | for pkl_path in result_pkl_list: 326 | with open(pkl_path, 'rb') as f: 327 | result = cPickle.load(f) 328 | if 'gt_handle_visibility' not in result: 329 | result['gt_handle_visibility'] = np.ones_like(result['gt_class_ids']) 330 | else: 331 | assert len(result['gt_handle_visibility']) == len(result['gt_class_ids']), "{} {}".format( 332 | result['gt_handle_visibility'], result['gt_class_ids']) 333 | if type(result) is list: 334 | pred_results += result 335 | elif type(result) is dict: 336 | pred_results.append(result) 337 | else: 338 | assert False 339 | 340 | # To be consistent with NOCS, set use_matches_for_pose=True for mAP evaluation 341 | iou_aps, pose_aps, iou_acc, pose_acc = compute_mAP(pred_results, result_dir, degree_thres_list, shift_thres_list, 342 | iou_thres_list, iou_pose_thres=0.1, use_matches_for_pose=True) 343 | #print(pose_aps) 344 | # np.save('pose_aps', pose_aps, allow_pickle=True, fix_imports=True) 345 | 346 | # metric 347 | fw = 
open('{0}/eval_logs.txt'.format(result_dir), 'a') 348 | iou_25_idx = iou_thres_list.index(0.25) 349 | iou_50_idx = iou_thres_list.index(0.5) 350 | iou_75_idx = iou_thres_list.index(0.75) 351 | degree_05_idx = degree_thres_list.index(5) 352 | degree_10_idx = degree_thres_list.index(10) 353 | shift_02_idx = shift_thres_list.index(2) 354 | shift_05_idx = shift_thres_list.index(5) 355 | messages = [] 356 | messages.append('mAP:') 357 | messages.append('3D IoU at 25: {:.1f}'.format(iou_aps[-1, iou_25_idx] * 100)) 358 | messages.append('3D IoU at 50: {:.1f}'.format(iou_aps[-1, iou_50_idx] * 100)) 359 | messages.append('3D IoU at 75: {:.1f}'.format(iou_aps[-1, iou_75_idx] * 100)) 360 | messages.append('5 degree, 2cm: {:.1f}'.format(pose_aps[-1, degree_05_idx, shift_02_idx] * 100)) 361 | messages.append('5 degree, 5cm: {:.1f}'.format(pose_aps[-1, degree_05_idx, shift_05_idx] * 100)) 362 | messages.append('10 degree, 2cm: {:.1f}'.format(pose_aps[-1, degree_10_idx, shift_02_idx] * 100)) 363 | messages.append('10 degree, 5cm: {:.1f}'.format(pose_aps[-1, degree_10_idx, shift_05_idx] * 100)) 364 | messages.append('Acc:') 365 | messages.append('3D IoU at 25: {:.1f}'.format(iou_acc[-1, iou_25_idx] * 100)) 366 | messages.append('3D IoU at 50: {:.1f}'.format(iou_acc[-1, iou_50_idx] * 100)) 367 | messages.append('3D IoU at 75: {:.1f}'.format(iou_acc[-1, iou_75_idx] * 100)) 368 | messages.append('5 degree, 2cm: {:.1f}'.format(pose_acc[-1, degree_05_idx, shift_02_idx] * 100)) 369 | messages.append('5 degree, 5cm: {:.1f}'.format(pose_acc[-1, degree_05_idx, shift_05_idx] * 100)) 370 | messages.append('10 degree, 2cm: {:.1f}'.format(pose_acc[-1, degree_10_idx, shift_02_idx] * 100)) 371 | messages.append('10 degree, 5cm: {:.1f}'.format(pose_acc[-1, degree_10_idx, shift_05_idx] * 100)) 372 | for msg in messages: 373 | print(msg) 374 | fw.write(msg + '\n') 375 | fw.close() 376 | # load NOCS results 377 | pkl_path = os.path.join('results/nocs_results', opt.DATA, 'mAP_Acc.pkl') 378 | with open(pkl_path, 'rb') as f: 379 | nocs_results = cPickle.load(f) 380 | nocs_iou_aps = nocs_results['iou_aps'][-1, :] 381 | nocs_pose_aps = nocs_results['pose_aps'][-1, :, :] 382 | iou_aps = np.concatenate((iou_aps, nocs_iou_aps[None, :]), axis=0) 383 | pose_aps = np.concatenate((pose_aps, nocs_pose_aps[None, :, :]), axis=0) 384 | # plot 385 | plot_mAP(iou_aps, pose_aps, result_dir, iou_thres_list, degree_thres_list, shift_thres_list) 386 | 387 | 388 | def get_sym_info(c, mug_handle=1): 389 | # sym_info c0 : face classfication c1, c2, c3:Three view symmetry, correspond to xy, xz, yz respectively 390 | # c0: 0 no symmetry 1 axis symmetry 2 two reflection planes 3 unimplemented type 391 | # Y axis points upwards, x axis pass through the handle, z axis otherwise 392 | # 393 | # for specific defination, see sketch_loss 394 | if c == 0:#'bottle' 395 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 396 | elif c == 1:#'bowl' 397 | sym = np.array([1, 1, 0, 1], dtype=np.int32) 398 | elif c == 2:#'camera' 399 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 400 | elif c == 3:#'can' 401 | sym = np.array([1, 1, 1, 1], dtype=np.int32) 402 | elif c == 4:#'laptop' 403 | sym = np.array([0, 1, 0, 0], dtype=np.int32) 404 | elif c == 5 and mug_handle == 1:#'mug' 405 | sym = np.array([0, 1, 0, 0], dtype=np.int32) # for mug, we currently mark it as no symmetry 406 | elif c == 5 and mug_handle == 0:#'mug' 407 | sym = np.array([1, 0, 0, 0], dtype=np.int32) 408 | else: 409 | sym = np.array([0, 0, 0, 0], dtype=np.int32) 410 | return sym 411 | 412 | 413 | def 
get_fs_net_scale(c): 414 | if c == 0:#'bottle' 415 | unitx = 87 416 | unity = 220 417 | unitz = 89 418 | elif c == 1:#'bowl' 419 | unitx = 165 420 | unity = 80 421 | unitz = 165 422 | elif c == 2:#'camera' 423 | unitx = 88 424 | unity = 128 425 | unitz = 156 426 | elif c == 3:#'can' 427 | unitx = 68 428 | unity = 146 429 | unitz = 72 430 | elif c == 4:#'laptop' 431 | unitx = 346 432 | unity = 200 433 | unitz = 335 434 | elif c == 5:#'mug' 435 | unitx = 146 436 | unity = 83 437 | unitz = 114 438 | elif c == '02876657': 439 | unitx = 324 / 4 440 | unity = 874 / 4 441 | unitz = 321 / 4 442 | elif c == '02880940': 443 | unitx = 675 / 4 444 | unity = 271 / 4 445 | unitz = 675 / 4 446 | elif c == '02942699': 447 | unitx = 464 / 4 448 | unity = 487 / 4 449 | unitz = 702 / 4 450 | elif c == '02946921': 451 | unitx = 450 / 4 452 | unity = 753 / 4 453 | unitz = 460 / 4 454 | elif c == '03642806': 455 | unitx = 581 / 4 456 | unity = 445 / 4 457 | unitz = 672 / 4 458 | elif c == '03797390': 459 | unitx = 670 / 4 460 | unity = 540 / 4 461 | unitz = 497 / 4 462 | else: 463 | unitx = 0 464 | unity = 0 465 | unitz = 0 466 | print('This category is not recorded in my little brain.') 467 | raise NotImplementedError 468 | # scale residual 469 | return np.array([unitx, unity, unitz])/1000.0 470 | 471 | 472 | if __name__ == '__main__': 473 | print('Detecting ...') 474 | detect() 475 | print('Evaluating ...') 476 | evaluate() --------------------------------------------------------------------------------
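For reference, the rotation canonicalization that appears in both dataset/pose_dataset.py and tools/valid.py can be read as the self-contained function below. This is a restatement for clarity only (the function name is not part of the repository): for categories with a continuous symmetry about their own y axis (bottle, bowl, can), any spin about that axis yields an equally valid ground-truth rotation, so the rotation is mapped to one canonical representative before losses and evaluation metrics are computed.

import math
import numpy as np

def canonicalize_y_symmetric_rotation(rotation):
    """Post-multiply R by a rotation about its own y axis, built from R's entries,
    so that rotations differing only by a spin about the symmetry axis collapse
    to the same canonical matrix (same construction as the in-repo s_map code)."""
    theta_x = rotation[0, 0] + rotation[2, 2]
    theta_y = rotation[0, 2] - rotation[2, 0]
    r_norm = math.sqrt(theta_x ** 2 + theta_y ** 2)
    s_map = np.array([[theta_x / r_norm, 0.0, -theta_y / r_norm],
                      [0.0, 1.0, 0.0],
                      [theta_y / r_norm, 0.0, theta_x / r_norm]])
    return rotation @ s_map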