├── tools
    ├── __init__.py
    ├── train_generator.py
    └── generate_grasps.py
├── .dockerignore
├── grasp_ldm
    ├── utils
    │   ├── __init__.py
    │   ├── torch_utils.py
    │   ├── utils.py
    │   ├── vis.py
    │   └── camera.py
    ├── models
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── ext
    │   │   │   ├── __init__.py
    │   │   │   └── pvcnn
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── modules
    │   │   │   │       ├── loss.py
    │   │   │   │       ├── functional
    │   │   │   │       │   ├── src
    │   │   │   │       │   │   ├── ball_query
    │   │   │   │       │   │   │   ├── ball_query.cuh
    │   │   │   │       │   │   │   ├── ball_query.hpp
    │   │   │   │       │   │   │   ├── ball_query.cpp
    │   │   │   │       │   │   │   └── ball_query.cu
    │   │   │   │       │   │   ├── grouping
    │   │   │   │       │   │   │   ├── grouping.hpp
    │   │   │   │       │   │   │   ├── grouping.cuh
    │   │   │   │       │   │   │   ├── grouping.cpp
    │   │   │   │       │   │   │   └── grouping.cu
    │   │   │   │       │   │   ├── voxelization
    │   │   │   │       │   │   │   ├── vox.cuh
    │   │   │   │       │   │   │   ├── vox.hpp
    │   │   │   │       │   │   │   ├── vox.cpp
    │   │   │   │       │   │   │   └── vox.cu
    │   │   │   │       │   │   ├── sampling
    │   │   │   │       │   │   │   ├── sampling.hpp
    │   │   │   │       │   │   │   ├── sampling.cuh
    │   │   │   │       │   │   │   ├── sampling.cpp
    │   │   │   │       │   │   │   └── sampling.cu
    │   │   │   │       │   │   ├── interpolate
    │   │   │   │       │   │   │   ├── trilinear_devox.cuh
    │   │   │   │       │   │   │   ├── trilinear_devox.hpp
    │   │   │   │       │   │   │   ├── neighbor_interpolate.hpp
    │   │   │   │       │   │   │   ├── neighbor_interpolate.cuh
    │   │   │   │       │   │   │   ├── neighbor_interpolate.cpp
    │   │   │   │       │   │   │   ├── trilinear_devox.cpp
    │   │   │   │       │   │   │   ├── trilinear_devox.cu
    │   │   │   │       │   │   │   └── neighbor_interpolate.cu
    │   │   │   │       │   │   ├── utils.hpp
    │   │   │   │       │   │   ├── cuda_utils.cuh
    │   │   │   │       │   │   └── bindings.cpp
    │   │   │   │       │   ├── __init__.py
    │   │   │   │       │   ├── loss.py
    │   │   │   │       │   ├── ball_query.py
    │   │   │   │       │   ├── backend.py
    │   │   │   │       │   ├── grouping.py
    │   │   │   │       │   ├── voxelization.py
    │   │   │   │       │   ├── interpolatation.py
    │   │   │   │       │   ├── devoxelization.py
    │   │   │   │       │   └── sampling.py
    │   │   │   │       ├── __init__.py
    │   │   │   │       ├── se.py
    │   │   │   │       ├── shared_mlp.py
    │   │   │   │       ├── voxelization.py
    │   │   │   │       ├── ball_query.py
    │   │   │   │       ├── pointnet.py
    │   │   │   │       └── pvconv.py
    │   │   │   │   ├── README.md
    │   │   │   │   ├── pointnet2.py
    │   │   │   │   └── utils.py
    │   │   ├── base_network.py
    │   │   ├── modules.py
    │   │   └── class_conditioned_resnet.py
    │   ├── __init__.py
    │   ├── diffusion
    │   │   └── __init__.py
    │   ├── builder.py
    │   └── grasp_classifier.py
    ├── __init__.py
    ├── losses
    │   ├── __init__.py
    │   ├── builder.py
    │   └── loss.py
    ├── dataset
    │   ├── __init__.py
    │   ├── cameras
    │   │   └── camera_d435i_dummy.json
    │   ├── acronym
    │   │   ├── __init__.py
    │   │   └── gripper_ctrl_pts.json
    │   ├── builder.py
    │   └── pl_wrapper.py
    ├── inference
    │   └── __init__.py
    └── trainers
    │   ├── __init__.py
    │   ├── mixins.py
    │   ├── experiment.py
    │   └── grasp_classification_trainer.py
├── doc
    └── img
    │   └── arch_graspldm.png
├── .gitignore
├── NOTICE
├── environment.yml
├── LICENSE
├── requirements.txt
├── .docker
    ├── build.sh
    ├── gpu_env.Dockerfile
    └── run.sh
├── .devcontainer
    └── devcontainer.json
├── setup.py
├── .pre-commit-config.yaml
├── configs
    └── generation
    │   ├── fpc
    │       └── fpc_1a_latentc3_z4_pc64_180k.py
    │   └── partial_pc
    │       └── ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k.py
└── README.md


/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | output
2 | 


--------------------------------------------------------------------------------
/grasp_ldm/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/grasp_ldm/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.1"
2 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/grasp_ldm/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .loss import *
2 | 


--------------------------------------------------------------------------------
/grasp_ldm/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import *
2 | 


--------------------------------------------------------------------------------
/doc/img/arch_graspldm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuldeepbrd1/graspLDM/HEAD/doc/img/arch_graspldm.png


--------------------------------------------------------------------------------
/grasp_ldm/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .grasp_ldm import GraspLatentDDM
2 | from .grasp_vae import GraspCVAE
3 | 


--------------------------------------------------------------------------------
/grasp_ldm/inference/__init__.py:
--------------------------------------------------------------------------------
1 | from .inference import InferenceLDM, InferenceVAE
2 | from .inference_base import Conditioning, ModelType
3 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .elucidated_diffusion import ElucidatedDiffusion
2 | from .gaussian_diffusion import GaussianDiffusion1D
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | logs
 2 | wandb
 3 | checkpoints/*
 4 | output/*
 5 | **/__pycache__
 6 | *.cpython
 7 | *.pyc
 8 | *.pt.trace.*
 9 | *.ckpt
10 | *.out
11 | *.swp
12 | *.pt
13 | output/*
14 | *.gif
15 | *.crt
16 | *.pkl
17 | **/*.egg-info
18 | .vscode
19 | data
20 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from . import functional as F
 4 | 
 5 | __all__ = ["KLLoss"]
 6 | 
 7 | 
 8 | class KLLoss(nn.Module):
 9 |     def forward(self, x, y):
10 |         return F.kl_loss(x, y)
11 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2023 Kuldeep Rambhai Barad, University of Luxembourg and Redwire Space Europe
2 | 
3 | This software was developed at the Interdisciplinary Center for Security, Reliability and Trust (SnT) of the University of Luxembourg in partnership with Redwire Space Europe (Made In Space Europe Sarl.).
4 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: grasp_ldm
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - python=3.8
 6 |   - numpy
 7 |   # You need numpy from conda to avoid difficulties with
 8 |   # glibc and opengl issueas for visualization with trimesh/pyglet
 9 |   - pip
10 |   - pip:
11 |     - -r file:requirements.txt
12 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.cuh:
--------------------------------------------------------------------------------
1 | #ifndef _BALL_QUERY_CUH
2 | #define _BALL_QUERY_CUH
3 | 
4 | void ball_query(int b, int n, int m, float r2, int u,
5 |                 const float *centers_coords, const float *points_coords,
6 |                 int *neighbors_indices);
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .ball_query import BallQuery
2 | from .frustum import FrustumPointNetLoss
3 | from .loss import KLLoss
4 | from .pointnet import PointNetAModule, PointNetFPModule, PointNetSAModule
5 | from .pvconv import PVConv
6 | from .se import SE3d
7 | from .shared_mlp import SharedMLP
8 | from .voxelization import Voxelization
9 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _GROUPING_HPP
 2 | #define _GROUPING_HPP
 3 | 
 4 | #include <torch/extension.h>
 5 | 
 6 | at::Tensor grouping_forward(at::Tensor features, at::Tensor indices);
 7 | at::Tensor grouping_backward(at::Tensor grad_y, at::Tensor indices,
 8 |                              const int n);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/__init__.py:
--------------------------------------------------------------------------------
1 | from .ball_query import ball_query
2 | from .devoxelization import trilinear_devoxelize
3 | from .grouping import grouping
4 | from .interpolatation import nearest_neighbor_interpolate
5 | from .loss import huber_loss, kl_loss
6 | from .sampling import furthest_point_sample, gather, logits_mask
7 | from .voxelization import avg_voxelize
8 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _BALL_QUERY_HPP
 2 | #define _BALL_QUERY_HPP
 3 | 
 4 | #include <torch/extension.h>
 5 | 
 6 | at::Tensor ball_query_forward(at::Tensor centers_coords,
 7 |                               at::Tensor points_coords, const float radius,
 8 |                               const int num_neighbors);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _GROUPING_CUH
 2 | #define _GROUPING_CUH
 3 | 
 4 | void grouping(int b, int c, int n, int m, int u, const float *features,
 5 |               const int *indices, float *out);
 6 | void grouping_grad(int b, int c, int n, int m, int u, const float *grad_y,
 7 |                    const int *indices, float *grad_x);
 8 | 
 9 | #endif
10 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _VOX_CUH
 2 | #define _VOX_CUH
 3 | 
 4 | // CUDA function declarations
 5 | void avg_voxelize(int b, int c, int n, int r, int r2, int r3, const int *coords,
 6 |                   const float *feat, int *ind, int *cnt, float *out);
 7 | void avg_voxelize_grad(int b, int c, int n, int s, const int *idx,
 8 |                        const int *cnt, const float *grad_y, float *grad_x);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/grasp_ldm/dataset/cameras/camera_d435i_dummy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "hfov": 87,
 3 |     "vfov": 58,
 4 |     "width": 640,
 5 |     "height": 480,
 6 |     "cameraMatrix": [
 7 |         [
 8 |             904.7,
 9 |             0,
10 |             320.0
11 |         ],
12 |         [
13 |             0,
14 |             904.7,
15 |             240.0
16 |         ],
17 |         [
18 |             0,
19 |             0,
20 |             1
21 |         ]
22 |     ],
23 |     "distCoeffs": []
24 | }
25 | 


--------------------------------------------------------------------------------
/grasp_ldm/losses/builder.py:
--------------------------------------------------------------------------------
 1 | from .loss import *
 2 | 
 3 | ALL_LOSSES = {
 4 |     "VAEReconstructionLoss": VAEReconstructionLoss,
 5 |     "VAELatentLoss": VAELatentLoss,
 6 |     "GraspReconstructionLoss": GraspReconstructionLoss,
 7 |     "QualityLoss": QualityLoss,
 8 |     "ClassificationLoss": ClassificationLoss,
 9 |     "GraspControlPointsReconstructionLoss": GraspControlPointsReconstructionLoss,
10 | }
11 | 
12 | 
13 | def build_loss_from_cfg(loss_cfg):
14 |     return ALL_LOSSES[loss_cfg.type](**loss_cfg.args)
15 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _SAMPLING_HPP
 2 | #define _SAMPLING_HPP
 3 | 
 4 | #include <torch/extension.h>
 5 | 
 6 | at::Tensor gather_features_forward(at::Tensor features, at::Tensor indices);
 7 | at::Tensor gather_features_backward(at::Tensor grad_y, at::Tensor indices,
 8 |                                     const int n);
 9 | at::Tensor furthest_point_sampling_forward(at::Tensor coords,
10 |                                            const int num_samples);
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _SAMPLING_CUH
 2 | #define _SAMPLING_CUH
 3 | 
 4 | void gather_features(int b, int c, int n, int m, const float *features,
 5 |                      const int *indices, float *out);
 6 | void gather_features_grad(int b, int c, int n, int m, const float *grad_y,
 7 |                           const int *indices, float *grad_x);
 8 | void furthest_point_sampling(int b, int n, int m, const float *coords,
 9 |                              float *distances, int *indices);
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _VOX_HPP
 2 | #define _VOX_HPP
 3 | 
 4 | #include <torch/torch.h>
 5 | #include <vector>
 6 | 
 7 | std::vector<at::Tensor> avg_voxelize_forward(const at::Tensor features,
 8 |                                              const at::Tensor coords,
 9 |                                              const int resolution);
10 | 
11 | at::Tensor avg_voxelize_backward(const at::Tensor grad_y,
12 |                                  const at::Tensor indices,
13 |                                  const at::Tensor cnt);
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | 
 4 | __all__ = ["kl_loss", "huber_loss"]
 5 | 
 6 | 
 7 | def kl_loss(x, y):
 8 |     x = F.softmax(x.detach(), dim=1)
 9 |     y = F.log_softmax(y, dim=1)
10 |     return torch.mean(torch.sum(x * (torch.log(x) - y), dim=1))
11 | 
12 | 
13 | def huber_loss(error, delta):
14 |     abs_error = torch.abs(error)
15 |     quadratic = torch.min(abs_error, torch.full_like(abs_error, fill_value=delta))
16 |     losses = 0.5 * (quadratic**2) + delta * (abs_error - quadratic)
17 |     return torch.mean(losses)
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2023 Kuldeep Rambhai Barad, University of Luxembourg and Redwire Space Europe
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _TRILINEAR_DEVOX_CUH
 2 | #define _TRILINEAR_DEVOX_CUH
 3 | 
 4 | // CUDA function declarations
 5 | void trilinear_devoxelize(int b, int c, int n, int r, int r2, int r3,
 6 |                           bool is_training, const float *coords,
 7 |                           const float *feat, int *inds, float *wgts,
 8 |                           float *outs);
 9 | void trilinear_devoxelize_grad(int b, int c, int n, int r3, const int *inds,
10 |                                const float *wgts, const float *grad_y,
11 |                                float *grad_x);
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _TRILINEAR_DEVOX_HPP
 2 | #define _TRILINEAR_DEVOX_HPP
 3 | 
 4 | #include <torch/torch.h>
 5 | #include <vector>
 6 | 
 7 | std::vector<at::Tensor> trilinear_devoxelize_forward(const int r,
 8 |                                                      const bool is_training,
 9 |                                                      const at::Tensor coords,
10 |                                                      const at::Tensor features);
11 | 
12 | at::Tensor trilinear_devoxelize_backward(const at::Tensor grad_y,
13 |                                          const at::Tensor indices,
14 |                                          const at::Tensor weights, const int r);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | addict>=2.4.0
 2 | # black>=22.0.0 # Formatting
 3 | certifi
 4 | diffusers[torch]
 5 | einops
 6 | h5py>=3.7.0
 7 | matplotlib>=3.6.0
 8 | ninja
 9 | opencv-python-headless
10 | pandas>=1.5.1
11 | Pillow>=9.2.0
12 | pyglet==1.5.27
13 | pyrender
14 | pytorch-lightning==1.8.0
15 | scikit-learn==1.2.2
16 | scipy>=1.9.0
17 | seaborn>=0.12.1
18 | shapely>=2.0.0
19 | six>=1.16.0
20 | torch==1.13.1 -e https://download.pytorch.org/whl/cu117
21 | torchvision==0.14.1 -e https://download.pytorch.org/whl/cu117
22 | tqdm==4.64.1
23 | trimesh==3.17.1
24 | # wandb==0.13.6 # Logging
25 | yapf==0.32.0
26 | 
27 | ## Optional
28 | # ipykernel
29 | # iprogress
30 | # jupyter
31 | # glooey
32 | # torcheval
33 | # pytorch3d @ git+https://github.com/facebookresearch/pytorch3d.git@v0.7.4
34 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/README.md:
--------------------------------------------------------------------------------
 1 | # PVCNN: Point-Voxel CNN for Efficient 3D Deep Learning
 2 | 
 3 | Source: [https://github.com/mit-han-lab/pvcnn](https://github.com/mit-han-lab/pvcnn)
 4 | 
 5 | See [grasp_ldm/models/modules/ext/pvcnn/benchmark.py](grasp_ldm/models/modules/ext/pvcnn/benchmark.py) for the preliminary comparison between PVCNN and PointNet++.
 6 | 
 7 | ```
 8 | @inproceedings{liu2019pvcnn,
 9 |   title={Point-Voxel CNN for Efficient 3D Deep Learning},
10 |   author={Liu, Zhijian and Tang, Haotian and Lin, Yujun and Han, Song},
11 |   booktitle={Advances in Neural Information Processing Systems},
12 |   year={2019}
13 | }
14 | ```
15 | 
16 | ## License
17 | 
18 | This repository is released under the MIT license. See [LICENSE](LICENSE) for additional details.
19 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _NEIGHBOR_INTERPOLATE_HPP
 2 | #define _NEIGHBOR_INTERPOLATE_HPP
 3 | 
 4 | #include <torch/extension.h>
 5 | #include <vector>
 6 | 
 7 | std::vector<at::Tensor>
 8 | three_nearest_neighbors_interpolate_forward(at::Tensor points_coords,
 9 |                                             at::Tensor centers_coords,
10 |                                             at::Tensor centers_features);
11 | at::Tensor three_nearest_neighbors_interpolate_backward(at::Tensor grad_y,
12 |                                                         at::Tensor indices,
13 |                                                         at::Tensor weights,
14 |                                                         const int m);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/ball_query.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | 
 3 | from .backend import _backend
 4 | 
 5 | __all__ = ["ball_query"]
 6 | 
 7 | 
 8 | def ball_query(centers_coords, points_coords, radius, num_neighbors):
 9 |     """
10 |     :param centers_coords: coordinates of centers, FloatTensor[B, 3, M]
11 |     :param points_coords: coordinates of points, FloatTensor[B, 3, N]
12 |     :param radius: float, radius of ball query
13 |     :param num_neighbors: int, maximum number of neighbors
14 |     :return:
15 |         neighbor_indices: indices of neighbors, IntTensor[B, M, U]
16 |     """
17 |     centers_coords = centers_coords.contiguous()
18 |     points_coords = points_coords.contiguous()
19 |     return _backend.ball_query(centers_coords, points_coords, radius, num_neighbors)
20 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/se.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | __all__ = ["SE3d"]
 5 | 
 6 | 
 7 | class Swish(nn.Module):
 8 |     def forward(self, x):
 9 |         return x * torch.sigmoid(x)
10 | 
11 | 
12 | class SE3d(nn.Module):
13 |     def __init__(self, channel, reduction=8, use_relu=False):
14 |         super().__init__()
15 |         self.fc = nn.Sequential(
16 |             nn.Linear(channel, channel // reduction, bias=False),
17 |             nn.ReLU(True) if use_relu else Swish(),
18 |             nn.Linear(channel // reduction, channel, bias=False),
19 |             nn.Sigmoid(),
20 |         )
21 | 
22 |     def forward(self, inputs):
23 |         return inputs * self.fc(inputs.mean(-1).mean(-1).mean(-1)).view(
24 |             inputs.shape[0], inputs.shape[1], 1, 1, 1
25 |         )
26 | 


--------------------------------------------------------------------------------
/.docker/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | IMAGE_NAME="kuldeepbrd1/grasp_ldm:latest"
 4 | 
 5 | SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" &>/dev/null && pwd)"
 6 | REPOSITORY_DIR="$(dirname "${SCRIPT_DIR}")"
 7 | 
 8 | ## Parse TAG and forward additional build arguments
 9 | if [ "${#}" -gt "0" ]; then
10 |     if [[ "${1}" != "-"* ]]; then
11 |         IMAGE_NAME="${IMAGE_NAME}:${1}"
12 |         BUILD_ARGS=${*:2}
13 |     else
14 |         BUILD_ARGS=${*:1}
15 |     fi
16 | fi
17 | 
18 | ## Build the image
19 | DOCKER_BUILD_CMD=(
20 |     docker build
21 |     "${REPOSITORY_DIR}"
22 |     --file "${REPOSITORY_DIR}/.docker/Dockerfile"
23 |     --tag "${IMAGE_NAME}"
24 |     "${BUILD_ARGS}"
25 | )
26 | echo -e "\033[1;30m${DOCKER_BUILD_CMD[*]}\033[0m" | xargs
27 | # shellcheck disable=SC2048
28 | exec ${DOCKER_BUILD_CMD[*]}
29 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/utils.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef _UTILS_HPP
 2 | #define _UTILS_HPP
 3 | 
 4 | #include <ATen/cuda/CUDAContext.h>
 5 | #include <torch/extension.h>
 6 | 
 7 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
 8 | 
 9 | #define CHECK_CONTIGUOUS(x)                                                    \
10 |   TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor")
11 | 
12 | #define CHECK_IS_INT(x)                                                        \
13 |   TORCH_CHECK(x.scalar_type() == at::ScalarType::Int,                             \
14 |            #x " must be an int tensor")
15 | 
16 | #define CHECK_IS_FLOAT(x)                                                      \
17 |   TORCH_CHECK(x.scalar_type() == at::ScalarType::Float,                           \
18 |            #x " must be a float tensor")
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // "image": "image_name",
 3 |     "build": {
 4 |         "dockerfile": "../.docker/gpu_env.Dockerfile",
 5 |         "context": "..",
 6 |         "args": {},
 7 |         "target": "" //
 8 |     },
 9 |     "containerEnv": {
10 |         "DISPLAY": "${localEnv:DISPLAY}",
11 |         "QT_X11_NO_MITSHM": "1"
12 |     },
13 |     "runArgs": [
14 |         "--network=host",
15 |         "--volume=/tmp/.X11-unix/:/tmp/.X11-unix/",
16 |         // "--volume=<acronym-data-path>:/workspaces/data",
17 |         "--device=/dev/dri:/dev/dri",
18 |         "--gpus",
19 |         "all",
20 |         "--privileged"
21 |     ],
22 |     "customizations": {
23 |         "vscode": {
24 |             "extensions": [
25 |                 "ms-python.python",
26 |                 "njpwerner.autodocstring",
27 |                 "ms-toolsai.jupyter"
28 |             ]
29 |         }
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from setuptools import find_namespace_packages, find_packages, setup
 4 | 
 5 | from grasp_ldm import __version__
 6 | 
 7 | # here = os.path.abspath(os.path.dirname(__file__))
 8 | # requires_list = []
 9 | # with open(os.path.join(here, 'requirements.txt'), encoding='utf-8') as f:
10 | #     for line in f:
11 | #         requires_list.append(str(line))
12 | 
13 | setup(
14 |     name="grasp_ldm",
15 |     version=__version__,
16 |     author="Kuldeep Barad",
17 |     # TODO: Improve grasp_ldm_utils module by combining internal and external utils
18 |     packages=["grasp_ldm", "grasp_ldm.tools", "grasp_ldm_utils"],
19 |     # packages=find_packages(),
20 |     package_dir={
21 |         "grasp_ldm": "grasp_ldm",
22 |         "grasp_ldm.tools": "tools",
23 |         "grasp_ldm_utils": "utils",
24 |     },
25 |     python_requires=">=3.8.0, <3.10",
26 |     # install_requires=requires_list,
27 | )
28 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _NEIGHBOR_INTERPOLATE_CUH
 2 | #define _NEIGHBOR_INTERPOLATE_CUH
 3 | 
 4 | void three_nearest_neighbors_interpolate(int b, int c, int m, int n,
 5 |                                          const float *points_coords,
 6 |                                          const float *centers_coords,
 7 |                                          const float *centers_features,
 8 |                                          int *indices, float *weights,
 9 |                                          float *out);
10 | void three_nearest_neighbors_interpolate_grad(int b, int c, int n, int m,
11 |                                               const float *grad_y,
12 |                                               const int *indices,
13 |                                               const float *weights,
14 |                                               float *grad_x);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/backend.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from torch.utils.cpp_extension import load
 4 | 
 5 | _src_path = os.path.dirname(os.path.abspath(__file__))
 6 | _backend = load(
 7 |     name="_pvcnn_backend",
 8 |     extra_cflags=["-O3", "-std=c++17"],
 9 |     sources=[
10 |         os.path.join(_src_path, "src", f)
11 |         for f in [
12 |             "ball_query/ball_query.cpp",
13 |             "ball_query/ball_query.cu",
14 |             "grouping/grouping.cpp",
15 |             "grouping/grouping.cu",
16 |             "interpolate/neighbor_interpolate.cpp",
17 |             "interpolate/neighbor_interpolate.cu",
18 |             "interpolate/trilinear_devox.cpp",
19 |             "interpolate/trilinear_devox.cu",
20 |             "sampling/sampling.cpp",
21 |             "sampling/sampling.cu",
22 |             "voxelization/vox.cpp",
23 |             "voxelization/vox.cu",
24 |             "bindings.cpp",
25 |         ]
26 |     ],
27 | )
28 | 
29 | __all__ = ["_backend"]
30 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.cpp:
--------------------------------------------------------------------------------
 1 | #include "ball_query.hpp"
 2 | #include "ball_query.cuh"
 3 | 
 4 | #include "../utils.hpp"
 5 | 
 6 | at::Tensor ball_query_forward(at::Tensor centers_coords,
 7 |                               at::Tensor points_coords, const float radius,
 8 |                               const int num_neighbors) {
 9 |   CHECK_CUDA(centers_coords);
10 |   CHECK_CUDA(points_coords);
11 |   CHECK_CONTIGUOUS(centers_coords);
12 |   CHECK_CONTIGUOUS(points_coords);
13 |   CHECK_IS_FLOAT(centers_coords);
14 |   CHECK_IS_FLOAT(points_coords);
15 | 
16 |   int b = centers_coords.size(0);
17 |   int m = centers_coords.size(2);
18 |   int n = points_coords.size(2);
19 | 
20 |   at::Tensor neighbors_indices = torch::zeros(
21 |       {b, m, num_neighbors},
22 |       at::device(centers_coords.device()).dtype(at::ScalarType::Int));
23 | 
24 |   ball_query(b, n, m, radius * radius, num_neighbors,
25 |              centers_coords.data_ptr<float>(),
26 |              points_coords.data_ptr<float>(),
27 |              neighbors_indices.data_ptr<int>());
28 | 
29 |   return neighbors_indices;
30 | }
31 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/shared_mlp.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | __all__ = ["SharedMLP"]
 4 | 
 5 | 
 6 | class SharedMLP(nn.Module):
 7 |     def __init__(self, in_channels, out_channels, dim=1):
 8 |         super().__init__()
 9 |         if dim == 1:
10 |             conv = nn.Conv1d
11 |             bn = nn.BatchNorm1d
12 |         elif dim == 2:
13 |             conv = nn.Conv2d
14 |             bn = nn.BatchNorm2d
15 |         else:
16 |             raise ValueError
17 |         if not isinstance(out_channels, (list, tuple)):
18 |             out_channels = [out_channels]
19 |         layers = []
20 |         for oc in out_channels:
21 |             layers.extend(
22 |                 [
23 |                     conv(in_channels, oc, 1),
24 |                     bn(oc),
25 |                     nn.ReLU(True),
26 |                 ]
27 |             )
28 |             in_channels = oc
29 |         self.layers = nn.Sequential(*layers)
30 | 
31 |     def forward(self, inputs):
32 |         if isinstance(inputs, (list, tuple)):
33 |             return (self.layers(inputs[0]), *inputs[1:])
34 |         else:
35 |             return self.layers(inputs)
36 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/grouping.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | 
 3 | from .backend import _backend
 4 | 
 5 | __all__ = ["grouping"]
 6 | 
 7 | 
 8 | class Grouping(Function):
 9 |     @staticmethod
10 |     def forward(ctx, features, indices):
11 |         """
12 |         :param ctx:
13 |         :param features: features of points, FloatTensor[B, C, N]
14 |         :param indices: neighbor indices of centers, IntTensor[B, M, U], M is #centers, U is #neighbors
15 |         :return:
16 |             grouped_features: grouped features, FloatTensor[B, C, M, U]
17 |         """
18 |         features = features.contiguous()
19 |         indices = indices.contiguous()
20 |         ctx.save_for_backward(indices)
21 |         ctx.num_points = features.size(-1)
22 |         return _backend.grouping_forward(features, indices)
23 | 
24 |     @staticmethod
25 |     def backward(ctx, grad_output):
26 |         (indices,) = ctx.saved_tensors
27 |         grad_features = _backend.grouping_backward(
28 |             grad_output.contiguous(), indices, ctx.num_points
29 |         )
30 |         return grad_features, None
31 | 
32 | 
33 | grouping = Grouping.apply
34 | 


--------------------------------------------------------------------------------
/grasp_ldm/dataset/acronym/__init__.py:
--------------------------------------------------------------------------------
 1 | FILTER_63_CATEGORIES = [
 2 |     "Cup",
 3 |     "Mug",
 4 |     "Fork",
 5 |     "Hat",
 6 |     "Bottle",
 7 |     "Bowl",
 8 |     "Car",
 9 |     "Donut",
10 |     "Laptop",
11 |     "MousePad",
12 |     "Pencil",
13 |     "Plate",
14 |     "ScrewDriver",
15 |     "WineBottle",
16 |     "Backpack",
17 |     "Bag",
18 |     "Banana",
19 |     "Battery",
20 |     "BeanBag",
21 |     "Bear",
22 |     "Book",
23 |     "Books",
24 |     "Camera",
25 |     "CerealBox",
26 |     "Cookie",
27 |     "Hammer",
28 |     "Hanger",
29 |     "Knife",
30 |     "MilkCarton",
31 |     "Painting",
32 |     "PillBottle",
33 |     "Plant",
34 |     "PowerSocket",
35 |     "PowerStrip",
36 |     "PS3",
37 |     "PSP",
38 |     "Ring",
39 |     "Scissors",
40 |     "Shampoo",
41 |     "Shoes",
42 |     "Sheep",
43 |     "Shower",
44 |     "Sink",
45 |     "SoapBottle",
46 |     "SodaCan",
47 |     "Spoon",
48 |     "Statue",
49 |     "Teacup",
50 |     "Teapot",
51 |     "ToiletPaper",
52 |     "ToyFigure",
53 |     "Wallet",
54 |     "WineGlass",
55 |     "Cow",
56 |     "Sheep",
57 |     "Cat",
58 |     "Dog",
59 |     "Pizza",
60 |     "Elephant",
61 |     "Donkey",
62 |     "RubiksCube",
63 |     "Tank",
64 |     "Truck",
65 |     "USBStick",
66 | ]
67 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/base_network.py:
--------------------------------------------------------------------------------
 1 | from abc import abstractmethod
 2 | from typing import Optional
 3 | 
 4 | from torch import Tensor, nn
 5 | 
 6 | 
 7 | class BaseGraspSampler(nn.Module):
 8 |     """Base abstract class for Grasp Samplers"""
 9 | 
10 |     def __init__(self):
11 |         super(BaseGraspSampler, self).__init__()
12 | 
13 |     @property
14 |     def _type(self) -> str:
15 |         return self.__class__.__name__
16 | 
17 |     @abstractmethod
18 |     def generate_grasps(
19 |         self, z: Optional[Tensor] = None, z_cond: Optional[Tensor] = None
20 |     ) -> Tensor:
21 |         """Abstract method for generating grasp poses given latents (optional: None)
22 |         and conditioning input z_cond
23 |         """
24 |         raise NotImplementedError
25 | 
26 | 
27 | class BaseGraspClassifier(nn.Module):
28 |     """Base abstract class for Grasp Samplers"""
29 | 
30 |     def __init__(self):
31 |         super(BaseGraspClassifier, self).__init__()
32 | 
33 |     @property
34 |     def _type(self) -> str:
35 |         return self.__class__.__name__
36 | 
37 |     @abstractmethod
38 |     def classify_grasps(
39 |         self, grasp_poses: Optional[Tensor] = None, pc: Optional[Tensor] = None
40 |     ) -> Tensor:
41 |         """Abstract method for generating grasp poses given latents (optional: None)
42 |         and conditioning input z_cond
43 |         """
44 |         raise NotImplementedError
45 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | 
 4 | repos:
 5 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 6 |     rev: v4.3.0
 7 |     hooks:
 8 |       - id: check-added-large-files
 9 |       - id: check-case-conflict
10 |       - id: check-executables-have-shebangs
11 |       - id: check-merge-conflict
12 |       - id: check-shebang-scripts-are-executable
13 |       - id: check-symlinks
14 |       - id: check-xml
15 |       - id: check-yaml
16 |       - id: debug-statements
17 |       - id: destroyed-symlinks
18 |       - id: detect-private-key
19 |       - id: end-of-file-fixer
20 |       - id: mixed-line-ending
21 |       - id: requirements-txt-fixer
22 |       - id: trailing-whitespace
23 | 
24 |   - repo: https://github.com/pycqa/isort
25 |     rev: 5.12.0
26 |     hooks:
27 |       - id: isort
28 |         args: ["--profile", "black"]
29 | 
30 |   - repo: https://github.com/psf/black
31 |     rev: 23.7.0
32 |     hooks:
33 |       - id: black
34 | 
35 |   - repo: https://github.com/lovesegfault/beautysh
36 |     rev: v6.2.1
37 |     hooks:
38 |       - id: beautysh
39 | 
40 |   - repo: https://github.com/executablebooks/mdformat
41 |     rev: 0.7.15
42 |     hooks:
43 |       - id: mdformat
44 | 
45 |   - repo: https://github.com/codespell-project/codespell
46 |     rev: v2.1.0
47 |     hooks:
48 |       - id: codespell
49 | 


--------------------------------------------------------------------------------
/grasp_ldm/dataset/builder.py:
--------------------------------------------------------------------------------
 1 | from .acronym.acronym_grasp_points import (
 2 |     AcronymFullPcGraspPointsClassification,
 3 |     AcronymPartialPcGraspPointsClassification,
 4 | )
 5 | from .acronym.acronym_partial_pointclouds import AcronymPartialPointclouds
 6 | from .acronym.acronym_pointclouds import AcronymShapenetPointclouds
 7 | 
 8 | POINTCLOUD_GRASP_DATASETS = {
 9 |     "AcronymShapenetPointclouds": AcronymShapenetPointclouds,
10 |     "AcronymPartialPointclouds": AcronymPartialPointclouds,
11 | }
12 | 
13 | 
14 | POINTCLOUD_GRASP_CLASIFICATION_DATASETS = {
15 |     "AcronymFullPcGraspPointsClassification": AcronymFullPcGraspPointsClassification,
16 |     "AcronymPartialPcGraspPointsClassification": AcronymPartialPcGraspPointsClassification,
17 | }
18 | 
19 | ALL_DATASETS = {
20 |     **POINTCLOUD_GRASP_DATASETS,
21 |     **POINTCLOUD_GRASP_CLASIFICATION_DATASETS,
22 | }
23 | 
24 | 
25 | def build_dataset_from_cfg(data_cfg, split):
26 |     """Build dataset from config
27 | 
28 |     Args:
29 |         data_cfg (dict): data config
30 |         split (str): split name
31 | 
32 |     Raises:
33 |         KeyError: if split not found in data config
34 | 
35 |     Returns:
36 |         Dataset: dataset
37 |     """
38 |     if split not in data_cfg:
39 |         raise KeyError(f"Could not find split:`{split}` in the data config dict")
40 | 
41 |     split_cfg = data_cfg[split]
42 |     return ALL_DATASETS[split_cfg.type](**split_cfg.args)
43 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/voxelization.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from . import functional as F
 5 | 
 6 | __all__ = ["Voxelization"]
 7 | 
 8 | 
 9 | class Voxelization(nn.Module):
10 |     def __init__(self, resolution, normalize=True, eps=0):
11 |         super().__init__()
12 |         self.r = int(resolution)
13 |         self.normalize = normalize
14 |         self.eps = eps
15 | 
16 |     def forward(self, features, coords):
17 |         coords = coords.detach()
18 |         norm_coords = coords - coords.mean(2, keepdim=True)
19 |         if self.normalize:
20 |             norm_coords = (
21 |                 norm_coords
22 |                 / (
23 |                     norm_coords.norm(dim=1, keepdim=True)
24 |                     .max(dim=2, keepdim=True)
25 |                     .values
26 |                     * 2.0
27 |                     + self.eps
28 |                 )
29 |                 + 0.5
30 |             )
31 |         else:
32 |             norm_coords = (norm_coords + 1) / 2.0
33 |         norm_coords = torch.clamp(norm_coords * self.r, 0, self.r - 1)
34 |         vox_coords = torch.round(norm_coords).to(torch.int32)
35 |         return F.avg_voxelize(features, vox_coords, self.r), norm_coords
36 | 
37 |     def extra_repr(self):
38 |         return "resolution={}{}".format(
39 |             self.r, ", normalized eps = {}".format(self.eps) if self.normalize else ""
40 |         )
41 | 


--------------------------------------------------------------------------------
/grasp_ldm/dataset/pl_wrapper.py:
--------------------------------------------------------------------------------
 1 | from typing import Sequence, Union
 2 | 
 3 | import pytorch_lightning as pl
 4 | from torch.utils.data import DataLoader, Dataset
 5 | 
 6 | 
 7 | class GraspDataModule(pl.LightningDataModule):
 8 |     def __init__(
 9 |         self,
10 |         train_dataset: Dataset,
11 |         train_batch_size: int = 8,
12 |         val_batch_size: int = 8,
13 |         num_workers: int = 0,
14 |         pin_memory: bool = True,
15 |         persistent_workers: bool = True,
16 |         **kwargs,
17 |     ):
18 |         super().__init__()
19 | 
20 |         self.train_dataset = train_dataset
21 | 
22 |         self.train_batch_size = train_batch_size
23 |         self.val_batch_size = val_batch_size
24 |         self.num_workers = num_workers
25 |         self.pin_memory = pin_memory
26 |         self.persistent_workers = persistent_workers
27 | 
28 |     def train_dataloader(self):
29 |         return DataLoader(
30 |             self.train_dataset,
31 |             batch_size=self.train_batch_size,
32 |             shuffle=True,
33 |             num_workers=self.num_workers,
34 |             pin_memory=self.pin_memory,
35 |             persistent_workers=self.persistent_workers,
36 |         )
37 | 
38 |     def val_dataloader(self):
39 |         raise NotImplementedError
40 | 
41 |     def test_dataloader(self):
42 |         raise NotImplementedError
43 | 
44 |     def predict_dataloader(self):
45 |         raise NotImplementedError
46 | 


--------------------------------------------------------------------------------
/.docker/gpu_env.Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cudagl:11.4.2-devel-ubuntu20.04
 2 | 
 3 | # ENV
 4 | ENV HOME_DIR=/root/
 5 | ENV LANG C.UTF-8
 6 | ENV LC_ALL C.UTF-8
 7 | 
 8 | # REQUIREMENTS & CERTS
 9 | ADD requirements.txt /tmp/
10 | 
11 | SHELL ["/bin/bash", "-c"]
12 | 
13 | # hotfix- cuda source error on ubuntu 20.04
14 | RUN  echo "deb [by-hash=no] http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/cuda.list
15 | 
16 | # APT
17 | RUN apt-get update -y\
18 |     && apt-get upgrade -y \
19 |     && DEBIAN_FRONTEND=noninteractive \
20 |     apt-get install -q -y --no-install-recommends \
21 |     build-essential \
22 |     cmake \
23 |     dirmngr \
24 |     gnupg2 \
25 |     git \
26 |     iputils-ping \
27 |     ca-certificates \
28 |     nano \
29 |     net-tools \
30 |     python3-dev \
31 |     python3-pip \
32 |     python3-wheel \
33 |     python3-opengl \
34 |     tree \
35 |     unzip \
36 |     wget \
37 |     && rm -rf /var/lib/apt/lists/* \
38 |     && update-ca-certificates \
39 |     && echo "alias python=python3" >> /root/.bashrc\
40 |     && echo "alias pip=pip3" >> /root/.bashrc
41 | 
42 | # PIP
43 | ENV ACRONYM_INSTALL_PATH=/tmp/acronym
44 | RUN git clone https://github.com/NVlabs/acronym.git ${ACRONYM_INSTALL_PATH} \
45 |     && pip install -r ${ACRONYM_INSTALL_PATH}/requirements.txt \
46 |     && pip install ${ACRONYM_INSTALL_PATH} \
47 |     && rm -r ${ACRONYM_INSTALL_PATH} \
48 |     && pip install -r /tmp/requirements.txt \
49 |     && rm /tmp/requirements.txt
50 | 
51 | CMD ["/bin/bash"]
52 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.cpp:
--------------------------------------------------------------------------------
 1 | #include "grouping.hpp"
 2 | #include "grouping.cuh"
 3 | 
 4 | #include "../utils.hpp"
 5 | 
 6 | at::Tensor grouping_forward(at::Tensor features, at::Tensor indices) {
 7 |   CHECK_CUDA(features);
 8 |   CHECK_CUDA(indices);
 9 |   CHECK_CONTIGUOUS(features);
10 |   CHECK_CONTIGUOUS(indices);
11 |   CHECK_IS_FLOAT(features);
12 |   CHECK_IS_INT(indices);
13 | 
14 |   int b = features.size(0);
15 |   int c = features.size(1);
16 |   int n = features.size(2);
17 |   int m = indices.size(1);
18 |   int u = indices.size(2);
19 |   at::Tensor output = torch::zeros(
20 |       {b, c, m, u}, at::device(features.device()).dtype(at::ScalarType::Float));
21 |   grouping(b, c, n, m, u, features.data_ptr<float>(), indices.data_ptr<int>(),
22 |            output.data_ptr<float>());
23 |   return output;
24 | }
25 | 
26 | at::Tensor grouping_backward(at::Tensor grad_y, at::Tensor indices,
27 |                              const int n) {
28 |   CHECK_CUDA(grad_y);
29 |   CHECK_CUDA(indices);
30 |   CHECK_CONTIGUOUS(grad_y);
31 |   CHECK_CONTIGUOUS(indices);
32 |   CHECK_IS_FLOAT(grad_y);
33 |   CHECK_IS_INT(indices);
34 | 
35 |   int b = grad_y.size(0);
36 |   int c = grad_y.size(1);
37 |   int m = indices.size(1);
38 |   int u = indices.size(2);
39 |   at::Tensor grad_x = torch::zeros(
40 |       {b, c, n}, at::device(grad_y.device()).dtype(at::ScalarType::Float));
41 |   grouping_grad(b, c, n, m, u, grad_y.data_ptr<float>(),
42 |                 indices.data_ptr<int>(), grad_x.data_ptr<float>());
43 |   return grad_x;
44 | }
45 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/cuda_utils.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _CUDA_UTILS_H
 2 | #define _CUDA_UTILS_H
 3 | 
 4 | #include <ATen/ATen.h>
 5 | #include <ATen/cuda/CUDAContext.h>
 6 | #include <cmath>
 7 | 
 8 | #include <cuda.h>
 9 | #include <cuda_runtime.h>
10 | 
11 | #include <vector>
12 | 
13 | #define MAXIMUM_THREADS 512
14 | 
15 | inline int optimal_num_threads(int work_size) {
16 |   const int pow_2 = std::log2(static_cast<double>(work_size));
17 |   return max(min(1 << pow_2, MAXIMUM_THREADS), 1);
18 | }
19 | 
20 | inline dim3 optimal_block_config(int x, int y) {
21 |   const int x_threads = optimal_num_threads(x);
22 |   const int y_threads =
23 |       max(min(optimal_num_threads(y), MAXIMUM_THREADS / x_threads), 1);
24 |   dim3 block_config(x_threads, y_threads, 1);
25 |   return block_config;
26 | }
27 | 
28 | #define CUDA_CHECK_ERRORS()                                                    \
29 |   {                                                                            \
30 |     cudaError_t err = cudaGetLastError();                                      \
31 |     if (cudaSuccess != err) {                                                  \
32 |       fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",           \
33 |               cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__,          \
34 |               __FILE__);                                                       \
35 |       exit(-1);                                                                \
36 |     }                                                                          \
37 |   }
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/voxelization.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | 
 3 | from .backend import _backend
 4 | 
 5 | __all__ = ["avg_voxelize"]
 6 | 
 7 | 
 8 | class AvgVoxelization(Function):
 9 |     @staticmethod
10 |     def forward(ctx, features, coords, resolution):
11 |         """
12 |         :param ctx:
13 |         :param features: Features of the point cloud, FloatTensor[B, C, N]
14 |         :param coords: Voxelized Coordinates of each point, IntTensor[B, 3, N]
15 |         :param resolution: Voxel resolution
16 |         :return:
17 |             Voxelized Features, FloatTensor[B, C, R, R, R]
18 |         """
19 |         features = features.contiguous()
20 |         coords = coords.int().contiguous()
21 |         b, c, _ = features.shape
22 |         out, indices, counts = _backend.avg_voxelize_forward(
23 |             features, coords, resolution
24 |         )
25 |         ctx.save_for_backward(indices, counts)
26 |         return out.view(b, c, resolution, resolution, resolution)
27 | 
28 |     @staticmethod
29 |     def backward(ctx, grad_output):
30 |         """
31 |         :param ctx:
32 |         :param grad_output: gradient of output, FloatTensor[B, C, R, R, R]
33 |         :return:
34 |             gradient of inputs, FloatTensor[B, C, N]
35 |         """
36 |         b, c = grad_output.shape[:2]
37 |         indices, counts = ctx.saved_tensors
38 |         grad_features = _backend.avg_voxelize_backward(
39 |             grad_output.contiguous().view(b, c, -1), indices, counts
40 |         )
41 |         return grad_features, None, None
42 | 
43 | 
44 | avg_voxelize = AvgVoxelization.apply
45 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/ball_query.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from . import functional as F
 5 | 
 6 | __all__ = ["BallQuery"]
 7 | 
 8 | 
 9 | class BallQuery(nn.Module):
10 |     def __init__(self, radius, num_neighbors, include_coordinates=True):
11 |         super().__init__()
12 |         self.radius = radius
13 |         self.num_neighbors = num_neighbors
14 |         self.include_coordinates = include_coordinates
15 | 
16 |     def forward(self, points_coords, centers_coords, points_features=None):
17 |         points_coords = points_coords.contiguous()
18 |         centers_coords = centers_coords.contiguous()
19 |         neighbor_indices = F.ball_query(
20 |             centers_coords, points_coords, self.radius, self.num_neighbors
21 |         )
22 |         neighbor_coordinates = F.grouping(points_coords, neighbor_indices)
23 |         neighbor_coordinates = neighbor_coordinates - centers_coords.unsqueeze(-1)
24 | 
25 |         if points_features is None:
26 |             assert self.include_coordinates, "No Features For Grouping"
27 |             neighbor_features = neighbor_coordinates
28 |         else:
29 |             neighbor_features = F.grouping(points_features, neighbor_indices)
30 |             if self.include_coordinates:
31 |                 neighbor_features = torch.cat(
32 |                     [neighbor_coordinates, neighbor_features], dim=1
33 |                 )
34 |         return neighbor_features
35 | 
36 |     def extra_repr(self):
37 |         return "radius={}, num_neighbors={}{}".format(
38 |             self.radius,
39 |             self.num_neighbors,
40 |             ", include coordinates" if self.include_coordinates else "",
41 |         )
42 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/interpolatation.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | 
 3 | from .backend import _backend
 4 | 
 5 | __all__ = ["nearest_neighbor_interpolate"]
 6 | 
 7 | 
 8 | class NeighborInterpolation(Function):
 9 |     @staticmethod
10 |     def forward(ctx, points_coords, centers_coords, centers_features):
11 |         """
12 |         :param ctx:
13 |         :param points_coords: coordinates of points, FloatTensor[B, 3, N]
14 |         :param centers_coords: coordinates of centers, FloatTensor[B, 3, M]
15 |         :param centers_features: features of centers, FloatTensor[B, C, M]
16 |         :return:
17 |             points_features: features of points, FloatTensor[B, C, N]
18 |         """
19 |         centers_coords = centers_coords.contiguous()
20 |         points_coords = points_coords.contiguous()
21 |         centers_features = centers_features.contiguous()
22 |         (
23 |             points_features,
24 |             indices,
25 |             weights,
26 |         ) = _backend.three_nearest_neighbors_interpolate_forward(
27 |             points_coords, centers_coords, centers_features
28 |         )
29 |         ctx.save_for_backward(indices, weights)
30 |         ctx.num_centers = centers_coords.size(-1)
31 |         return points_features
32 | 
33 |     @staticmethod
34 |     def backward(ctx, grad_output):
35 |         indices, weights = ctx.saved_tensors
36 |         grad_centers_features = _backend.three_nearest_neighbors_interpolate_backward(
37 |             grad_output.contiguous(), indices, weights, ctx.num_centers
38 |         )
39 |         return None, None, grad_centers_features
40 | 
41 | 
42 | nearest_neighbor_interpolate = NeighborInterpolation.apply
43 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/devoxelization.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | 
 3 | from .backend import _backend
 4 | 
 5 | __all__ = ["trilinear_devoxelize"]
 6 | 
 7 | 
 8 | class TrilinearDevoxelization(Function):
 9 |     @staticmethod
10 |     def forward(ctx, features, coords, resolution, is_training=True):
11 |         """
12 |         :param ctx:
13 |         :param coords: the coordinates of points, FloatTensor[B, 3, N]
14 |         :param features: FloatTensor[B, C, R, R, R]
15 |         :param resolution: int, the voxel resolution
16 |         :param is_training: bool, training mode
17 |         :return:
18 |             FloatTensor[B, C, N]
19 |         """
20 |         B, C = features.shape[:2]
21 |         features = features.contiguous().view(B, C, -1)
22 |         coords = coords.contiguous()
23 |         outs, inds, wgts = _backend.trilinear_devoxelize_forward(
24 |             resolution, is_training, coords, features
25 |         )
26 |         if is_training:
27 |             ctx.save_for_backward(inds, wgts)
28 |             ctx.r = resolution
29 |         return outs
30 | 
31 |     @staticmethod
32 |     def backward(ctx, grad_output):
33 |         """
34 |         :param ctx:
35 |         :param grad_output: gradient of outputs, FloatTensor[B, C, N]
36 |         :return:
37 |             gradient of inputs, FloatTensor[B, C, R, R, R]
38 |         """
39 |         inds, wgts = ctx.saved_tensors
40 |         grad_inputs = _backend.trilinear_devoxelize_backward(
41 |             grad_output.contiguous(), inds, wgts, ctx.r
42 |         )
43 |         return (
44 |             grad_inputs.view(
45 |                 grad_output.size(0), grad_output.size(1), ctx.r, ctx.r, ctx.r
46 |             ),
47 |             None,
48 |             None,
49 |             None,
50 |         )
51 | 
52 | 
53 | trilinear_devoxelize = TrilinearDevoxelization.apply
54 | 


--------------------------------------------------------------------------------
/grasp_ldm/trainers/__init__.py:
--------------------------------------------------------------------------------
 1 | import enum
 2 | 
 3 | from pytorch_lightning.loggers import CSVLogger, Logger, TensorBoardLogger, WandbLogger
 4 | 
 5 | LOGGERS = {
 6 |     "WandbLogger": WandbLogger,
 7 |     "TensorBoardLogger": TensorBoardLogger,
 8 |     "CSVLogger": CSVLogger,
 9 | }
10 | 
11 | 
12 | class E_Trainers(enum.Enum):
13 |     CLASSIFIER = "classifier"
14 |     VAE = "vae"
15 |     DDM = "ddm"
16 | 
17 |     def __repr__(self):
18 |         return f"{self.__class__.__name__}.{self.name}"
19 | 
20 |     def _get_trainer(model_type: str):
21 |         if model_type == E_Trainers.CLASSIFIER:
22 |             from grasp_ldm.trainers.grasp_classification_trainer import (
23 |                 GraspClassificationTrainer,
24 |             )
25 | 
26 |             return GraspClassificationTrainer
27 |         elif model_type == E_Trainers.VAE:
28 |             from grasp_ldm.trainers.grasp_generation_trainer import GraspVAETrainer
29 | 
30 |             return GraspVAETrainer
31 |         elif model_type == E_Trainers.DDM:
32 |             from grasp_ldm.trainers.grasp_generation_trainer import GraspLDMTrainer
33 | 
34 |             return GraspLDMTrainer
35 |         else:
36 |             raise NotImplementedError(f"Model type {model_type} not implemented")
37 | 
38 |     def get_trainer(self):
39 |         return E_Trainers._get_trainer(self)
40 | 
41 |     def from_string(model_type: str):
42 |         if model_type == "classifier":
43 |             return E_Trainers.CLASSIFIER
44 |         elif model_type == "vae":
45 |             return E_Trainers.VAE
46 |         elif model_type == "ddm":
47 |             return E_Trainers.DDM
48 |         else:
49 |             raise NotImplementedError(f"Model type {model_type} not implemented")
50 | 
51 |     def get(model_type: str):
52 |         enum_type = E_Trainers.from_string(model_type)
53 |         return E_Trainers._get_trainer(enum_type)
54 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/bindings.cpp:
--------------------------------------------------------------------------------
 1 | #include <pybind11/pybind11.h>
 2 | 
 3 | #include "ball_query/ball_query.hpp"
 4 | #include "grouping/grouping.hpp"
 5 | #include "interpolate/neighbor_interpolate.hpp"
 6 | #include "interpolate/trilinear_devox.hpp"
 7 | #include "sampling/sampling.hpp"
 8 | #include "voxelization/vox.hpp"
 9 | 
10 | PYBIND11_MODULE(_pvcnn_backend, m) {
11 |   m.def("gather_features_forward", &gather_features_forward,
12 |         "Gather Centers' Features forward (CUDA)");
13 |   m.def("gather_features_backward", &gather_features_backward,
14 |         "Gather Centers' Features backward (CUDA)");
15 |   m.def("furthest_point_sampling", &furthest_point_sampling_forward,
16 |         "Furthest Point Sampling (CUDA)");
17 |   m.def("ball_query", &ball_query_forward, "Ball Query (CUDA)");
18 |   m.def("grouping_forward", &grouping_forward,
19 |         "Grouping Features forward (CUDA)");
20 |   m.def("grouping_backward", &grouping_backward,
21 |         "Grouping Features backward (CUDA)");
22 |   m.def("three_nearest_neighbors_interpolate_forward",
23 |         &three_nearest_neighbors_interpolate_forward,
24 |         "3 Nearest Neighbors Interpolate forward (CUDA)");
25 |   m.def("three_nearest_neighbors_interpolate_backward",
26 |         &three_nearest_neighbors_interpolate_backward,
27 |         "3 Nearest Neighbors Interpolate backward (CUDA)");
28 | 
29 |   m.def("trilinear_devoxelize_forward", &trilinear_devoxelize_forward,
30 |         "Trilinear Devoxelization forward (CUDA)");
31 |   m.def("trilinear_devoxelize_backward", &trilinear_devoxelize_backward,
32 |         "Trilinear Devoxelization backward (CUDA)");
33 |   m.def("avg_voxelize_forward", &avg_voxelize_forward,
34 |         "Voxelization forward with average pooling (CUDA)");
35 |   m.def("avg_voxelize_backward", &avg_voxelize_backward,
36 |         "Voxelization backward (CUDA)");
37 | }
38 | 


--------------------------------------------------------------------------------
/grasp_ldm/utils/torch_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def fix_state_dict_prefix(state_dict, prefix="model", ignore_all_others=False):
 5 |     """Fix state dict keys prefix
 6 | 
 7 |     Args:
 8 |         state_dict (dict): state dict
 9 |         prefix (str, optional): prefix to remove. Defaults to "model".
10 | 
11 |     Returns:
12 |         dict: state dict with prefix removed
13 |     """
14 |     from collections import OrderedDict
15 | 
16 |     if isinstance(state_dict, dict):
17 |         if ignore_all_others:
18 |             return {
19 |                 k.partition(f"{prefix}.")[2]: v
20 |                 for k, v in state_dict.items()
21 |                 if k.startswith(prefix)
22 |             }
23 |         else:
24 |             return {k.partition(f"{prefix}.")[2]: v for k, v in state_dict.items()}
25 |     elif isinstance(state_dict, OrderedDict):
26 |         if ignore_all_others:
27 |             return OrderedDict(
28 |                 [
29 |                     (k.partition(f"{prefix}.")[2], v)
30 |                     for k, v in state_dict.items()
31 |                     if k.startswith(prefix)
32 |                 ]
33 |             )
34 |         else:
35 |             return OrderedDict(
36 |                 [(k.partition(f"{prefix}.")[2], v) for k, v in state_dict.items()]
37 |             )
38 | 
39 | 
40 | def minmax_normalize(
41 |     t: torch.Tensor, dim: int, v_min: float = 0.0, v_max: float = 1.0, keepdim=True
42 | ) -> torch.Tensor:
43 |     """min-max normalization in [0,1]
44 | 
45 |     Args:
46 |         t (Tensor): tensor [B, D1, D2 ... Dn]
47 |         dim (int): dimension to normalize on
48 |         min (float, optional): min value. Defaults to 0.0.
49 |         max (float, optional): max value. Defaults to 1.0.
50 | 
51 |     Returns:
52 |         Tensor: [B, D1, D2 ... Dn]
53 |     """
54 |     t -= t.min(dim, keepdim=keepdim)[0]
55 |     t /= t.max(dim, keepdim=keepdim)[0]
56 | 
57 |     t = t * (v_max - v_min) + v_min
58 |     return t
59 | 


--------------------------------------------------------------------------------
/grasp_ldm/trainers/mixins.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | import pytorch_lightning as pl
 4 | from pytorch_lightning import LightningModule, Trainer
 5 | from pytorch_lightning.callbacks import ModelCheckpoint
 6 | 
 7 | 
 8 | class TrainerEMAMixin:
 9 |     """Mixin for EMA model management in trainer
10 | 
11 |     The idea is to have all this functionality completely hidden and disconnected
12 |     from the main trainer class. Only activated when specified in trainer config
13 |     """
14 | 
15 |     def configure_ema(self, trainer_config):
16 |         from ema_pytorch import EMA
17 | 
18 |         if hasattr(trainer_config, "ema"):
19 |             if trainer_config.ema:
20 |                 ema_config = self.get_ema_config(trainer_config)
21 |                 self.ema_model = EMA(self.model, **ema_config).to(self.device)
22 |             else:
23 |                 self.ema_model = None
24 | 
25 |     def get_ema_config(self, trainer_config):
26 |         """Get EMA config
27 | 
28 |         Args:
29 |             trainer_config (dict): trainer config
30 | 
31 |         Returns:
32 |             dict: EMA config
33 |         """
34 | 
35 |         def check_key(q_dict, q_key):
36 |             if key in q_dict:
37 |                 if q_dict[key] is not None:
38 |                     return True
39 |             return False
40 | 
41 |         ema_config = dict(
42 |             beta=0.990,
43 |             update_after_step=1000,
44 |             update_every=5,
45 |         )
46 | 
47 |         for key in list(ema_config):
48 |             if check_key(trainer_config.ema, key):
49 |                 ema_config[key] = getattr(trainer_config.ema, key)
50 | 
51 |         return ema_config
52 | 
53 |     def get_ema_callback(self):
54 |         # Unused because this requires additional checkpoint to be saved
55 |         # No good way to disconnect from how we implement normal checkpoints in derived class
56 |         return self.EMAModelCheckpoint(
57 |             save_top_k=1,
58 |             monitor="loss",
59 |             mode="min",
60 |             dirpath=self._experiment.ckpt_dir,
61 |             filename="ema-{step}",
62 |             save_weights_only=True,
63 |             every_n_train_steps=1000,
64 |         )
65 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.cpp:
--------------------------------------------------------------------------------
 1 | #include "sampling.hpp"
 2 | #include "sampling.cuh"
 3 | 
 4 | #include "../utils.hpp"
 5 | 
 6 | at::Tensor gather_features_forward(at::Tensor features, at::Tensor indices) {
 7 |   CHECK_CUDA(features);
 8 |   CHECK_CUDA(indices);
 9 |   CHECK_CONTIGUOUS(features);
10 |   CHECK_CONTIGUOUS(indices);
11 |   CHECK_IS_FLOAT(features);
12 |   CHECK_IS_INT(indices);
13 | 
14 |   int b = features.size(0);
15 |   int c = features.size(1);
16 |   int n = features.size(2);
17 |   int m = indices.size(1);
18 |   at::Tensor output = torch::zeros(
19 |       {b, c, m}, at::device(features.device()).dtype(at::ScalarType::Float));
20 |   gather_features(b, c, n, m, features.data_ptr<float>(),
21 |                   indices.data_ptr<int>(), output.data_ptr<float>());
22 |   return output;
23 | }
24 | 
25 | at::Tensor gather_features_backward(at::Tensor grad_y, at::Tensor indices,
26 |                                     const int n) {
27 |   CHECK_CUDA(grad_y);
28 |   CHECK_CUDA(indices);
29 |   CHECK_CONTIGUOUS(grad_y);
30 |   CHECK_CONTIGUOUS(indices);
31 |   CHECK_IS_FLOAT(grad_y);
32 |   CHECK_IS_INT(indices);
33 | 
34 |   int b = grad_y.size(0);
35 |   int c = grad_y.size(1);
36 |   at::Tensor grad_x = torch::zeros(
37 |       {b, c, n}, at::device(grad_y.device()).dtype(at::ScalarType::Float));
38 |   gather_features_grad(b, c, n, indices.size(1), grad_y.data_ptr<float>(),
39 |                        indices.data_ptr<int>(), grad_x.data_ptr<float>());
40 |   return grad_x;
41 | }
42 | 
43 | at::Tensor furthest_point_sampling_forward(at::Tensor coords,
44 |                                            const int num_samples) {
45 |   CHECK_CUDA(coords);
46 |   CHECK_CONTIGUOUS(coords);
47 |   CHECK_IS_FLOAT(coords);
48 | 
49 |   int b = coords.size(0);
50 |   int n = coords.size(2);
51 |   at::Tensor indices = torch::zeros(
52 |       {b, num_samples}, at::device(coords.device()).dtype(at::ScalarType::Int));
53 |   at::Tensor distances = torch::full(
54 |       {b, n}, 1e38f, at::device(coords.device()).dtype(at::ScalarType::Float));
55 |   furthest_point_sampling(b, n, num_samples, coords.data_ptr<float>(),
56 |                           distances.data_ptr<float>(), indices.data_ptr<int>());
57 |   return indices;
58 | }
59 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | 
 5 | #include "../cuda_utils.cuh"
 6 | 
 7 | /*
 8 |   Function: ball query
 9 |   Args:
10 |     b   : batch size
11 |     n   : number of points in point clouds
12 |     m   : number of query centers
13 |     r2  : ball query radius ** 2
14 |     u   : maximum number of neighbors
15 |     centers_coords: coordinates of centers, FloatTensor[b, 3, m]
16 |     points_coords : coordinates of points, FloatTensor[b, 3, n]
17 |     neighbors_indices : neighbor indices in points, IntTensor[b, m, u]
18 | */
19 | __global__ void ball_query_kernel(int b, int n, int m, float r2, int u,
20 |                                   const float *__restrict__ centers_coords,
21 |                                   const float *__restrict__ points_coords,
22 |                                   int *__restrict__ neighbors_indices) {
23 |   int batch_index = blockIdx.x;
24 |   int index = threadIdx.x;
25 |   int stride = blockDim.x;
26 |   points_coords += batch_index * n * 3;
27 |   centers_coords += batch_index * m * 3;
28 |   neighbors_indices += batch_index * m * u;
29 | 
30 |   for (int j = index; j < m; j += stride) {
31 |     float center_x = centers_coords[j];
32 |     float center_y = centers_coords[j + m];
33 |     float center_z = centers_coords[j + m + m];
34 |     for (int k = 0, cnt = 0; k < n && cnt < u; ++k) {
35 |       float dx = center_x - points_coords[k];
36 |       float dy = center_y - points_coords[k + n];
37 |       float dz = center_z - points_coords[k + n + n];
38 |       float d2 = dx * dx + dy * dy + dz * dz;
39 |       if (d2 < r2) {
40 |         if (cnt == 0) {
41 |           for (int v = 0; v < u; ++v) {
42 |             neighbors_indices[j * u + v] = k;
43 |           }
44 |         }
45 |         neighbors_indices[j * u + cnt] = k;
46 |         ++cnt;
47 |       }
48 |     }
49 |   }
50 | }
51 | 
52 | void ball_query(int b, int n, int m, float r2, int u,
53 |                 const float *centers_coords, const float *points_coords,
54 |                 int *neighbors_indices) {
55 |   ball_query_kernel<<<b, optimal_num_threads(m), 0,
56 |                       at::cuda::getCurrentCUDAStream()>>>(
57 |       b, n, m, r2, u, centers_coords, points_coords, neighbors_indices);
58 |   CUDA_CHECK_ERRORS();
59 | }
60 | 


--------------------------------------------------------------------------------
/grasp_ldm/utils/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import multiprocessing
 3 | 
 4 | # from ptflops import get_model_complexity_info
 5 | from typing import Tuple
 6 | 
 7 | import torch
 8 | from scipy.spatial.transform import Rotation as R
 9 | 
10 | 
11 | def get_param_count(model: torch.nn.Module):
12 |     trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
13 |     total = sum(p.numel() for p in model.parameters())
14 |     print(f"Trainable: {trainable/1e6:0.3f} M \n Total: {total/1e6:0.3f}")
15 | 
16 | 
17 | def load_json(path: str) -> dict:
18 |     """load json helper
19 | 
20 |     Args:
21 |         path (str): json_path
22 | 
23 |     Returns:
24 |         dict: data
25 |     """
26 |     with open(path, "r") as jf:
27 |         data = json.load(jf)
28 |     return data
29 | 
30 | 
31 | def spawn_multiple_processes(n_proc, target_fn, process_args):
32 |     assert (
33 |         len(process_args) == n_proc
34 |     ), f"Number of processes ({n_proc}) does not match the length of process_args ({len(process_args)})"
35 | 
36 |     read_processes = []
37 | 
38 |     for idx in range(n_proc):
39 |         try:
40 |             if isinstance(process_args[idx], list):
41 |                 p = multiprocessing.Process(target=target_fn, args=process_args[idx])
42 |             elif isinstance(process_args[idx], dict):
43 |                 p = multiprocessing.Process(target=target_fn, kwargs=process_args[idx])
44 |             else:
45 |                 raise TypeError
46 | 
47 |             p.start()
48 |             read_processes.append(p)
49 |         except:
50 |             for p in read_processes:
51 |                 p.join()
52 | 
53 |     for p in read_processes:
54 |         p.join()
55 | 
56 |     return
57 | 
58 | 
59 | def split_list(lst, n):
60 |     """Split a list into n sublists of approximately equal length
61 | 
62 |     Args:
63 |         lst (list): list to split
64 |         n (int): number of sublists
65 | 
66 |     Returns:
67 |         list: list of sublists
68 |     """
69 |     # divisor, modulo for n splits of list length
70 |     div, mod = divmod(len(lst), n)
71 | 
72 |     # Length of each sublist
73 |     lengths = [div + 1 if i < mod else div for i in range(n)]
74 | 
75 |     # Split the original list into sublists
76 |     # sum(lengths[:i]) is 0 for i=0, so the first sublist starts at 0
77 |     sublists = [lst[sum(lengths[:i]) : sum(lengths[: i + 1])] for i in range(n)]
78 | 
79 |     # Remove empty sublists
80 |     sublists = [sublist for sublist in sublists if sublist]
81 | 
82 |     return sublists
83 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.cpp:
--------------------------------------------------------------------------------
 1 | #include "neighbor_interpolate.hpp"
 2 | #include "neighbor_interpolate.cuh"
 3 | 
 4 | #include "../utils.hpp"
 5 | 
 6 | std::vector<at::Tensor>
 7 | three_nearest_neighbors_interpolate_forward(at::Tensor points_coords,
 8 |                                             at::Tensor centers_coords,
 9 |                                             at::Tensor centers_features) {
10 |   CHECK_CUDA(points_coords);
11 |   CHECK_CUDA(centers_coords);
12 |   CHECK_CUDA(centers_features);
13 |   CHECK_CONTIGUOUS(points_coords);
14 |   CHECK_CONTIGUOUS(centers_coords);
15 |   CHECK_CONTIGUOUS(centers_features);
16 |   CHECK_IS_FLOAT(points_coords);
17 |   CHECK_IS_FLOAT(centers_coords);
18 |   CHECK_IS_FLOAT(centers_features);
19 | 
20 |   int b = centers_features.size(0);
21 |   int c = centers_features.size(1);
22 |   int m = centers_features.size(2);
23 |   int n = points_coords.size(2);
24 | 
25 |   at::Tensor indices = torch::zeros(
26 |       {b, 3, n}, at::device(points_coords.device()).dtype(at::ScalarType::Int));
27 |   at::Tensor weights = torch::zeros(
28 |       {b, 3, n},
29 |       at::device(points_coords.device()).dtype(at::ScalarType::Float));
30 |   at::Tensor output = torch::zeros(
31 |       {b, c, n},
32 |       at::device(centers_features.device()).dtype(at::ScalarType::Float));
33 | 
34 |   three_nearest_neighbors_interpolate(
35 |       b, c, m, n, points_coords.data_ptr<float>(),
36 |       centers_coords.data_ptr<float>(), centers_features.data_ptr<float>(),
37 |       indices.data_ptr<int>(), weights.data_ptr<float>(),
38 |       output.data_ptr<float>());
39 |   return {output, indices, weights};
40 | }
41 | 
42 | at::Tensor three_nearest_neighbors_interpolate_backward(at::Tensor grad_y,
43 |                                                         at::Tensor indices,
44 |                                                         at::Tensor weights,
45 |                                                         const int m) {
46 |   CHECK_CUDA(grad_y);
47 |   CHECK_CUDA(indices);
48 |   CHECK_CUDA(weights);
49 |   CHECK_CONTIGUOUS(grad_y);
50 |   CHECK_CONTIGUOUS(indices);
51 |   CHECK_CONTIGUOUS(weights);
52 |   CHECK_IS_FLOAT(grad_y);
53 |   CHECK_IS_INT(indices);
54 |   CHECK_IS_FLOAT(weights);
55 | 
56 |   int b = grad_y.size(0);
57 |   int c = grad_y.size(1);
58 |   int n = grad_y.size(2);
59 |   at::Tensor grad_x = torch::zeros(
60 |       {b, c, m}, at::device(grad_y.device()).dtype(at::ScalarType::Float));
61 |   three_nearest_neighbors_interpolate_grad(
62 |       b, c, n, m, grad_y.data_ptr<float>(), indices.data_ptr<int>(),
63 |       weights.data_ptr<float>(), grad_x.data_ptr<float>());
64 |   return grad_x;
65 | }
66 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.cpp:
--------------------------------------------------------------------------------
 1 | #include "vox.hpp"
 2 | #include "vox.cuh"
 3 | 
 4 | #include "../utils.hpp"
 5 | 
 6 | /*
 7 |   Function: average pool voxelization (forward)
 8 |   Args:
 9 |     features: features, FloatTensor[b, c, n]
10 |     coords  : coords of each point, IntTensor[b, 3, n]
11 |     resolution : voxel resolution
12 |   Return:
13 |     out : outputs, FloatTensor[b, c, s], s = r ** 3
14 |     ind : voxel index of each point, IntTensor[b, n]
15 |     cnt : #points in each voxel index, IntTensor[b, s]
16 | */
17 | std::vector<at::Tensor> avg_voxelize_forward(const at::Tensor features,
18 |                                              const at::Tensor coords,
19 |                                              const int resolution) {
20 |   CHECK_CUDA(features);
21 |   CHECK_CUDA(coords);
22 |   CHECK_CONTIGUOUS(features);
23 |   CHECK_CONTIGUOUS(coords);
24 |   CHECK_IS_FLOAT(features);
25 |   CHECK_IS_INT(coords);
26 | 
27 |   int b = features.size(0);
28 |   int c = features.size(1);
29 |   int n = features.size(2);
30 |   int r = resolution;
31 |   int r2 = r * r;
32 |   int r3 = r2 * r;
33 |   at::Tensor ind = torch::zeros(
34 |       {b, n}, at::device(features.device()).dtype(at::ScalarType::Int));
35 |   at::Tensor out = torch::zeros(
36 |       {b, c, r3}, at::device(features.device()).dtype(at::ScalarType::Float));
37 |   at::Tensor cnt = torch::zeros(
38 |       {b, r3}, at::device(features.device()).dtype(at::ScalarType::Int));
39 |   avg_voxelize(b, c, n, r, r2, r3, coords.data_ptr<int>(),
40 |                features.data_ptr<float>(), ind.data_ptr<int>(),
41 |                cnt.data_ptr<int>(), out.data_ptr<float>());
42 |   return {out, ind, cnt};
43 | }
44 | 
45 | /*
46 |   Function: average pool voxelization (backward)
47 |   Args:
48 |     grad_y : grad outputs, FloatTensor[b, c, s]
49 |     indices: voxel index of each point, IntTensor[b, n]
50 |     cnt    : #points in each voxel index, IntTensor[b, s]
51 |   Return:
52 |     grad_x : grad inputs, FloatTensor[b, c, n]
53 | */
54 | at::Tensor avg_voxelize_backward(const at::Tensor grad_y,
55 |                                  const at::Tensor indices,
56 |                                  const at::Tensor cnt) {
57 |   CHECK_CUDA(grad_y);
58 |   CHECK_CUDA(indices);
59 |   CHECK_CUDA(cnt);
60 |   CHECK_CONTIGUOUS(grad_y);
61 |   CHECK_CONTIGUOUS(indices);
62 |   CHECK_CONTIGUOUS(cnt);
63 |   CHECK_IS_FLOAT(grad_y);
64 |   CHECK_IS_INT(indices);
65 |   CHECK_IS_INT(cnt);
66 | 
67 |   int b = grad_y.size(0);
68 |   int c = grad_y.size(1);
69 |   int s = grad_y.size(2);
70 |   int n = indices.size(1);
71 |   at::Tensor grad_x = torch::zeros(
72 |       {b, c, n}, at::device(grad_y.device()).dtype(at::ScalarType::Float));
73 |   avg_voxelize_grad(b, c, n, s, indices.data_ptr<int>(), cnt.data_ptr<int>(),
74 |                     grad_y.data_ptr<float>(), grad_x.data_ptr<float>());
75 |   return grad_x;
76 | }
77 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "../cuda_utils.cuh"
 5 | 
 6 | /*
 7 |   Function: grouping features of neighbors (forward)
 8 |   Args:
 9 |     b   : batch size
10 |     c   : #channles of features
11 |     n   : number of points in point clouds
12 |     m   : number of query centers
13 |     u   : maximum number of neighbors
14 |     features: points' features, FloatTensor[b, c, n]
15 |     indices : neighbor indices in points, IntTensor[b, m, u]
16 |     out     : gathered features, FloatTensor[b, c, m, u]
17 | */
18 | __global__ void grouping_kernel(int b, int c, int n, int m, int u,
19 |                                 const float *__restrict__ features,
20 |                                 const int *__restrict__ indices,
21 |                                 float *__restrict__ out) {
22 |   int batch_index = blockIdx.x;
23 |   features += batch_index * n * c;
24 |   indices += batch_index * m * u;
25 |   out += batch_index * m * u * c;
26 | 
27 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
28 |   const int stride = blockDim.y * blockDim.x;
29 |   for (int i = index; i < c * m; i += stride) {
30 |     const int l = i / m;
31 |     const int j = i % m;
32 |     for (int k = 0; k < u; ++k) {
33 |       out[(l * m + j) * u + k] = features[l * n + indices[j * u + k]];
34 |     }
35 |   }
36 | }
37 | 
38 | void grouping(int b, int c, int n, int m, int u, const float *features,
39 |               const int *indices, float *out) {
40 |   grouping_kernel<<<b, optimal_block_config(m, c), 0,
41 |                     at::cuda::getCurrentCUDAStream()>>>(b, c, n, m, u, features,
42 |                                                         indices, out);
43 |   CUDA_CHECK_ERRORS();
44 | }
45 | 
46 | /*
47 |   Function: grouping features of neighbors (backward)
48 |   Args:
49 |     b   : batch size
50 |     c   : #channles of features
51 |     n   : number of points in point clouds
52 |     m   : number of query centers
53 |     u   : maximum number of neighbors
54 |     grad_y : grad of gathered features, FloatTensor[b, c, m, u]
55 |     indices : neighbor indices in points, IntTensor[b, m, u]
56 |     grad_x: grad of points' features, FloatTensor[b, c, n]
57 | */
58 | __global__ void grouping_grad_kernel(int b, int c, int n, int m, int u,
59 |                                      const float *__restrict__ grad_y,
60 |                                      const int *__restrict__ indices,
61 |                                      float *__restrict__ grad_x) {
62 |   int batch_index = blockIdx.x;
63 |   grad_y += batch_index * m * u * c;
64 |   indices += batch_index * m * u;
65 |   grad_x += batch_index * n * c;
66 | 
67 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
68 |   const int stride = blockDim.y * blockDim.x;
69 |   for (int i = index; i < c * m; i += stride) {
70 |     const int l = i / m;
71 |     const int j = i % m;
72 |     for (int k = 0; k < u; ++k) {
73 |       atomicAdd(grad_x + l * n + indices[j * u + k],
74 |                 grad_y[(l * m + j) * u + k]);
75 |     }
76 |   }
77 | }
78 | 
79 | void grouping_grad(int b, int c, int n, int m, int u, const float *grad_y,
80 |                    const int *indices, float *grad_x) {
81 |   grouping_grad_kernel<<<b, optimal_block_config(m, c), 0,
82 |                          at::cuda::getCurrentCUDAStream()>>>(
83 |       b, c, n, m, u, grad_y, indices, grad_x);
84 |   CUDA_CHECK_ERRORS();
85 | }
86 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/modules.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | 
  4 | 
  5 | class Swish(nn.Module):
  6 |     def forward(self, x):
  7 |         return x * torch.sigmoid(x)
  8 | 
  9 | 
 10 | class Attention(nn.Module):
 11 |     # Adapted from https://github.com/alexzhou907/PVD
 12 |     # Used for global attention over context vectors like pc shape latent
 13 |     def __init__(self, in_ch, num_groups, D=3):
 14 |         super(Attention, self).__init__()
 15 |         assert in_ch % num_groups == 0
 16 |         if D == 3:
 17 |             self.q = nn.Conv3d(in_ch, in_ch, 1)
 18 |             self.k = nn.Conv3d(in_ch, in_ch, 1)
 19 |             self.v = nn.Conv3d(in_ch, in_ch, 1)
 20 | 
 21 |             self.out = nn.Conv3d(in_ch, in_ch, 1)
 22 |         elif D == 1:
 23 |             self.q = nn.Conv1d(in_ch, in_ch, 1)
 24 |             self.k = nn.Conv1d(in_ch, in_ch, 1)
 25 |             self.v = nn.Conv1d(in_ch, in_ch, 1)
 26 | 
 27 |             self.out = nn.Conv1d(in_ch, in_ch, 1)
 28 | 
 29 |         self.norm = nn.GroupNorm(num_groups, in_ch)
 30 |         self.nonlin = Swish()
 31 | 
 32 |         self.sm = nn.Softmax(-1)
 33 | 
 34 |     def forward(self, x):
 35 |         B, C = x.shape[:2]
 36 |         h = x
 37 | 
 38 |         q = self.q(h).reshape(B, C, -1)
 39 |         k = self.k(h).reshape(B, C, -1)
 40 |         v = self.v(h).reshape(B, C, -1)
 41 | 
 42 |         qk = torch.matmul(q.permute(0, 2, 1), k)  # * (int(C) ** (-0.5))
 43 | 
 44 |         w = self.sm(qk)
 45 | 
 46 |         h = torch.matmul(v, w.permute(0, 2, 1)).reshape(B, C, *x.shape[2:])
 47 | 
 48 |         h = self.out(h)
 49 | 
 50 |         x = h + x
 51 | 
 52 |         x = self.nonlin(self.norm(x))
 53 | 
 54 |         return x
 55 | 
 56 | 
 57 | class FCLayers(nn.Module):
 58 |     def __init__(
 59 |         self,
 60 |         in_features,
 61 |         layer_outs_specs=[128, 256, 512],
 62 |         layer_normalization=True,
 63 |     ) -> None:
 64 |         super().__init__()
 65 | 
 66 |         self.in_features = in_features
 67 |         self.out_features = layer_outs_specs[-1]
 68 | 
 69 |         self.layer_specs = layer_outs_specs
 70 |         self.layer_normalization = layer_normalization
 71 | 
 72 |         self.num_layers = len(layer_outs_specs)
 73 |         self.layers = self._build_layers()
 74 | 
 75 |     def _build_layers(self):
 76 |         module_list = []
 77 | 
 78 |         for idx, layer_out_features in enumerate(self.layer_specs):
 79 |             in_feats = self.in_features if idx == 0 else self.layer_specs[idx - 1]
 80 |             out_feats = layer_out_features
 81 | 
 82 |             if self.layer_normalization:
 83 |                 module_list.append(
 84 |                     nn.Sequential(
 85 |                         nn.Linear(
 86 |                             in_feats, out_feats, bias=not self.layer_normalization
 87 |                         ),
 88 |                         nn.LayerNorm(out_feats),
 89 |                         nn.ReLU(),
 90 |                     )
 91 |                 )
 92 |             else:
 93 |                 module_list.append(
 94 |                     nn.Sequential(
 95 |                         nn.Linear(
 96 |                             in_feats, out_feats, bias=not self.layer_normalization
 97 |                         ),
 98 |                         nn.ReLU(),
 99 |                     )
100 |                 )
101 | 
102 |         return nn.Sequential(*module_list)
103 | 
104 |     def forward(self, x):
105 |         return self.layers(x)
106 | 


--------------------------------------------------------------------------------
/grasp_ldm/utils/vis.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import trimesh
  4 | 
  5 | 
  6 | # TODO: To be removed in favor of gripper.py
  7 | def create_gripper_marker(color=[0, 0, 255], tube_radius=0.001, sections=6):
  8 |     """Create a 3D mesh visualizing a parallel yaw gripper. It consists of four cylinders.
  9 | 
 10 |     From: https://github.com/NVlabs/acronym/blob/main/acronym_tools/acronym.py
 11 | 
 12 |     Args:
 13 |         color (list, optional): RGB values of marker. Defaults to [0, 0, 255].
 14 |         tube_radius (float, optional): Radius of cylinders. Defaults to 0.001.
 15 |         sections (int, optional): Number of sections of each cylinder. Defaults to 6.
 16 | 
 17 |     Returns:
 18 |         trimesh.Trimesh: A mesh that represents a simple parallel yaw gripper.
 19 |     """
 20 |     cfl = trimesh.creation.cylinder(
 21 |         radius=0.002,
 22 |         sections=sections,
 23 |         segment=[
 24 |             [4.10000000e-02, -7.27595772e-12, 6.59999996e-02],
 25 |             [4.10000000e-02, -7.27595772e-12, 1.12169998e-01],
 26 |         ],
 27 |     )
 28 |     cfr = trimesh.creation.cylinder(
 29 |         radius=0.002,
 30 |         sections=sections,
 31 |         segment=[
 32 |             [-4.100000e-02, -7.27595772e-12, 6.59999996e-02],
 33 |             [-4.100000e-02, -7.27595772e-12, 1.12169998e-01],
 34 |         ],
 35 |     )
 36 |     cb1 = trimesh.creation.cylinder(
 37 |         radius=0.002, sections=sections, segment=[[0, 0, 0], [0, 0, 6.59999996e-02]]
 38 |     )
 39 |     cb2 = trimesh.creation.cylinder(
 40 |         radius=0.002,
 41 |         sections=sections,
 42 |         segment=[[-4.100000e-02, 0, 6.59999996e-02], [4.100000e-02, 0, 6.59999996e-02]],
 43 |     )
 44 | 
 45 |     tmp = trimesh.util.concatenate([cb1, cb2, cfr, cfl])
 46 |     tmp.visual.face_colors = color
 47 | 
 48 |     return tmp
 49 | 
 50 | 
 51 | def visualize_pc(pc):
 52 |     if isinstance(pc, torch.Tensor):
 53 |         pc = pc.squeeze().numpy()
 54 |     r = pc[..., 0] * 255 / max(pc[..., 0])
 55 |     g = pc[..., 1] * 200 / max(pc[..., 1])
 56 |     b = pc[..., 2] * 175 / max(pc[..., 2])
 57 |     a = np.ones(pc.shape[0]) * 200
 58 | 
 59 |     colors = np.clip(np.vstack((r, g, b, a)).T, 0, 255)
 60 | 
 61 |     colors = colors if colors is not None else np.ones((pc.shape[0], 3)) * 85
 62 |     pc_trimesh = trimesh.points.PointCloud(pc, colors=colors)
 63 |     scene = trimesh.Scene(pc_trimesh).show(line_settings={"point_size": 5})
 64 |     return scene
 65 | 
 66 | 
 67 | def visualize_pc_grasps(
 68 |     pc: np.ndarray, grasps: np.ndarray, c: np.ndarray = None
 69 | ) -> trimesh.Scene:
 70 |     # scene = visualize_pc(pc)
 71 |     r = pc[..., 0] * 255 / max(pc[..., 0])
 72 |     g = pc[..., 1] * 200 / max(pc[..., 1])
 73 |     b = pc[..., 2] * 175 / max(pc[..., 2])
 74 |     a = np.ones(pc.shape[0]) * 200
 75 | 
 76 |     pc_colors = np.clip(np.vstack((r, g, b, a)).T, 0, 255)
 77 | 
 78 |     if c is not None:
 79 |         c = c.squeeze(1) if c.ndim == 2 else c
 80 | 
 81 |     if c is not None:
 82 |         gripper_marker = [
 83 |             create_gripper_marker(
 84 |                 color=[150, np.clip(255 * ci, 0, 255), 0, np.clip(255 * ci, 150, 255)]
 85 |             )
 86 |             for ci in c
 87 |         ]
 88 |     else:
 89 |         gripper_marker = [create_gripper_marker(color=[0, 255, 0, 255])] * grasps.shape[
 90 |             0
 91 |         ]
 92 | 
 93 |     gripper_markers = [
 94 |         gripper_marker[i].copy().apply_transform(t) for i, t in enumerate(grasps)
 95 |     ]
 96 | 
 97 |     scene = trimesh.Scene(
 98 |         [trimesh.points.PointCloud(pc, colors=pc_colors)] + gripper_markers
 99 |     )
100 |     return scene
101 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.cpp:
--------------------------------------------------------------------------------
 1 | #include "trilinear_devox.hpp"
 2 | #include "trilinear_devox.cuh"
 3 | 
 4 | #include "../utils.hpp"
 5 | 
 6 | /*
 7 |   Function: trilinear devoxelization (forward)
 8 |   Args:
 9 |     r        : voxel resolution
10 |     trainig  : whether is training mode
11 |     coords   : the coordinates of points, FloatTensor[b, 3, n]
12 |     features : features, FloatTensor[b, c, s], s = r ** 3
13 |   Return:
14 |     outs : outputs, FloatTensor[b, c, n]
15 |     inds : the voxel coordinates of point cube, IntTensor[b, 8, n]
16 |     wgts : weight for trilinear interpolation, FloatTensor[b, 8, n]
17 | */
18 | std::vector<at::Tensor>
19 | trilinear_devoxelize_forward(const int r, const bool is_training,
20 |                              const at::Tensor coords,
21 |                              const at::Tensor features) {
22 |   CHECK_CUDA(features);
23 |   CHECK_CUDA(coords);
24 |   CHECK_CONTIGUOUS(features);
25 |   CHECK_CONTIGUOUS(coords);
26 |   CHECK_IS_FLOAT(features);
27 |   CHECK_IS_FLOAT(coords);
28 | 
29 |   int b = features.size(0);
30 |   int c = features.size(1);
31 |   int n = coords.size(2);
32 |   int r2 = r * r;
33 |   int r3 = r2 * r;
34 |   at::Tensor outs = torch::zeros(
35 |       {b, c, n}, at::device(features.device()).dtype(at::ScalarType::Float));
36 |   if (is_training) {
37 |     at::Tensor inds = torch::zeros(
38 |         {b, 8, n}, at::device(features.device()).dtype(at::ScalarType::Int));
39 |     at::Tensor wgts = torch::zeros(
40 |         {b, 8, n}, at::device(features.device()).dtype(at::ScalarType::Float));
41 |     trilinear_devoxelize(b, c, n, r, r2, r3, true, coords.data_ptr<float>(),
42 |                          features.data_ptr<float>(), inds.data_ptr<int>(),
43 |                          wgts.data_ptr<float>(), outs.data_ptr<float>());
44 |     return {outs, inds, wgts};
45 |   } else {
46 |     at::Tensor inds = torch::zeros(
47 |         {1}, at::device(features.device()).dtype(at::ScalarType::Int));
48 |     at::Tensor wgts = torch::zeros(
49 |         {1}, at::device(features.device()).dtype(at::ScalarType::Float));
50 |     trilinear_devoxelize(b, c, n, r, r2, r3, false, coords.data_ptr<float>(),
51 |                          features.data_ptr<float>(), inds.data_ptr<int>(),
52 |                          wgts.data_ptr<float>(), outs.data_ptr<float>());
53 |     return {outs, inds, wgts};
54 |   }
55 | }
56 | 
57 | /*
58 |   Function: trilinear devoxelization (backward)
59 |   Args:
60 |     grad_y  : grad outputs, FloatTensor[b, c, n]
61 |     indices : the voxel coordinates of point cube, IntTensor[b, 8, n]
62 |     weights : weight for trilinear interpolation, FloatTensor[b, 8, n]
63 |     r       : voxel resolution
64 |   Return:
65 |     grad_x     : grad inputs, FloatTensor[b, c, s], s = r ** 3
66 | */
67 | at::Tensor trilinear_devoxelize_backward(const at::Tensor grad_y,
68 |                                          const at::Tensor indices,
69 |                                          const at::Tensor weights,
70 |                                          const int r) {
71 |   CHECK_CUDA(grad_y);
72 |   CHECK_CUDA(weights);
73 |   CHECK_CUDA(indices);
74 |   CHECK_CONTIGUOUS(grad_y);
75 |   CHECK_CONTIGUOUS(weights);
76 |   CHECK_CONTIGUOUS(indices);
77 |   CHECK_IS_FLOAT(grad_y);
78 |   CHECK_IS_FLOAT(weights);
79 |   CHECK_IS_INT(indices);
80 | 
81 |   int b = grad_y.size(0);
82 |   int c = grad_y.size(1);
83 |   int n = grad_y.size(2);
84 |   int r3 = r * r * r;
85 |   at::Tensor grad_x = torch::zeros(
86 |       {b, c, r3}, at::device(grad_y.device()).dtype(at::ScalarType::Float));
87 |   trilinear_devoxelize_grad(b, c, n, r3, indices.data_ptr<int>(),
88 |                             weights.data_ptr<float>(), grad_y.data_ptr<float>(),
89 |                             grad_x.data_ptr<float>());
90 |   return grad_x;
91 | }
92 | 


--------------------------------------------------------------------------------
/tools/train_generator.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
  5 | import argparse
  6 | 
  7 | from grasp_ldm.trainers import E_Trainers
  8 | from grasp_ldm.utils.config import Config
  9 | 
 10 | 
 11 | def parse_args():
 12 |     parser = argparse.ArgumentParser(description="Runner for Training Grasp Samplers")
 13 |     parser.add_argument("--config", "-c", help="Path to config file", required=True)
 14 |     parser.add_argument(
 15 |         "--model",
 16 |         "-m",
 17 |         help="Model type",
 18 |         required=True,
 19 |         choices=["classifier", "vae", "ddm"],
 20 |     )
 21 |     parser.add_argument("--root-dir", "-d", help="Root directory")
 22 |     parser.add_argument("--num-gpus", "-g", type=int, help="Number of GPUs to use")
 23 |     parser.add_argument("--batch-size", "-b", type=int, help="Batch size per device")
 24 |     parser.add_argument(
 25 |         "-debug",
 26 |         action="store_true",
 27 |         default=False,
 28 |         help="Setting this will disable wandb logger and ... TODO",
 29 |     )
 30 |     parser.add_argument(
 31 |         "--deterministic",
 32 |         action="store_true",
 33 |         default=False,
 34 |         help="Make everything deterministic",
 35 |     )
 36 |     parser.add_argument(
 37 |         "--seed", type=int, default=None, help="Make everything deterministic"
 38 |     )
 39 | 
 40 |     return parser.parse_args()
 41 | 
 42 | 
 43 | def set_deterministic(config, args):
 44 |     """Deterministic Run
 45 | 
 46 |     Mediate config and CLI args to set deterministic run.
 47 |     CLI args take priority and overwrite config.
 48 | 
 49 |     In config:
 50 | 
 51 |     config.trainer.deterministic =True
 52 |     config.seed = 123
 53 | 
 54 |     In CLI:
 55 |     --deterministic
 56 |     --seed 123
 57 |     """
 58 |     config.trainer.deterministic = (
 59 |         False if "deterministic" not in config.trainer else config.trainer.deterministic
 60 |     )
 61 | 
 62 |     if args.deterministic:
 63 |         config.trainer.deterministic = True
 64 | 
 65 |     if config.trainer.deterministic:
 66 |         if not "seed" in config:
 67 |             config.seed = 42
 68 |         if args.seed is not None:
 69 |             config.seed = args.seed
 70 | 
 71 |         from pytorch_lightning import seed_everything
 72 | 
 73 |         seed_everything(config.seed, workers=True)
 74 |         print(
 75 |             "Training will be run in deterministic mode for reproducibility. This might be a bit slower."
 76 |         )
 77 |     else:
 78 |         print(
 79 |             "Training is not deterministic. This is a bit faster and alright. If you want deterministic training, set `deterministic=True` in trainer config."
 80 |         )
 81 | 
 82 |     return config
 83 | 
 84 | 
 85 | def main(args):
 86 |     ## -- Config --
 87 |     config = Config.fromfile(args.config)
 88 | 
 89 |     # Overwrite config with args
 90 |     ## Overwrite config with args
 91 |     # Num gpus
 92 |     if args.num_gpus:
 93 |         config.trainer.devices = args.num_gpus
 94 |         config.trainer.num_workers = args.num_gpus * config.num_workers_per_gpu
 95 | 
 96 |     # Batch size
 97 |     if args.batch_size:
 98 |         config.trainer.batch_size = args.batch_size
 99 |         config.data.train.batch_size = args.batch_size
100 | 
101 |     # Data Root
102 |     if args.root_dir:
103 |         for split in config.data:
104 |             config.data[split].args.data_root_dir = args.root_dir
105 | 
106 |     # Deterministic
107 |     config = set_deterministic(config=config, args=args)
108 | 
109 |     ## -- Trainer --
110 |     Trainer = E_Trainers.get(model_type=args.model)
111 |     trainer = Trainer(config)
112 |     trainer.run()
113 | 
114 | 
115 | if __name__ == "__main__":
116 |     args = parse_args()
117 |     main(args)
118 | 


--------------------------------------------------------------------------------
/grasp_ldm/dataset/acronym/gripper_ctrl_pts.json:
--------------------------------------------------------------------------------
  1 | [
  2 |     [
  3 |         0.0,
  4 |         0.0,
  5 |         0.0
  6 |     ],
  7 |     [
  8 |         0.0,
  9 |         0.0,
 10 |         0.00659999996
 11 |     ],
 12 |     [
 13 |         0.0,
 14 |         0.0,
 15 |         0.01319999992
 16 |     ],
 17 |     [
 18 |         0.0,
 19 |         0.0,
 20 |         0.019799999880000002
 21 |     ],
 22 |     [
 23 |         0.0,
 24 |         0.0,
 25 |         0.02639999984
 26 |     ],
 27 |     [
 28 |         0.0,
 29 |         0.0,
 30 |         0.0329999998
 31 |     ],
 32 |     [
 33 |         0.0,
 34 |         0.0,
 35 |         0.039599999760000004
 36 |     ],
 37 |     [
 38 |         0.0,
 39 |         0.0,
 40 |         0.04619999972
 41 |     ],
 42 |     [
 43 |         0.0,
 44 |         0.0,
 45 |         0.05279999968
 46 |     ],
 47 |     [
 48 |         0.0,
 49 |         0.0,
 50 |         0.05939999964
 51 |     ],
 52 |     [
 53 |         0.0,
 54 |         0.0,
 55 |         0.0659999996
 56 |     ],
 57 |     [
 58 |         0.041,
 59 |         0.0,
 60 |         0.0659999996
 61 |     ],
 62 |     [
 63 |         0.0328,
 64 |         0.0,
 65 |         0.0659999996
 66 |     ],
 67 |     [
 68 |         0.0246,
 69 |         0.0,
 70 |         0.0659999996
 71 |     ],
 72 |     [
 73 |         0.016399999999999998,
 74 |         0.0,
 75 |         0.0659999996
 76 |     ],
 77 |     [
 78 |         0.008199999999999999,
 79 |         0.0,
 80 |         0.0659999996
 81 |     ],
 82 |     [
 83 |         0.0,
 84 |         0.0,
 85 |         0.0659999996
 86 |     ],
 87 |     [
 88 |         -0.008200000000000006,
 89 |         0.0,
 90 |         0.0659999996
 91 |     ],
 92 |     [
 93 |         -0.016400000000000005,
 94 |         0.0,
 95 |         0.0659999996
 96 |     ],
 97 |     [
 98 |         -0.024600000000000004,
 99 |         0.0,
100 |         0.0659999996
101 |     ],
102 |     [
103 |         -0.0328,
104 |         0.0,
105 |         0.0659999996
106 |     ],
107 |     [
108 |         -0.041,
109 |         0.0,
110 |         0.0659999996
111 |     ],
112 |     [
113 |         0.041,
114 |         0.0,
115 |         0.0659999996
116 |     ],
117 |     [
118 |         0.041,
119 |         0.0,
120 |         0.07061699944
121 |     ],
122 |     [
123 |         0.041,
124 |         0.0,
125 |         0.07523399928
126 |     ],
127 |     [
128 |         0.041,
129 |         0.0,
130 |         0.07985099912
131 |     ],
132 |     [
133 |         0.041,
134 |         0.0,
135 |         0.08446799896
136 |     ],
137 |     [
138 |         0.041,
139 |         0.0,
140 |         0.08908499880000001
141 |     ],
142 |     [
143 |         0.041,
144 |         0.0,
145 |         0.09370199864
146 |     ],
147 |     [
148 |         0.041,
149 |         0.0,
150 |         0.09831899848
151 |     ],
152 |     [
153 |         0.041,
154 |         0.0,
155 |         0.10293599832
156 |     ],
157 |     [
158 |         0.041,
159 |         0.0,
160 |         0.10755299816000001
161 |     ],
162 |     [
163 |         0.041,
164 |         0.0,
165 |         0.112169998
166 |     ],
167 |     [
168 |         -0.041,
169 |         0.0,
170 |         0.0659999996
171 |     ],
172 |     [
173 |         -0.041,
174 |         0.0,
175 |         0.07061699944
176 |     ],
177 |     [
178 |         -0.041,
179 |         0.0,
180 |         0.07523399928
181 |     ],
182 |     [
183 |         -0.041,
184 |         0.0,
185 |         0.07985099912
186 |     ],
187 |     [
188 |         -0.041,
189 |         0.0,
190 |         0.08446799896
191 |     ],
192 |     [
193 |         -0.041,
194 |         0.0,
195 |         0.08908499880000001
196 |     ],
197 |     [
198 |         -0.041,
199 |         0.0,
200 |         0.09370199864
201 |     ],
202 |     [
203 |         -0.041,
204 |         0.0,
205 |         0.09831899848
206 |     ],
207 |     [
208 |         -0.041,
209 |         0.0,
210 |         0.10293599832
211 |     ],
212 |     [
213 |         -0.041,
214 |         0.0,
215 |         0.10755299816000001
216 |     ],
217 |     [
218 |         -0.041,
219 |         0.0,
220 |         0.112169998
221 |     ]
222 | ]
223 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/builder.py:
--------------------------------------------------------------------------------
  1 | from torch import nn
  2 | 
  3 | from grasp_ldm.utils.config import Config
  4 | 
  5 | from . import GraspCVAE, GraspLatentDDM
  6 | from .diffusion import GaussianDiffusion1D
  7 | from .grasp_classifier import PointsBasedGraspClassifier
  8 | from .modules.class_conditioned_resnet import ClassTimeConditionedResNet1D
  9 | from .modules.resnets import ResNet1D, TimeConditionedResNet1D, Unet1D
 10 | 
 11 | ## ----------------- Makeshift Model Registry ----------------- ##
 12 | DIFFUSION_MODELS = {
 13 |     "GaussianDiffusion1D": GaussianDiffusion1D,
 14 |     "TimeConditionedResNet1D": TimeConditionedResNet1D,
 15 |     "ClassTimeConditionedResNet1D": ClassTimeConditionedResNet1D,
 16 | }
 17 | 
 18 | STANDARD_MODULES = {
 19 |     "ResNet1D": ResNet1D,
 20 |     "Unet1D": Unet1D,
 21 | }
 22 | 
 23 | CLASSIFIERS = {
 24 |     "PointsBasedGraspClassifier": PointsBasedGraspClassifier,
 25 | }
 26 | 
 27 | 
 28 | ALL_MODELS = {
 29 |     "GraspCVAE": GraspCVAE,
 30 |     "GraspLatentDDM": GraspLatentDDM,
 31 |     **CLASSIFIERS,
 32 |     **STANDARD_MODULES,
 33 |     **DIFFUSION_MODELS,
 34 | }
 35 | 
 36 | 
 37 | ## ----------------- Model Build methods ----------------- ##
 38 | 
 39 | 
 40 | ### For now, user `build_model` for single model and `build_model_from_cfg` for multiple models specified in a composite model config
 41 | def build_model(model_cfg: Config) -> nn.Module:
 42 |     """Build model from config
 43 | 
 44 |     Args:
 45 |         model_cfg (Config): model config
 46 | 
 47 |     Returns:
 48 |         (nn.Module): built model
 49 |     """
 50 |     if model_cfg.type not in ALL_MODELS:
 51 |         raise KeyError(
 52 |             f"`{model_cfg.type}` in the model_registry. \n Supported models are: {list(ALL_MODELS)}"
 53 |         )
 54 |     return ALL_MODELS[model_cfg.type](**model_cfg.args)
 55 | 
 56 | 
 57 | def build_model_configs_recursive(model_cfg: Config) -> Config:
 58 |     """Build model configs recursively
 59 | 
 60 |         This allows building of nested models. For example, if we have a model that takes in a model as an argument,
 61 |         this can be handled in the config as in the example below:
 62 |             model = dict(
 63 |                 type="SomeModel",
 64 |                 args=dict(
 65 |                     model=dict(
 66 |                         type="SomeOtherModel",
 67 |                         args=dict(
 68 |                             ...
 69 |                         )
 70 |                     )
 71 |                 )
 72 |             )
 73 | 
 74 |         Returns a dict with values for all "model" keys replaced with the built model.
 75 | 
 76 |     Args:
 77 |         model_cfg (Config): model config
 78 | 
 79 |     Returns:
 80 |         Config: model config
 81 |     """
 82 |     # new_model_cfg = copy.deepcopy(cfg)
 83 |     if isinstance(model_cfg, dict) or isinstance(model_cfg, Config):
 84 |         for k, v in model_cfg.items():
 85 |             if k == "args":
 86 |                 if isinstance(v, dict):
 87 |                     model_cfg[k] = build_model_configs_recursive(v)
 88 |             if k == "model":
 89 |                 if isinstance(v, dict):
 90 |                     model_cfg[k] = build_model_configs_recursive(v)
 91 |                 model_cfg[k] = build_model(model_cfg[k])
 92 | 
 93 |     return model_cfg
 94 | 
 95 | 
 96 | def build_model_from_cfg(model_cfg: Config) -> nn.Module:
 97 |     """Build model from config
 98 |     # TODO: Rename this to indicate multiple models building
 99 | 
100 |         This relies on a hacky model registry specified by ALL_MODELS and the `type` key in the config.
101 |         The `type` key is used to look up the model class and the `args` key is used to pass in the
102 |         arguments to the model class.
103 | 
104 |     Args:
105 |         model_cfg (Config): model config
106 | 
107 |     Returns:
108 |         (nn.Module): model
109 |     """
110 | 
111 |     # recursively build model configs for nested model configs
112 |     built_model_cfg = build_model_configs_recursive(model_cfg)
113 | 
114 |     return (
115 |         built_model_cfg.model if hasattr(built_model_cfg, "model") else built_model_cfg
116 |     )
117 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/sampling.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | from torch.autograd import Function
  4 | 
  5 | from .backend import _backend
  6 | 
  7 | __all__ = ["gather", "furthest_point_sample", "logits_mask"]
  8 | 
  9 | 
 10 | class Gather(Function):
 11 |     @staticmethod
 12 |     def forward(ctx, features, indices):
 13 |         """
 14 |         Gather
 15 |         :param ctx:
 16 |         :param features: features of points, FloatTensor[B, C, N]
 17 |         :param indices: centers' indices in points, IntTensor[b, m]
 18 |         :return:
 19 |             centers_coords: coordinates of sampled centers, FloatTensor[B, C, M]
 20 |         """
 21 |         features = features.contiguous()
 22 |         indices = indices.int().contiguous()
 23 |         ctx.save_for_backward(indices)
 24 |         ctx.num_points = features.size(-1)
 25 |         return _backend.gather_features_forward(features, indices)
 26 | 
 27 |     @staticmethod
 28 |     def backward(ctx, grad_output):
 29 |         (indices,) = ctx.saved_tensors
 30 |         grad_features = _backend.gather_features_backward(
 31 |             grad_output.contiguous(), indices, ctx.num_points
 32 |         )
 33 |         return grad_features, None
 34 | 
 35 | 
 36 | gather = Gather.apply
 37 | 
 38 | 
 39 | def furthest_point_sample(coords, num_samples):
 40 |     """
 41 |     Uses iterative furthest point sampling to select a set of npoint features that have the largest
 42 |     minimum distance to the sampled point set
 43 |     :param coords: coordinates of points, FloatTensor[B, 3, N]
 44 |     :param num_samples: int, M
 45 |     :return:
 46 |        centers_coords: coordinates of sampled centers, FloatTensor[B, 3, M]
 47 |     """
 48 |     coords = coords.contiguous()
 49 |     indices = _backend.furthest_point_sampling(coords, num_samples)
 50 |     return gather(coords, indices)
 51 | 
 52 | 
 53 | def logits_mask(coords, logits, num_points_per_object):
 54 |     """
 55 |     Use logits to sample points
 56 |     :param coords: coords of points, FloatTensor[B, 3, N]
 57 |     :param logits: binary classification logits, FloatTensor[B, 2, N]
 58 |     :param num_points_per_object: M, #points per object after masking, int
 59 |     :return:
 60 |         selected_coords: FloatTensor[B, 3, M]
 61 |         masked_coords_mean: mean coords of selected points, FloatTensor[B, 3]
 62 |         mask: mask to select points, BoolTensor[B, N]
 63 |     """
 64 |     batch_size, _, num_points = coords.shape
 65 |     mask = torch.lt(logits[:, 0, :], logits[:, 1, :])  # [B, N]
 66 |     num_candidates = torch.sum(mask, dim=-1, keepdim=True)  # [B, 1]
 67 |     masked_coords = coords * mask.view(batch_size, 1, num_points)  # [B, C, N]
 68 |     masked_coords_mean = (
 69 |         torch.sum(masked_coords, dim=-1)
 70 |         / torch.max(num_candidates, torch.ones_like(num_candidates)).float()
 71 |     )  # [B, C]
 72 |     selected_indices = torch.zeros(
 73 |         (batch_size, num_points_per_object), device=coords.device, dtype=torch.int32
 74 |     )
 75 |     for i in range(batch_size):
 76 |         current_mask = mask[i]  # [N]
 77 |         current_candidates = current_mask.nonzero().view(-1)
 78 |         current_num_candidates = current_candidates.numel()
 79 |         if current_num_candidates >= num_points_per_object:
 80 |             choices = np.random.choice(
 81 |                 current_num_candidates, num_points_per_object, replace=False
 82 |             )
 83 |             selected_indices[i] = current_candidates[choices]
 84 |         elif current_num_candidates > 0:
 85 |             choices = np.concatenate(
 86 |                 [
 87 |                     np.arange(current_num_candidates).repeat(
 88 |                         num_points_per_object // current_num_candidates
 89 |                     ),
 90 |                     np.random.choice(
 91 |                         current_num_candidates,
 92 |                         num_points_per_object % current_num_candidates,
 93 |                         replace=False,
 94 |                     ),
 95 |                 ]
 96 |             )
 97 |             np.random.shuffle(choices)
 98 |             selected_indices[i] = current_candidates[choices]
 99 |     selected_coords = gather(
100 |         masked_coords - masked_coords_mean.view(batch_size, -1, 1), selected_indices
101 |     )
102 |     return selected_coords, masked_coords_mean, mask
103 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/class_conditioned_resnet.py:
--------------------------------------------------------------------------------
  1 | from typing import Sequence
  2 | 
  3 | import torch
  4 | from torch import nn
  5 | 
  6 | from .resnets import TimeConditionedResNet1D, default
  7 | 
  8 | 
  9 | class ClassTimeConditionedResNet1D(TimeConditionedResNet1D):
 10 |     def __init__(
 11 |         self,
 12 |         dim: int,
 13 |         init_dim: int = None,
 14 |         out_channels: int = None,
 15 |         block_channels: Sequence = ...,
 16 |         channels: int = 1,
 17 |         input_conditioning_dims: int = None,
 18 |         is_self_conditioned: bool = False,
 19 |         resnet_block_groups: int = 8,
 20 |         learned_variance: bool = False,
 21 |         dropout=None,
 22 |         is_time_conditioned: bool = True,
 23 |         learned_sinusoidal_cond: bool = False,
 24 |         random_fourier_features: bool = False,
 25 |         learned_sinusoidal_dim: int = 16,
 26 |     ) -> None:
 27 |         super().__init__(
 28 |             dim,
 29 |             init_dim,
 30 |             out_channels,
 31 |             block_channels,
 32 |             channels,
 33 |             input_conditioning_dims,
 34 |             is_self_conditioned,
 35 |             resnet_block_groups,
 36 |             learned_variance,
 37 |             dropout,
 38 |             is_time_conditioned,
 39 |             learned_sinusoidal_cond,
 40 |             random_fourier_features,
 41 |             learned_sinusoidal_dim,
 42 |         )
 43 |         self.cls_embed = nn.Sequential(
 44 |             nn.Linear(1, self.emb_dim),
 45 |             nn.SiLU(),
 46 |         )
 47 | 
 48 |     def forward(
 49 |         self,
 50 |         x: torch.Tensor,
 51 |         *,
 52 |         time: torch.Tensor = None,
 53 |         z_cond: torch.Tensor = None,
 54 |         x_self_cond: torch.Tensor = None,
 55 |         cls_cond: torch.Tensor = None,
 56 |         **kwargs
 57 |     ) -> torch.Tensor:
 58 |         """Forward
 59 | 
 60 |         Args:
 61 |             x (torch.Tensor): input
 62 |             time (torch.Tensor): timestep for diffusion
 63 |                 Note: Set to None, when using the architecture outside diffusion.
 64 |                     i.e. self.is_time_conditioned = False
 65 |             z_cond (torch.Tensor, optional): conditioning latent. Defaults to None.
 66 |             x_self_cond (torch.Tensor, optional): self conditioning vector. Defaults to None.
 67 | 
 68 |         Returns:
 69 |             torch.Tensor: output
 70 |         """
 71 | 
 72 |         # Ugly: improve
 73 |         if cls_cond is None:
 74 |             assert (
 75 |                 "mode_cls" in kwargs["metas"]
 76 |             ), "Class conditioning tensor is required"
 77 |             cls_cond = (
 78 |                 kwargs["metas"]["mode_cls"]
 79 |                 .unsqueeze(-1)
 80 |                 .reshape(-1, 1)
 81 |                 .to(dtype=x.dtype)
 82 |             )
 83 | 
 84 |         if self.is_self_conditioned:
 85 |             x_self_cond = default(x_self_cond, lambda: torch.zeros_like(x))
 86 |             x = torch.cat((x_self_cond, x), dim=1)
 87 | 
 88 |         x = self.init_conv(x)
 89 |         # r = x.clone()
 90 | 
 91 |         # Time embedding for diffusion, None for non-diffusion
 92 |         if self.is_time_conditioned and self.time_mlp is not None:
 93 |             assert time is not None
 94 |             latent_emb = self.time_mlp(time)
 95 |         else:
 96 |             latent_emb = None
 97 | 
 98 |         # Class embedding
 99 |         cls_emb = self.cls_embed(cls_cond).squeeze(1)
100 |         latent_emb += cls_emb
101 | 
102 |         # Add input embedding if inupt conditioned
103 |         if self.is_input_conditioned:
104 |             input_emb = self.input_emb_layers(z_cond)
105 |             if input_emb.ndim != 2 and input_emb.ndim == 3:
106 |                 latent_emb = latent_emb.unsqueeze(-2).repeat([1, 3, 1])
107 |             else:
108 |                 raise NotImplementedError
109 |             latent_emb = latent_emb + input_emb if latent_emb is not None else input_emb
110 | 
111 |         for block1, block2, attn, updownsample in self.blocks:
112 |             x = block1(x, latent_emb)
113 | 
114 |             x = block2(x, latent_emb)
115 |             x = attn(x)
116 | 
117 |             x = updownsample(x)
118 |             if self.dropout:
119 |                 x = self.dropout(x)
120 | 
121 |         x = self.final_res_block(x, latent_emb)
122 |         return self.final_conv(x)
123 | 


--------------------------------------------------------------------------------
/grasp_ldm/trainers/experiment.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import os
  3 | import shutil
  4 | import warnings
  5 | 
  6 | 
  7 | class Experiment:
  8 |     def __init__(
  9 |         self,
 10 |         config_path,
 11 |         resume_from="last",
 12 |         out_dir="output/",
 13 |         ckpt_format="ckpt",
 14 |         model_suffix="",
 15 |         configs_dir_name="configs",
 16 |     ) -> None:
 17 |         """
 18 |         NOTE: configs_dir_name is important to split the category and name of the experiment
 19 | 
 20 |         Args:
 21 |             config_path (str): path to the config file
 22 |             resume_from (str, optional): Checkpoint to resume training from. Defaults to "last".
 23 |             out_dir (str, optional): Output directory. Defaults to "output/".
 24 |             ckpt_format (str, optional): Checkpoint format. Defaults to "ckpt".
 25 |             model_suffix (str, optional): Suffix for the model directory. Defaults to None.
 26 |             configs_dir_name (str, optional): Name of the directory containing the configs. Defaults to "configs".
 27 |         """
 28 |         # Checkpoint format
 29 |         self._ckpt_format = ckpt_format
 30 | 
 31 |         # Experiment naming
 32 |         # Split from configs directory
 33 |         relative_config_path = config_path.split(configs_dir_name)[-1].strip("/")
 34 |         self.name = os.path.basename(relative_config_path).split(".")[0]
 35 |         self.category = os.path.dirname(relative_config_path)
 36 | 
 37 |         # Experiment directories
 38 |         self.out_dir = out_dir
 39 |         self.exp_dir = os.path.join(os.path.abspath(out_dir), self.category, self.name)
 40 |         self.model_dir = self.exp_dir + (
 41 |             f"/{model_suffix}" if model_suffix is not None else ""
 42 |         )
 43 | 
 44 |         self.ckpt_dir = os.path.join(self.model_dir, "checkpoints")
 45 |         self.log_dir = os.path.join(self.model_dir, "logs")
 46 |         self._make_dirs()
 47 | 
 48 |         # Make a copy of the config file when training
 49 |         self.src_config_path = config_path
 50 |         self.dst_config_path = os.path.join(self.model_dir, f"{self.name}.py")
 51 | 
 52 |         # Maintain a single config in exp dir. Warn if exists and over-write
 53 |         if os.path.isfile(self.dst_config_path):
 54 |             warnings.warn(
 55 |                 f"Existing config file will be over-written: {self.dst_config_path}"
 56 |             )
 57 |         shutil.copy(self.src_config_path, self.dst_config_path)
 58 | 
 59 |         # Resume from checkpoint
 60 |         self.resume_from = resume_from
 61 | 
 62 |     @property
 63 |     def all_checkpoints(self):
 64 |         return glob.glob(os.path.join(self.ckpt_dir, f"*.{self._ckpt_format}"))
 65 | 
 66 |     @property
 67 |     def exists(self):
 68 |         return os.path.isdir(self.exp_dir)
 69 | 
 70 |     @property
 71 |     def last_checkpoint(self):
 72 |         ckpt_path = os.path.join(self.ckpt_dir, f"last.{self._ckpt_format}")
 73 |         return ckpt_path if os.path.exists(ckpt_path) else None
 74 | 
 75 |     @property
 76 |     def best_checkpoint(self):
 77 |         ckpt_path = os.path.join(self.ckpt_dir, f"best.{self._ckpt_format}")
 78 |         return ckpt_path if os.path.exists(ckpt_path) else None
 79 | 
 80 |     @property
 81 |     def default_resume_checkpoint(self):
 82 |         _default_checkpoint = self.last_checkpoint
 83 | 
 84 |         if self.resume_from in ("best", "last"):
 85 |             ckpt_path = (
 86 |                 self.last_checkpoint
 87 |                 if self.resume_from == "last"
 88 |                 else self.best_checkpoint
 89 |             )
 90 |         else:
 91 |             ckpt_path = self.resume_from
 92 | 
 93 |         if ckpt_path is not None and os.path.isfile(ckpt_path):
 94 |             _default_checkpoint = ckpt_path
 95 |         else:
 96 |             # Do nothing and start from scratch
 97 |             pass
 98 | 
 99 |             # warnings.warn(f"Could not find checkpoint: {ckpt_path}")
100 |             # if _default_checkpoint is None:
101 |             #     warnings.warn(
102 |             #         f"Default checkpoint {_default_checkpoint} also not found."
103 |             #     )
104 |         return _default_checkpoint
105 | 
106 |     def _make_dirs(self):
107 |         # Warn existing checkpoint directory
108 |         if os.path.exists(self.ckpt_dir):
109 |             warnings.warn(
110 |                 f"Experiment Checkpoint directory exists: {self.ckpt_dir} \nCheckpoints may be auto-overwritten by the trainer."
111 |             )
112 |         else:
113 |             os.makedirs(self.ckpt_dir, exist_ok=True)
114 | 
115 |         if not os.path.exists(self.log_dir):
116 |             os.makedirs(self.log_dir, exist_ok=True)
117 | 
118 |         return
119 | 


--------------------------------------------------------------------------------
/tools/generate_grasps.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | from typing import Optional, Tuple
  5 | 
  6 | import numpy as np
  7 | 
  8 | os.environ["LIBGL_ALWAYS_INDIRECT"] = "0"
  9 | sys.path.append((os.getcwd()))
 10 | 
 11 | from tools.inference import Conditioning, InferenceLDM, InferenceVAE, ModelType
 12 | 
 13 | 
 14 | def parse_args():
 15 |     parser = argparse.ArgumentParser(description="Grasp Generation Script")
 16 |     parser.add_argument(
 17 |         "--exp_path", type=str, required=True, help="Path to experiment checkpoint"
 18 |     )
 19 |     parser.add_argument(
 20 |         "--data_root", type=str, default="data/ACRONYM", help="Root directory for data"
 21 |     )
 22 |     parser.add_argument(
 23 |         "--mode",
 24 |         type=str,
 25 |         choices=["VAE", "LDM"],
 26 |         default="VAE",
 27 |         help="Model type to use",
 28 |     )
 29 |     parser.add_argument("--split", type=str, default="test", help="Data split to use")
 30 |     parser.add_argument(
 31 |         "--num_grasps", type=int, default=20, help="Number of grasps to generate"
 32 |     )
 33 |     parser.add_argument("--visualize", action="store_true", help="Enable visualization")
 34 |     parser.add_argument(
 35 |         "--no_ema",
 36 |         action="store_false",
 37 |         dest="use_ema_model",
 38 |         help="Disable EMA model usage",
 39 |     )
 40 |     parser.add_argument(
 41 |         "--num_samples", type=int, default=11, help="Number of samples to generate"
 42 |     )
 43 |     parser.add_argument(
 44 |         "--conditioning",
 45 |         type=str,
 46 |         choices=["unconditional", "class", "region"],
 47 |         default="unconditional",
 48 |         help="Type of conditioning to use",
 49 |     )
 50 |     parser.add_argument(
 51 |         "--condition_value",
 52 |         type=int,
 53 |         help="Value for conditioning (class label or region ID)",
 54 |     )
 55 |     parser.add_argument(
 56 |         "--inference_steps",
 57 |         type=int,
 58 |         default=100,
 59 |         help="Number of inference steps for LDM",
 60 |     )
 61 |     return parser.parse_args()
 62 | 
 63 | 
 64 | def setup_model(args):
 65 |     exp_name = os.path.basename(args.exp_path)
 66 |     exp_out_root = os.path.dirname(args.exp_path)
 67 | 
 68 |     if args.mode == "LDM":
 69 |         model = InferenceLDM(
 70 |             exp_name=exp_name,
 71 |             exp_out_root=exp_out_root,
 72 |             use_elucidated=False,
 73 |             data_root=args.data_root,
 74 |             load_dataset=True,
 75 |             num_inference_steps=args.inference_steps,
 76 |             use_fast_sampler=False,
 77 |             data_split=args.split,
 78 |             use_ema_model=args.use_ema_model,
 79 |         )
 80 |         print(
 81 |             f"Trained using noise schedule: beta0 = {model.model.diffusion_model.beta_start} ; betaT = {model.model.diffusion_model.beta_end}"
 82 |         )
 83 |     elif args.mode == "VAE":
 84 |         model = InferenceVAE(
 85 |             exp_name=exp_name,
 86 |             exp_out_root=exp_out_root,
 87 |             data_root=args.data_root,
 88 |             load_dataset=True,
 89 |             data_split=args.split,
 90 |             use_ema_model=args.use_ema_model,
 91 |         )
 92 |     return model
 93 | 
 94 | 
 95 | def get_conditioning(args) -> Tuple[Optional[Conditioning], Optional[int]]:
 96 |     if args.conditioning == "unconditional":
 97 |         return Conditioning.UNCONDITIONAL, None
 98 |     elif args.conditioning == "class":
 99 |         if args.condition_value is None:
100 |             raise ValueError("Must provide --condition_value for class conditioning")
101 |         return Conditioning.CLASS_CONDITIONED, args.condition_value
102 |     elif args.conditioning == "region":
103 |         if args.condition_value is None:
104 |             raise ValueError("Must provide --condition_value for region conditioning")
105 |         return Conditioning.REGION_CONDITIONED, args.condition_value
106 |     return None, None
107 | 
108 | 
109 | def main():
110 |     args = parse_args()
111 |     model = setup_model(args)
112 |     condition_type, conditioning = get_conditioning(args)
113 | 
114 |     for _ in range(args.num_samples):
115 |         data_idx = np.random.randint(0, len(model.dataset))
116 | 
117 |         # Skip conditioning for VAE mode
118 |         if args.mode == "VAE":
119 |             condition_type = Conditioning.UNCONDITIONAL
120 |             conditioning = None
121 | 
122 |         results = model.infer(
123 |             data_idx=data_idx,
124 |             num_grasps=args.num_grasps,
125 |             visualize=args.visualize,
126 |             condition_type=condition_type,
127 |             conditioning=conditioning,
128 |         )
129 | 
130 |         if args.visualize:
131 |             results.show(line_settings={"point_size": 10})
132 | 
133 | 
134 | if __name__ == "__main__":
135 |     main()
136 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | 
  4 | #include "../cuda_utils.cuh"
  5 | 
  6 | /*
  7 |   Function: get how many points in each voxel grid
  8 |   Args:
  9 |     b      : batch size
 10 |     n      : number of points
 11 |     r      : voxel resolution
 12 |     r2     : = r * r
 13 |     r3     : s, voxel cube size = r ** 3
 14 |     coords : coords of each point, IntTensor[b, 3, n]
 15 |     ind    : voxel index of each point, IntTensor[b, n]
 16 |     cnt    : #points in each voxel index, IntTensor[b, s]
 17 | */
 18 | __global__ void grid_stats_kernel(int b, int n, int r, int r2, int r3,
 19 |                                   const int *__restrict__ coords,
 20 |                                   int *__restrict__ ind, int *cnt) {
 21 |   int batch_index = blockIdx.x;
 22 |   int stride = blockDim.x;
 23 |   int index = threadIdx.x;
 24 |   coords += batch_index * n * 3;
 25 |   ind += batch_index * n;
 26 |   cnt += batch_index * r3;
 27 | 
 28 |   for (int i = index; i < n; i += stride) {
 29 |     // if (ind[i] == -1)
 30 |     //   continue;
 31 |     ind[i] = coords[i] * r2 + coords[i + n] * r + coords[i + n + n];
 32 |     atomicAdd(cnt + ind[i], 1);
 33 |   }
 34 | }
 35 | 
 36 | /*
 37 |   Function: average pool voxelization (forward)
 38 |   Args:
 39 |     b   : batch size
 40 |     c   : #channels
 41 |     n   : number of points
 42 |     s   : voxel cube size = voxel resolution ** 3
 43 |     ind : voxel index of each point, IntTensor[b, n]
 44 |     cnt : #points in each voxel index, IntTensor[b, s]
 45 |     feat: features, FloatTensor[b, c, n]
 46 |     out : outputs, FloatTensor[b, c, s]
 47 | */
 48 | __global__ void avg_voxelize_kernel(int b, int c, int n, int s,
 49 |                                     const int *__restrict__ ind,
 50 |                                     const int *__restrict__ cnt,
 51 |                                     const float *__restrict__ feat,
 52 |                                     float *__restrict__ out) {
 53 |   int batch_index = blockIdx.x;
 54 |   int stride = blockDim.x;
 55 |   int index = threadIdx.x;
 56 |   ind += batch_index * n;
 57 |   feat += batch_index * c * n;
 58 |   out += batch_index * c * s;
 59 |   cnt += batch_index * s;
 60 |   for (int i = index; i < n; i += stride) {
 61 |     int pos = ind[i];
 62 |     // if (pos == -1)
 63 |     //   continue;
 64 |     int cur_cnt = cnt[pos];
 65 |     if (cur_cnt > 0) {
 66 |       float div_cur_cnt = 1.0 / static_cast<float>(cur_cnt);
 67 |       for (int j = 0; j < c; j++) {
 68 |         atomicAdd(out + j * s + pos, feat[j * n + i] * div_cur_cnt);
 69 |       }
 70 |     }
 71 |   }
 72 | }
 73 | 
 74 | /*
 75 |   Function: average pool voxelization (backward)
 76 |   Args:
 77 |     b      : batch size
 78 |     c      : #channels
 79 |     n      : number of points
 80 |     r3     : voxel cube size = voxel resolution ** 3
 81 |     ind    : voxel index of each point, IntTensor[b, n]
 82 |     cnt    : #points in each voxel index, IntTensor[b, s]
 83 |     grad_y : grad outputs, FloatTensor[b, c, s]
 84 |     grad_x : grad inputs, FloatTensor[b, c, n]
 85 | */
 86 | __global__ void avg_voxelize_grad_kernel(int b, int c, int n, int r3,
 87 |                                          const int *__restrict__ ind,
 88 |                                          const int *__restrict__ cnt,
 89 |                                          const float *__restrict__ grad_y,
 90 |                                          float *__restrict__ grad_x) {
 91 |   int batch_index = blockIdx.x;
 92 |   int stride = blockDim.x;
 93 |   int index = threadIdx.x;
 94 |   ind += batch_index * n;
 95 |   grad_x += batch_index * c * n;
 96 |   grad_y += batch_index * c * r3;
 97 |   cnt += batch_index * r3;
 98 |   for (int i = index; i < n; i += stride) {
 99 |     int pos = ind[i];
100 |     // if (pos == -1)
101 |     //   continue;
102 |     int cur_cnt = cnt[pos];
103 |     if (cur_cnt > 0) {
104 |       float div_cur_cnt = 1.0 / static_cast<float>(cur_cnt);
105 |       for (int j = 0; j < c; j++) {
106 |         atomicAdd(grad_x + j * n + i, grad_y[j * r3 + pos] * div_cur_cnt);
107 |       }
108 |     }
109 |   }
110 | }
111 | 
112 | void avg_voxelize(int b, int c, int n, int r, int r2, int r3, const int *coords,
113 |                   const float *feat, int *ind, int *cnt, float *out) {
114 |   grid_stats_kernel<<<b, optimal_num_threads(n)>>>(b, n, r, r2, r3, coords, ind,
115 |                                                    cnt);
116 |   avg_voxelize_kernel<<<b, optimal_num_threads(n)>>>(b, c, n, r3, ind, cnt,
117 |                                                      feat, out);
118 |   CUDA_CHECK_ERRORS();
119 | }
120 | 
121 | void avg_voxelize_grad(int b, int c, int n, int s, const int *ind,
122 |                        const int *cnt, const float *grad_y, float *grad_x) {
123 |   avg_voxelize_grad_kernel<<<b, optimal_num_threads(n)>>>(b, c, n, s, ind, cnt,
124 |                                                           grad_y, grad_x);
125 |   CUDA_CHECK_ERRORS();
126 | }
127 | 


--------------------------------------------------------------------------------
/.docker/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # Based on Gist: AndrejOrsula/_docker_helper_scripts.md
  3 | 
  4 | set -e
  5 | 
  6 | ## Configuration
  7 | # Directory and image
  8 | IMAGE_NAME="kuldeepbrd/grasp_ldm:latest"
  9 | 
 10 | # Flags for running the container
 11 | DOCKER_RUN_OPTS="${DOCKER_RUN_OPTS:-
 12 |     --interactive
 13 |     --tty
 14 |     --rm
 15 |     --network host
 16 |     --ipc host
 17 | }"
 18 | 
 19 | # Flags for enabling GPU and GUI (X11) inside the container
 20 | ENABLE_GPU="${ENABLE_GPU:-true}"
 21 | ENABLE_GUI="${ENABLE_GUI:-true}"
 22 | 
 23 | 
 24 | # List of volumes to mount (can be updated by passing -v HOST_DIR:DOCKER_DIR:OPTIONS)
 25 | CUSTOM_VOLUMES=(
 26 |     "/etc/localtime:/etc/localtime:ro"
 27 | )
 28 | 
 29 | 
 30 | 
 31 | ## Select the container name based on the image name
 32 | CONTAINER_NAME="${IMAGE_NAME##*/}"
 33 | # If the container name is already in use, append a unique (incremental) numerical suffix
 34 | if docker container list --all --format "{{.Names}}" | grep -qi "${CONTAINER_NAME}"; then
 35 |     CONTAINER_NAME="${CONTAINER_NAME}1"
 36 |     while docker container list --all --format "{{.Names}}" | grep -qi "${CONTAINER_NAME}"; do
 37 |         CONTAINER_NAME="${CONTAINER_NAME%?}$((${CONTAINER_NAME: -1} + 1))"
 38 |     done
 39 | fi
 40 | DOCKER_RUN_OPTS="--name ${CONTAINER_NAME} ${DOCKER_RUN_OPTS}"
 41 | 
 42 | ## Parse volumes and environment variables
 43 | while getopts ":v:e:" opt; do
 44 |     case "${opt}" in
 45 |         v) CUSTOM_VOLUMES+=("${OPTARG}") ;;
 46 |         e) CUSTOM_ENVS+=("${OPTARG}") ;;
 47 |         *)
 48 |             echo >&2 "Usage: ${0} [-v HOST_DIR:DOCKER_DIR:OPTIONS] [-e ENV=VALUE] [TAG] [CMD]"
 49 |             exit 2
 50 |             ;;
 51 |     esac
 52 | done
 53 | shift "$((OPTIND - 1))"
 54 | 
 55 | ## GPU
 56 | if [[ "${ENABLE_GPU,,}" = true ]]; then
 57 |     check_nvidia_gpu() {
 58 |         if [[ -n "${ENABLE_GPU_FORCE_NVIDIA}" ]]; then
 59 |             if [[ "${ENABLE_GPU_FORCE_NVIDIA,,}" = true ]]; then
 60 |                 echo "INFO: NVIDIA GPU is force-enabled via \`ENABLE_GPU_FORCE_NVIDIA=true\`."
 61 |                 return 0 # NVIDIA GPU is force-enabled
 62 |             else
 63 |                 echo "INFO: NVIDIA GPU is force-disabled via \`ENABLE_GPU_FORCE_NVIDIA=false\`."
 64 |                 return 1 # NVIDIA GPU is force-disabled
 65 |             fi
 66 |         elif ! lshw -C display 2>/dev/null | grep -qi "vendor.*nvidia"; then
 67 |             return 1 # NVIDIA GPU is not present
 68 |         elif [[ ! -x "$(command -v nvidia-smi)" ]]; then
 69 |             echo >&2 -e "\e[33mWARNING: NVIDIA GPU is detected, but its functionality cannot be verified. This container will not be able to use the GPU. Please install nvidia-utils on the host system or force-enable NVIDIA GPU via \`ENABLE_GPU_FORCE_NVIDIA=true\` environment variable.\e[0m"
 70 |             return 1 # NVIDIA GPU is present but nvidia-utils not installed
 71 |         elif ! nvidia-smi -L &>/dev/null; then
 72 |             echo >&2 -e "\e[33mWARNING: NVIDIA GPU is detected, but it does not seem to be working properly. This container will not be able to use the GPU. Please ensure the NVIDIA drivers are properly installed on the host system.\e[0m"
 73 |             return 1 # NVIDIA GPU is present but is not working properly
 74 |         else
 75 |             return 0 # NVIDIA GPU is present and appears to be working
 76 |         fi
 77 |     }
 78 |     if check_nvidia_gpu; then
 79 |         # Enable GPU either via NVIDIA Container Toolkit or NVIDIA Docker (depending on Docker version)
 80 |         if dpkg --compare-versions "$(docker version --format '{{.Server.Version}}')" gt "19.3"; then
 81 |             GPU_OPT="--gpus all"
 82 |         else
 83 |             GPU_OPT="--runtime nvidia"
 84 |         fi
 85 |         GPU_ENVS=(
 86 |             NVIDIA_VISIBLE_DEVICES="all"
 87 |             NVIDIA_DRIVER_CAPABILITIES="all"
 88 |         )
 89 |     elif [[ $(getent group video) ]]; then
 90 |         GPU_OPT="--device=/dev/dri:/dev/dri --group-add video"
 91 |     else
 92 |         GPU_OPT="--device=/dev/dri:/dev/dri"
 93 |     fi
 94 | fi
 95 | 
 96 | ## GUI
 97 | if [[ "${ENABLE_GUI,,}" = true ]]; then
 98 |     # To enable GUI, make sure processes in the container can connect to the x server
 99 |     XAUTH=/tmp/.docker.xauth
100 |     if [ ! -f ${XAUTH} ]; then
101 |         touch ${XAUTH}
102 |         chmod a+r ${XAUTH}
103 | 
104 |         XAUTH_LIST=$(xauth nlist "${DISPLAY}")
105 |         if [ -n "${XAUTH_LIST}" ]; then
106 |             # shellcheck disable=SC2001
107 |             XAUTH_LIST=$(sed -e 's/^..../ffff/' <<<"${XAUTH_LIST}")
108 |             echo "${XAUTH_LIST}" | xauth -f ${XAUTH} nmerge -
109 |         fi
110 |     fi
111 |     # GUI-enabling volumes
112 |     GUI_VOLUMES=(
113 |         "${XAUTH}:${XAUTH}"
114 |         "/tmp/.X11-unix:/tmp/.X11-unix"
115 |         "/dev/input:/dev/input"
116 |     )
117 |     # GUI-enabling environment variables
118 |     GUI_ENVS=(
119 |         DISPLAY="${DISPLAY}"
120 |         XAUTHORITY="${XAUTH}"
121 |     )
122 | fi
123 | 
124 | ## Run the container
125 | DOCKER_RUN_CMD=(
126 |     docker run
127 |     "${DOCKER_RUN_OPTS}"
128 |     "${GPU_OPT}"
129 |     "${GPU_ENVS[@]/#/"--env "}"
130 |     "${GUI_VOLUMES[@]/#/"--volume "}"
131 |     "${GUI_ENVS[@]/#/"--env "}"
132 |     "${CUSTOM_VOLUMES[@]/#/"--volume "}"
133 |     "${CUSTOM_ENVS[@]/#/"--env "}"
134 |     "${IMAGE_NAME}"
135 |     "${CMD}"
136 | )
137 | echo -e "\033[1;30m${DOCKER_RUN_CMD[*]}\033[0m" | xargs
138 | # shellcheck disable=SC2048
139 | exec ${DOCKER_RUN_CMD[*]}
140 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/pointnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from . import functional as F
  5 | from .ball_query import BallQuery
  6 | from .shared_mlp import SharedMLP
  7 | 
  8 | __all__ = ["PointNetAModule", "PointNetSAModule", "PointNetFPModule"]
  9 | 
 10 | 
 11 | class PointNetAModule(nn.Module):
 12 |     def __init__(self, in_channels, out_channels, include_coordinates=True):
 13 |         super().__init__()
 14 |         if not isinstance(out_channels, (list, tuple)):
 15 |             out_channels = [[out_channels]]
 16 |         elif not isinstance(out_channels[0], (list, tuple)):
 17 |             out_channels = [out_channels]
 18 | 
 19 |         mlps = []
 20 |         total_out_channels = 0
 21 |         for _out_channels in out_channels:
 22 |             mlps.append(
 23 |                 SharedMLP(
 24 |                     in_channels=in_channels + (3 if include_coordinates else 0),
 25 |                     out_channels=_out_channels,
 26 |                     dim=1,
 27 |                 )
 28 |             )
 29 |             total_out_channels += _out_channels[-1]
 30 | 
 31 |         self.include_coordinates = include_coordinates
 32 |         self.out_channels = total_out_channels
 33 |         self.mlps = nn.ModuleList(mlps)
 34 | 
 35 |     def forward(self, inputs):
 36 |         features, coords = inputs
 37 |         if self.include_coordinates:
 38 |             features = torch.cat([features, coords], dim=1)
 39 |         coords = torch.zeros((coords.size(0), 3, 1), device=coords.device)
 40 |         if len(self.mlps) > 1:
 41 |             features_list = []
 42 |             for mlp in self.mlps:
 43 |                 features_list.append(mlp(features).max(dim=-1, keepdim=True).values)
 44 |             return torch.cat(features_list, dim=1), coords
 45 |         else:
 46 |             return self.mlps[0](features).max(dim=-1, keepdim=True).values, coords
 47 | 
 48 |     def extra_repr(self):
 49 |         return f"out_channels={self.out_channels}, include_coordinates={self.include_coordinates}"
 50 | 
 51 | 
 52 | class PointNetSAModule(nn.Module):
 53 |     def __init__(
 54 |         self,
 55 |         num_centers,
 56 |         radius,
 57 |         num_neighbors,
 58 |         in_channels,
 59 |         out_channels,
 60 |         include_coordinates=True,
 61 |     ):
 62 |         super().__init__()
 63 |         if not isinstance(radius, (list, tuple)):
 64 |             radius = [radius]
 65 |         if not isinstance(num_neighbors, (list, tuple)):
 66 |             num_neighbors = [num_neighbors] * len(radius)
 67 |         assert len(radius) == len(num_neighbors)
 68 |         if not isinstance(out_channels, (list, tuple)):
 69 |             out_channels = [[out_channels]] * len(radius)
 70 |         elif not isinstance(out_channels[0], (list, tuple)):
 71 |             out_channels = [out_channels] * len(radius)
 72 |         assert len(radius) == len(out_channels)
 73 | 
 74 |         groupers, mlps = [], []
 75 |         total_out_channels = 0
 76 |         for _radius, _out_channels, _num_neighbors in zip(
 77 |             radius, out_channels, num_neighbors
 78 |         ):
 79 |             groupers.append(
 80 |                 BallQuery(
 81 |                     radius=_radius,
 82 |                     num_neighbors=_num_neighbors,
 83 |                     include_coordinates=include_coordinates,
 84 |                 )
 85 |             )
 86 |             mlps.append(
 87 |                 SharedMLP(
 88 |                     in_channels=in_channels + (3 if include_coordinates else 0),
 89 |                     out_channels=_out_channels,
 90 |                     dim=2,
 91 |                 )
 92 |             )
 93 |             total_out_channels += _out_channels[-1]
 94 | 
 95 |         self.num_centers = num_centers
 96 |         self.out_channels = total_out_channels
 97 |         self.groupers = nn.ModuleList(groupers)
 98 |         self.mlps = nn.ModuleList(mlps)
 99 | 
100 |     def forward(self, inputs):
101 |         features, coords = inputs
102 |         centers_coords = F.furthest_point_sample(coords, self.num_centers)
103 |         features_list = []
104 |         for grouper, mlp in zip(self.groupers, self.mlps):
105 |             features_list.append(
106 |                 mlp(grouper(coords, centers_coords, features)).max(dim=-1).values
107 |             )
108 |         if len(features_list) > 1:
109 |             return torch.cat(features_list, dim=1), centers_coords
110 |         else:
111 |             return features_list[0], centers_coords
112 | 
113 |     def extra_repr(self):
114 |         return f"num_centers={self.num_centers}, out_channels={self.out_channels}"
115 | 
116 | 
117 | class PointNetFPModule(nn.Module):
118 |     def __init__(self, in_channels, out_channels):
119 |         super().__init__()
120 |         self.mlp = SharedMLP(in_channels=in_channels, out_channels=out_channels, dim=1)
121 | 
122 |     def forward(self, inputs):
123 |         if len(inputs) == 3:
124 |             points_coords, centers_coords, centers_features = inputs
125 |             points_features = None
126 |         else:
127 |             points_coords, centers_coords, centers_features, points_features = inputs
128 |         interpolated_features = F.nearest_neighbor_interpolate(
129 |             points_coords, centers_coords, centers_features
130 |         )
131 |         if points_features is not None:
132 |             interpolated_features = torch.cat(
133 |                 [interpolated_features, points_features], dim=1
134 |             )
135 |         return self.mlp(interpolated_features), points_coords
136 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/grasp_classifier.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional
  2 | 
  3 | import torch
  4 | from torch import Tensor, nn
  5 | 
  6 | from ..losses import ClassificationLoss as BCELogitLoss
  7 | from ..utils.gripper import SimplePandaGripper
  8 | from .modules.base_network import BaseGraspClassifier
  9 | from .modules.ext.pvcnn.utils import create_mlp_components
 10 | from .modules.pc_encoders import PVCNN, PVCNN2
 11 | 
 12 | 
 13 | class PointsBasedGraspClassifier(BaseGraspClassifier):
 14 |     SUPPORTED_BASE_NETWORKS = {"PVCNN": PVCNN, "PVCNN2": PVCNN2}
 15 | 
 16 |     SUPPORTED_LOSSES = {"BCEClassificationLoss": BCELogitLoss}
 17 | 
 18 |     def __init__(self, num_pc_points, points_backbone_config: dict, loss_config: dict):
 19 |         super().__init__()
 20 | 
 21 |         # Loss
 22 |         self._loss_config = loss_config
 23 |         _classification_loss_cfg = loss_config.classification_loss
 24 |         self.loss = self.SUPPORTED_LOSSES[_classification_loss_cfg["type"]](
 25 |             **_classification_loss_cfg["args"]
 26 |         )
 27 | 
 28 |         # Object point cloud
 29 |         self.num_pc_points = num_pc_points
 30 | 
 31 |         # Base Point cloud network
 32 |         self.base_network = self.SUPPORTED_BASE_NETWORKS[
 33 |             points_backbone_config["type"]
 34 |         ](**points_backbone_config["args"])
 35 | 
 36 |         # Cls sub-network
 37 |         self._cls_out_dim = 1
 38 |         self._width_multiplier = 1
 39 | 
 40 |         cls_mlp_layers, _ = create_mlp_components(
 41 |             in_channels=self.base_network.out_channels,
 42 |             out_channels=[128, 0.5, 1],
 43 |             classifier=True,
 44 |             dim=2,
 45 |             width_multiplier=self._width_multiplier,
 46 |         )
 47 |         logit_layer = nn.Linear(self.num_pc_points, 1)
 48 | 
 49 |         self.classifier = nn.Sequential(*cls_mlp_layers, logit_layer)
 50 | 
 51 |         # Classifier outputs binary logits. We use sigmoid to get psuedo-probability
 52 |         self.sigmoid = nn.Sigmoid()
 53 | 
 54 |     def forward(
 55 |         self,
 56 |         pc: Tensor,
 57 |         grasp_points: Tensor,
 58 |         *,
 59 |         cls_target: Tensor = None,
 60 |         compute_loss: bool = True
 61 |     ) -> Tensor:
 62 |         """
 63 |         Args:
 64 |             pc (Tensor): [B, NP, 3] Point cloud
 65 |             grasp_points (Tensor): [B, NG, 3] Grasp pose (t(3), mrp(3))
 66 |         Returns:
 67 |             Tensor: [B, 1] Grasp success pred logit or loss
 68 |         """
 69 | 
 70 |         # Add feature label. 0 for pc points and 1 for gripper points
 71 |         obj_pc = torch.cat((pc, torch.zeros_like(pc[..., :1])), dim=-1)
 72 |         grasp_points = torch.cat(
 73 |             (grasp_points, torch.ones_like(grasp_points[..., :1])), dim=-1
 74 |         )
 75 | 
 76 |         # Concat object and gripper point cloud : [B, Np, 3] -> [B, Np+Ng, 3]
 77 |         pc_in = torch.cat((obj_pc, grasp_points), dim=-2)
 78 | 
 79 |         # [B, N, 3] -> [B, 3, N]
 80 |         pc_in = torch.transpose(pc_in, 1, 2).contiguous()
 81 | 
 82 |         # Pass through PVCNN modules
 83 |         x = self.base_network(pc_in)
 84 | 
 85 |         # [B, 1]
 86 |         cls_logit = self.classifier(x).squeeze()
 87 | 
 88 |         # # Sanity check
 89 |         # assert (
 90 |         #     cls_logit.ndim == 1 and cls_logit.shape[0] == pc.shape[0]
 91 |         # ), "Something went wrong in classifier shape broadcasting"
 92 |         preds = self.sigmoid(cls_logit)
 93 |         if compute_loss:
 94 |             if cls_target is None:
 95 |                 raise ValueError("cls_target must be provided if compute_loss is True")
 96 | 
 97 |             if cls_target.shape[0] != cls_logit.shape[0]:
 98 |                 raise ValueError("cls_target and cls_logit size mismatch")
 99 | 
100 |             # Note: Loss is BCE with logits, so we don't apply sigmoid here
101 |             loss = self.loss(cls_logit, cls_target)
102 |             return loss, preds
103 |         else:
104 |             return None, preds
105 | 
106 |     # def merge_pc_gripper_points(self, pc: Tensor, grasp_pose: Tensor) -> Tensor:
107 |     #     """Merge point cloud and gripper points for PVCNN input
108 | 
109 |     #     B: Batch size
110 |     #     Np: Number of points in point cloud
111 |     #     Ng: Number of gripper points
112 | 
113 |     #     Args:
114 |     #         pc (Tensor): [B, Np, 3] Point cloud
115 |     #         grasp_pose (Tensor): [B, 6] Grasp pose (t(3), mrp(3))
116 | 
117 |     #     Returns:
118 |     #         Tensor: [B, 3, Np+Ng] Point cloud with gripper points
119 |     #     """
120 | 
121 |     #     # Get projected gripper points per grasp pose: [Ng, 3] -> [Bp, Ng, 3]
122 |     #     grasp_points = self.gripper_points @ grasp_pose
123 |     #     grasp_points = grasp_points[..., :3]
124 | 
125 |     #     # Transpose for valid input to PVCNN: [Bp, Np, 3] -> [Bp, 3, Np]
126 |     #     pc = pc.transpose(-1, -2).contiguous()
127 |     #     grasp_points = grasp_points.transpose(-1, -2).contiguous()
128 | 
129 |     #     # Concat point cloud and features: [B, 3, Np+Ng]
130 |     #     pc = torch.cat((pc, grasp_points), dim=-1)
131 | 
132 |     #     # Construct feature label tensor that is 0 for pc points and 1 for gripper points
133 |     #     feats = torch.zeros_like(pc[..., :1, :])
134 |     #     feats[..., : -self.num_gripper_points, :] = 1
135 | 
136 |     #     # point-features
137 |     #     pc_with_features = torch.cat((pc, feats), dim=-2)
138 | 
139 |     #     return pc_with_features
140 | 
141 |     def classify_grasps(self, pc: Tensor, grasp_pose: Tensor) -> Tensor:
142 |         _, preds = self.forward(pc, grasp_pose, compute_loss=False)
143 |         return preds
144 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/pvconv.py:
--------------------------------------------------------------------------------
  1 | # Adapted from https://github.com/alexzhou907/PVD/blob/9747265a5f141e5546fd4f862bfa66aa59f1bd33/modules/pvconv.py
  2 | import torch.nn as nn
  3 | 
  4 | from ....modules import Attention, Swish
  5 | from . import functional as F
  6 | from .se import SE3d
  7 | from .shared_mlp import SharedMLP
  8 | from .voxelization import Voxelization
  9 | 
 10 | __all__ = ["PVConv"]
 11 | 
 12 | 
 13 | class PVConv(nn.Module):
 14 |     def __init__(
 15 |         self,
 16 |         in_channels,
 17 |         out_channels,
 18 |         kernel_size,
 19 |         resolution,
 20 |         use_attention=False,
 21 |         dropout=0.1,
 22 |         with_se=False,
 23 |         with_se_relu=False,
 24 |         normalize=True,
 25 |         eps=0,
 26 |     ):
 27 |         """PVConv
 28 | 
 29 |         Args:
 30 |                 in_channels (int): Number of input channels.
 31 |                 out_channels (int): Number of output channels.
 32 |                 kernel_size (int): Kernel size of the convolution.
 33 |                 resolution (int): Voxel resolution.
 34 |                 attention (bool, optional): Whether to use attention. Defaults to False.
 35 |                 dropout (float, optional): Dropout rate. Defaults to 0.1.
 36 |                 with_se (bool, optional): Whether to use SE. Defaults to False.
 37 |                 with_se_relu (bool, optional): Whether to use ReLU in SE. Defaults to False.
 38 |                 eps (float, optional): Epsilon for normalization. Defaults to 0.
 39 | 
 40 |         """
 41 |         super().__init__()
 42 |         self.in_channels = in_channels
 43 |         self.out_channels = out_channels
 44 |         self.kernel_size = kernel_size
 45 |         self.resolution = resolution
 46 | 
 47 |         self.voxelization = Voxelization(resolution, normalize=normalize, eps=eps)
 48 |         voxel_layers = [
 49 |             nn.Conv3d(
 50 |                 in_channels,
 51 |                 out_channels,
 52 |                 kernel_size,
 53 |                 stride=1,
 54 |                 padding=kernel_size // 2,
 55 |             ),
 56 |             nn.GroupNorm(num_groups=8, num_channels=out_channels),
 57 |             Swish(),
 58 |         ]
 59 |         voxel_layers += [nn.Dropout(dropout)] if dropout is not None else []
 60 |         voxel_layers += [
 61 |             nn.Conv3d(
 62 |                 out_channels,
 63 |                 out_channels,
 64 |                 kernel_size,
 65 |                 stride=1,
 66 |                 padding=kernel_size // 2,
 67 |             ),
 68 |             nn.GroupNorm(num_groups=8, num_channels=out_channels),
 69 |             Attention(out_channels, 8) if use_attention else Swish(),
 70 |         ]
 71 |         if with_se:
 72 |             voxel_layers.append(SE3d(out_channels, use_relu=with_se_relu))
 73 |         self.voxel_layers = nn.Sequential(*voxel_layers)
 74 |         self.point_features = SharedMLP(in_channels, out_channels)
 75 | 
 76 |     def forward(self, inputs):
 77 |         features, coords = inputs
 78 |         voxel_features, voxel_coords = self.voxelization(features, coords)
 79 |         voxel_features = self.voxel_layers(voxel_features)
 80 |         voxel_features = F.trilinear_devoxelize(
 81 |             voxel_features, voxel_coords, self.resolution, self.training
 82 |         )
 83 |         fused_features = voxel_features + self.point_features(features)
 84 |         return fused_features, coords
 85 | 
 86 | 
 87 | class PVConvReLU(nn.Module):
 88 |     def __init__(
 89 |         self,
 90 |         in_channels,
 91 |         out_channels,
 92 |         kernel_size,
 93 |         resolution,
 94 |         attention=False,
 95 |         leak=0.2,
 96 |         dropout=0.1,
 97 |         with_se=False,
 98 |         with_se_relu=False,
 99 |         normalize=True,
100 |         eps=0,
101 |     ):
102 |         super().__init__()
103 |         self.in_channels = in_channels
104 |         self.out_channels = out_channels
105 |         self.kernel_size = kernel_size
106 |         self.resolution = resolution
107 | 
108 |         self.voxelization = Voxelization(resolution, normalize=normalize, eps=eps)
109 |         voxel_layers = [
110 |             nn.Conv3d(
111 |                 in_channels,
112 |                 out_channels,
113 |                 kernel_size,
114 |                 stride=1,
115 |                 padding=kernel_size // 2,
116 |             ),
117 |             nn.BatchNorm3d(out_channels),
118 |             nn.LeakyReLU(leak, True),
119 |         ]
120 |         voxel_layers += [nn.Dropout(dropout)] if dropout is not None else []
121 |         voxel_layers += [
122 |             nn.Conv3d(
123 |                 out_channels,
124 |                 out_channels,
125 |                 kernel_size,
126 |                 stride=1,
127 |                 padding=kernel_size // 2,
128 |             ),
129 |             nn.BatchNorm3d(out_channels),
130 |             Attention(out_channels, 8) if attention else nn.LeakyReLU(leak, True),
131 |         ]
132 |         if with_se:
133 |             voxel_layers.append(SE3d(out_channels, use_relu=with_se_relu))
134 |         self.voxel_layers = nn.Sequential(*voxel_layers)
135 |         self.point_features = SharedMLP(in_channels, out_channels)
136 | 
137 |     def forward(self, inputs):
138 |         features, coords, temb = inputs
139 |         voxel_features, voxel_coords = self.voxelization(features, coords)
140 |         voxel_features = self.voxel_layers(voxel_features)
141 |         voxel_features = F.trilinear_devoxelize(
142 |             voxel_features, voxel_coords, self.resolution, self.training
143 |         )
144 |         fused_features = voxel_features + self.point_features(features)
145 |         return fused_features, coords, temb
146 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/pointnet2.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from .utils import (
  5 |     create_mlp_components,
  6 |     create_pointnet2_fp_modules,
  7 |     create_pointnet2_sa_components,
  8 | )
  9 | 
 10 | __all__ = ["PointNet2SSG", "PointNet2MSG"]
 11 | 
 12 | 
 13 | class PointNet2(nn.Module):
 14 |     def __init__(
 15 |         self,
 16 |         # num_classes,
 17 |         sa_blocks,
 18 |         fp_blocks,
 19 |         with_one_hot_shape_id=False,
 20 |         num_shapes=0,
 21 |         extra_feature_channels=3,
 22 |         width_multiplier=1,
 23 |         voxel_resolution_multiplier=1,
 24 |     ):
 25 |         super().__init__()
 26 |         assert extra_feature_channels >= 0
 27 | 
 28 |         self.in_channels = extra_feature_channels + 3
 29 |         self.num_shapes = num_shapes
 30 |         self.with_one_hot_shape_id = with_one_hot_shape_id
 31 | 
 32 |         (
 33 |             sa_layers,
 34 |             sa_in_channels,
 35 |             channels_sa_features,
 36 |             _,
 37 |         ) = create_pointnet2_sa_components(
 38 |             sa_blocks=sa_blocks,
 39 |             extra_feature_channels=extra_feature_channels,
 40 |             width_multiplier=width_multiplier,
 41 |         )
 42 |         self.sa_layers = nn.ModuleList(sa_layers)
 43 | 
 44 |         # use one hot vector in the last fp module
 45 |         sa_in_channels[0] += num_shapes if with_one_hot_shape_id else 0
 46 |         fp_layers, channels_fp_features = create_pointnet2_fp_modules(
 47 |             fp_blocks=fp_blocks,
 48 |             in_channels=channels_sa_features,
 49 |             sa_in_channels=sa_in_channels,
 50 |             width_multiplier=width_multiplier,
 51 |             voxel_resolution_multiplier=voxel_resolution_multiplier,
 52 |         )
 53 |         self.fp_layers = nn.ModuleList(fp_layers)
 54 | 
 55 |         # layers, _ = create_mlp_components(
 56 |         #     in_channels=channels_fp_features,
 57 |         #     out_channels=[128, 0.5, num_classes],
 58 |         #     classifier=True,
 59 |         #     dim=2,
 60 |         #     width_multiplier=width_multiplier,
 61 |         # )
 62 |         # self.classifier = nn.Sequential(*layers)
 63 | 
 64 |     def forward(self, inputs):
 65 |         # inputs : [B, in_channels + S, N]
 66 |         features = inputs[:, : self.in_channels, :]
 67 |         if self.with_one_hot_shape_id:
 68 |             assert inputs.size(1) == self.in_channels + self.num_shapes
 69 |             features_with_one_hot_vectors = inputs
 70 |         else:
 71 |             features_with_one_hot_vectors = features
 72 | 
 73 |         coords, features = (
 74 |             features[:, :3, :].contiguous(),
 75 |             features[:, 3:, :].contiguous(),
 76 |         )
 77 |         coords_list, in_features_list = [], []
 78 |         for sa_module in self.sa_layers:
 79 |             in_features_list.append(features)
 80 |             coords_list.append(coords)
 81 |             features, coords = sa_module((features, coords))
 82 |         in_features_list[0] = features_with_one_hot_vectors.contiguous()
 83 | 
 84 |         for fp_idx, fp_module in enumerate(self.fp_layers):
 85 |             features, coords = fp_module(
 86 |                 (
 87 |                     coords_list[-1 - fp_idx],
 88 |                     coords,
 89 |                     features,
 90 |                     in_features_list[-1 - fp_idx],
 91 |                 )
 92 |             )
 93 | 
 94 |         # return self.classifier(features)
 95 |         return features
 96 | 
 97 | 
 98 | class PointNet2SSG(PointNet2):
 99 |     sa_blocks = [
100 |         (None, (512, 0.2, 64, (64, 64, 128))),
101 |         (None, (128, 0.4, 64, (128, 128, 256))),
102 |         (None, (None, None, None, (256, 512, 1024))),
103 |     ]
104 |     fp_blocks = [((256, 256), None), ((256, 128), None), ((128, 128, 128), None)]
105 | 
106 |     def __init__(
107 |         self,
108 |         # num_classes,
109 |         num_shapes=0,
110 |         extra_feature_channels=3,
111 |         width_multiplier=1,
112 |         voxel_resolution_multiplier=1,
113 |     ):
114 |         super().__init__(
115 |             # num_classes=num_classes,
116 |             num_shapes=num_shapes,
117 |             sa_blocks=self.sa_blocks,
118 |             fp_blocks=self.fp_blocks,
119 |             with_one_hot_shape_id=False,
120 |             extra_feature_channels=extra_feature_channels,
121 |             width_multiplier=width_multiplier,
122 |             voxel_resolution_multiplier=voxel_resolution_multiplier,
123 |         )
124 | 
125 | 
126 | class PointNet2MSG(PointNet2):
127 |     sa_blocks = [
128 |         (
129 |             None,
130 |             (
131 |                 512,
132 |                 [0.1, 0.2, 0.4],
133 |                 [32, 64, 128],
134 |                 [(32, 32, 64), (64, 64, 128), (64, 96, 128)],
135 |             ),
136 |         ),
137 |         (None, (128, [0.4, 0.8], [64, 128], [(128, 128, 256), (128, 196, 256)])),
138 |         (None, (None, None, None, (256, 512, 1024))),
139 |     ]
140 |     fp_blocks = [((256, 256), None), ((256, 128), None), ((128, 128, 128), None)]
141 | 
142 |     def __init__(
143 |         self,
144 |         num_classes,
145 |         num_shapes,
146 |         extra_feature_channels=3,
147 |         width_multiplier=1,
148 |         voxel_resolution_multiplier=1,
149 |     ):
150 |         super().__init__(
151 |             num_classes=num_classes,
152 |             num_shapes=num_shapes,
153 |             sa_blocks=self.sa_blocks,
154 |             fp_blocks=self.fp_blocks,
155 |             with_one_hot_shape_id=True,
156 |             extra_feature_channels=extra_feature_channels,
157 |             width_multiplier=width_multiplier,
158 |             voxel_resolution_multiplier=voxel_resolution_multiplier,
159 |         )
160 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | 
  4 | #include "../cuda_utils.cuh"
  5 | 
  6 | /*
  7 |   Function: gather centers' features (forward)
  8 |   Args:
  9 |     b   : batch size
 10 |     c   : #channles of features
 11 |     n   : number of points in point clouds
 12 |     m   : number of query/sampled centers
 13 |     features: points' features, FloatTensor[b, c, n]
 14 |     indices : centers' indices in points, IntTensor[b, m]
 15 |     out     : gathered features, FloatTensor[b, c, m]
 16 | */
 17 | __global__ void gather_features_kernel(int b, int c, int n, int m,
 18 |                                        const float *__restrict__ features,
 19 |                                        const int *__restrict__ indices,
 20 |                                        float *__restrict__ out) {
 21 |   int batch_index = blockIdx.x;
 22 |   int channel_index = blockIdx.y;
 23 |   int temp_index = batch_index * c + channel_index;
 24 |   features += temp_index * n;
 25 |   indices += batch_index * m;
 26 |   out += temp_index * m;
 27 | 
 28 |   for (int j = threadIdx.x; j < m; j += blockDim.x) {
 29 |     out[j] = features[indices[j]];
 30 |   }
 31 | }
 32 | 
 33 | void gather_features(int b, int c, int n, int m, const float *features,
 34 |                      const int *indices, float *out) {
 35 |   gather_features_kernel<<<dim3(b, c, 1), optimal_num_threads(m), 0,
 36 |                            at::cuda::getCurrentCUDAStream()>>>(
 37 |       b, c, n, m, features, indices, out);
 38 |   CUDA_CHECK_ERRORS();
 39 | }
 40 | 
 41 | /*
 42 |   Function: gather centers' features (backward)
 43 |   Args:
 44 |     b   : batch size
 45 |     c   : #channles of features
 46 |     n   : number of points in point clouds
 47 |     m   : number of query/sampled centers
 48 |     grad_y  : grad of gathered features, FloatTensor[b, c, m]
 49 |     indices : centers' indices in points, IntTensor[b, m]
 50 |     grad_x  : grad of points' features, FloatTensor[b, c, n]
 51 | */
 52 | __global__ void gather_features_grad_kernel(int b, int c, int n, int m,
 53 |                                             const float *__restrict__ grad_y,
 54 |                                             const int *__restrict__ indices,
 55 |                                             float *__restrict__ grad_x) {
 56 |   int batch_index = blockIdx.x;
 57 |   int channel_index = blockIdx.y;
 58 |   int temp_index = batch_index * c + channel_index;
 59 |   grad_y += temp_index * m;
 60 |   indices += batch_index * m;
 61 |   grad_x += temp_index * n;
 62 | 
 63 |   for (int j = threadIdx.x; j < m; j += blockDim.x) {
 64 |     atomicAdd(grad_x + indices[j], grad_y[j]);
 65 |   }
 66 | }
 67 | 
 68 | void gather_features_grad(int b, int c, int n, int m, const float *grad_y,
 69 |                           const int *indices, float *grad_x) {
 70 |   gather_features_grad_kernel<<<dim3(b, c, 1), optimal_num_threads(m), 0,
 71 |                                 at::cuda::getCurrentCUDAStream()>>>(
 72 |       b, c, n, m, grad_y, indices, grad_x);
 73 |   CUDA_CHECK_ERRORS();
 74 | }
 75 | 
 76 | /*
 77 |   Function: furthest point sampling
 78 |   Args:
 79 |     b   : batch size
 80 |     n   : number of points in point clouds
 81 |     m   : number of query/sampled centers
 82 |     coords    : points' coords, FloatTensor[b, 3, n]
 83 |     distances : minimum distance of a point to the set, IntTensor[b, n]
 84 |     indices   : sampled centers' indices in points, IntTensor[b, m]
 85 | */
 86 | __global__ void furthest_point_sampling_kernel(int b, int n, int m,
 87 |                                                const float *__restrict__ coords,
 88 |                                                float *__restrict__ distances,
 89 |                                                int *__restrict__ indices) {
 90 |   if (m <= 0)
 91 |     return;
 92 |   int batch_index = blockIdx.x;
 93 |   coords += batch_index * n * 3;
 94 |   distances += batch_index * n;
 95 |   indices += batch_index * m;
 96 | 
 97 |   const int BlockSize = 512;
 98 |   __shared__ float dists[BlockSize];
 99 |   __shared__ int dists_i[BlockSize];
100 |   const int BufferSize = 3072;
101 |   __shared__ float buf[BufferSize * 3];
102 | 
103 |   int old = 0;
104 |   if (threadIdx.x == 0)
105 |     indices[0] = old;
106 | 
107 |   for (int j = threadIdx.x; j < min(BufferSize, n); j += blockDim.x) {
108 |     buf[j] = coords[j];
109 |     buf[j + BufferSize] = coords[j + n];
110 |     buf[j + BufferSize + BufferSize] = coords[j + n + n];
111 |   }
112 |   __syncthreads();
113 | 
114 |   for (int j = 1; j < m; j++) {
115 |     int besti = 0;   // best index
116 |     float best = -1; // farthest distance
117 |     // calculating the distance with the latest sampled point
118 |     float x1 = coords[old];
119 |     float y1 = coords[old + n];
120 |     float z1 = coords[old + n + n];
121 |     for (int k = threadIdx.x; k < n; k += blockDim.x) {
122 |       // fetch distance at block n, thread k
123 |       float td = distances[k];
124 |       float x2, y2, z2;
125 |       if (k < BufferSize) {
126 |         x2 = buf[k];
127 |         y2 = buf[k + BufferSize];
128 |         z2 = buf[k + BufferSize + BufferSize];
129 |       } else {
130 |         x2 = coords[k];
131 |         y2 = coords[k + n];
132 |         z2 = coords[k + n + n];
133 |       }
134 |       float d =
135 |           (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
136 |       float d2 = min(d, td);
137 |       // update "point-to-set" distance
138 |       if (d2 != td)
139 |         distances[k] = d2;
140 |       // update the farthest distance at sample step j
141 |       if (d2 > best) {
142 |         best = d2;
143 |         besti = k;
144 |       }
145 |     }
146 | 
147 |     dists[threadIdx.x] = best;
148 |     dists_i[threadIdx.x] = besti;
149 |     for (int u = 0; (1 << u) < blockDim.x; u++) {
150 |       __syncthreads();
151 |       if (threadIdx.x < (blockDim.x >> (u + 1))) {
152 |         int i1 = (threadIdx.x * 2) << u;
153 |         int i2 = (threadIdx.x * 2 + 1) << u;
154 |         if (dists[i1] < dists[i2]) {
155 |           dists[i1] = dists[i2];
156 |           dists_i[i1] = dists_i[i2];
157 |         }
158 |       }
159 |     }
160 |     __syncthreads();
161 | 
162 |     // finish sample step j; old is the sampled index
163 |     old = dists_i[0];
164 |     if (threadIdx.x == 0)
165 |       indices[j] = old;
166 |   }
167 | }
168 | 
169 | void furthest_point_sampling(int b, int n, int m, const float *coords,
170 |                              float *distances, int *indices) {
171 |   furthest_point_sampling_kernel<<<b, 512>>>(b, n, m, coords, distances,
172 |                                              indices);
173 |   CUDA_CHECK_ERRORS();
174 | }
175 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | 
  4 | #include "../cuda_utils.cuh"
  5 | 
  6 | /*
  7 |   Function: trilinear devoxlization (forward)
  8 |   Args:
  9 |     b   : batch size
 10 |     c   : #channels
 11 |     n   : number of points
 12 |     r   : voxel resolution
 13 |     r2  : r ** 2
 14 |     r3  : r ** 3
 15 |     coords : the coordinates of points, FloatTensor[b, 3, n]
 16 |     feat   : features, FloatTensor[b, c, r3]
 17 |     inds   : the voxel indices of point cube, IntTensor[b, 8, n]
 18 |     wgts   : weight for trilinear interpolation, FloatTensor[b, 8, n]
 19 |     outs   : outputs, FloatTensor[b, c, n]
 20 | */
 21 | __global__ void trilinear_devoxelize_kernel(int b, int c, int n, int r, int r2,
 22 |                                             int r3, bool is_training,
 23 |                                             const float *__restrict__ coords,
 24 |                                             const float *__restrict__ feat,
 25 |                                             int *__restrict__ inds,
 26 |                                             float *__restrict__ wgts,
 27 |                                             float *__restrict__ outs) {
 28 |   int batch_index = blockIdx.x;
 29 |   int stride = blockDim.x;
 30 |   int index = threadIdx.x;
 31 |   coords += batch_index * n * 3;
 32 |   inds += batch_index * n * 8;
 33 |   wgts += batch_index * n * 8;
 34 |   feat += batch_index * c * r3;
 35 |   outs += batch_index * c * n;
 36 | 
 37 |   for (int i = index; i < n; i += stride) {
 38 |     float x = coords[i];
 39 |     float y = coords[i + n];
 40 |     float z = coords[i + n + n];
 41 |     float x_lo_f = floorf(x);
 42 |     float y_lo_f = floorf(y);
 43 |     float z_lo_f = floorf(z);
 44 | 
 45 |     float x_d_1 = x - x_lo_f; // / (x_hi_f - x_lo_f + 1e-8f)
 46 |     float y_d_1 = y - y_lo_f;
 47 |     float z_d_1 = z - z_lo_f;
 48 |     float x_d_0 = 1.0f - x_d_1;
 49 |     float y_d_0 = 1.0f - y_d_1;
 50 |     float z_d_0 = 1.0f - z_d_1;
 51 | 
 52 |     float wgt000 = x_d_0 * y_d_0 * z_d_0;
 53 |     float wgt001 = x_d_0 * y_d_0 * z_d_1;
 54 |     float wgt010 = x_d_0 * y_d_1 * z_d_0;
 55 |     float wgt011 = x_d_0 * y_d_1 * z_d_1;
 56 |     float wgt100 = x_d_1 * y_d_0 * z_d_0;
 57 |     float wgt101 = x_d_1 * y_d_0 * z_d_1;
 58 |     float wgt110 = x_d_1 * y_d_1 * z_d_0;
 59 |     float wgt111 = x_d_1 * y_d_1 * z_d_1;
 60 | 
 61 |     int x_lo = static_cast<int>(x_lo_f);
 62 |     int y_lo = static_cast<int>(y_lo_f);
 63 |     int z_lo = static_cast<int>(z_lo_f);
 64 |     int x_hi = (x_d_1 > 0) ? -1 : 0;
 65 |     int y_hi = (y_d_1 > 0) ? -1 : 0;
 66 |     int z_hi = (z_d_1 > 0) ? 1 : 0;
 67 | 
 68 |     int idx000 = x_lo * r2 + y_lo * r + z_lo;
 69 |     int idx001 = idx000 + z_hi;      // x_lo * r2 + y_lo * r + z_hi;
 70 |     int idx010 = idx000 + (y_hi & r);  // x_lo * r2 + y_hi * r + z_lo;
 71 |     int idx011 = idx010 + z_hi;      // x_lo * r2 + y_hi * r + z_hi;
 72 |     int idx100 = idx000 + (x_hi & r2); // x_hi * r2 + y_lo * r + z_lo;
 73 |     int idx101 = idx100 + z_hi;      // x_hi * r2 + y_lo * r + z_hi;
 74 |     int idx110 = idx100 + (y_hi & r);  // x_hi * r2 + y_hi * r + z_lo;
 75 |     int idx111 = idx110 + z_hi;      // x_hi * r2 + y_hi * r + z_hi;
 76 | 
 77 |     if (is_training) {
 78 |       wgts[i] = wgt000;
 79 |       wgts[i + n] = wgt001;
 80 |       wgts[i + n * 2] = wgt010;
 81 |       wgts[i + n * 3] = wgt011;
 82 |       wgts[i + n * 4] = wgt100;
 83 |       wgts[i + n * 5] = wgt101;
 84 |       wgts[i + n * 6] = wgt110;
 85 |       wgts[i + n * 7] = wgt111;
 86 |       inds[i] = idx000;
 87 |       inds[i + n] = idx001;
 88 |       inds[i + n * 2] = idx010;
 89 |       inds[i + n * 3] = idx011;
 90 |       inds[i + n * 4] = idx100;
 91 |       inds[i + n * 5] = idx101;
 92 |       inds[i + n * 6] = idx110;
 93 |       inds[i + n * 7] = idx111;
 94 |     }
 95 | 
 96 |     for (int j = 0; j < c; j++) {
 97 |       int jr3 = j * r3;
 98 |       outs[j * n + i] =
 99 |           wgt000 * feat[jr3 + idx000] + wgt001 * feat[jr3 + idx001] +
100 |           wgt010 * feat[jr3 + idx010] + wgt011 * feat[jr3 + idx011] +
101 |           wgt100 * feat[jr3 + idx100] + wgt101 * feat[jr3 + idx101] +
102 |           wgt110 * feat[jr3 + idx110] + wgt111 * feat[jr3 + idx111];
103 |     }
104 |   }
105 | }
106 | 
107 | /*
108 |   Function: trilinear devoxlization (backward)
109 |   Args:
110 |     b   : batch size
111 |     c   : #channels
112 |     n   : number of points
113 |     r3  : voxel cube size = voxel resolution ** 3
114 |     inds   : the voxel indices of point cube, IntTensor[b, 8, n]
115 |     wgts   : weight for trilinear interpolation, FloatTensor[b, 8, n]
116 |     grad_y : grad outputs, FloatTensor[b, c, n]
117 |     grad_x : grad inputs, FloatTensor[b, c, r3]
118 | */
119 | __global__ void trilinear_devoxelize_grad_kernel(
120 |     int b, int c, int n, int r3, const int *__restrict__ inds,
121 |     const float *__restrict__ wgts, const float *__restrict__ grad_y,
122 |     float *__restrict__ grad_x) {
123 |   int batch_index = blockIdx.x;
124 |   int stride = blockDim.x;
125 |   int index = threadIdx.x;
126 |   inds += batch_index * n * 8;
127 |   wgts += batch_index * n * 8;
128 |   grad_x += batch_index * c * r3;
129 |   grad_y += batch_index * c * n;
130 | 
131 |   for (int i = index; i < n; i += stride) {
132 |     int idx000 = inds[i];
133 |     int idx001 = inds[i + n];
134 |     int idx010 = inds[i + n * 2];
135 |     int idx011 = inds[i + n * 3];
136 |     int idx100 = inds[i + n * 4];
137 |     int idx101 = inds[i + n * 5];
138 |     int idx110 = inds[i + n * 6];
139 |     int idx111 = inds[i + n * 7];
140 |     float wgt000 = wgts[i];
141 |     float wgt001 = wgts[i + n];
142 |     float wgt010 = wgts[i + n * 2];
143 |     float wgt011 = wgts[i + n * 3];
144 |     float wgt100 = wgts[i + n * 4];
145 |     float wgt101 = wgts[i + n * 5];
146 |     float wgt110 = wgts[i + n * 6];
147 |     float wgt111 = wgts[i + n * 7];
148 | 
149 |     for (int j = 0; j < c; j++) {
150 |       int jr3 = j * r3;
151 |       float g = grad_y[j * n + i];
152 |       atomicAdd(grad_x + jr3 + idx000, wgt000 * g);
153 |       atomicAdd(grad_x + jr3 + idx001, wgt001 * g);
154 |       atomicAdd(grad_x + jr3 + idx010, wgt010 * g);
155 |       atomicAdd(grad_x + jr3 + idx011, wgt011 * g);
156 |       atomicAdd(grad_x + jr3 + idx100, wgt100 * g);
157 |       atomicAdd(grad_x + jr3 + idx101, wgt101 * g);
158 |       atomicAdd(grad_x + jr3 + idx110, wgt110 * g);
159 |       atomicAdd(grad_x + jr3 + idx111, wgt111 * g);
160 |     }
161 |   }
162 | }
163 | 
164 | void trilinear_devoxelize(int b, int c, int n, int r, int r2, int r3,
165 |                           bool training, const float *coords, const float *feat,
166 |                           int *inds, float *wgts, float *outs) {
167 |   trilinear_devoxelize_kernel<<<b, optimal_num_threads(n)>>>(
168 |       b, c, n, r, r2, r3, training, coords, feat, inds, wgts, outs);
169 |   CUDA_CHECK_ERRORS();
170 | }
171 | 
172 | void trilinear_devoxelize_grad(int b, int c, int n, int r3, const int *inds,
173 |                                const float *wgts, const float *grad_y,
174 |                                float *grad_x) {
175 |   trilinear_devoxelize_grad_kernel<<<b, optimal_num_threads(n)>>>(
176 |       b, c, n, r3, inds, wgts, grad_y, grad_x);
177 |   CUDA_CHECK_ERRORS();
178 | }
179 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.cu:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | 
  5 | #include "../cuda_utils.cuh"
  6 | 
  7 | /*
  8 |   Function: three nearest neighbors
  9 |   Args:
 10 |     b   : batch size
 11 |     n   : number of points in point clouds
 12 |     m   : number of query centers
 13 |     points_coords : coordinates of points, FloatTensor[b, 3, n]
 14 |     centers_coords: coordinates of centers, FloatTensor[b, 3, m]
 15 |     weights       : weights of nearest 3 centers to the point,
 16 |                     FloatTensor[b, 3, n]
 17 |     indices       : indices of nearest 3 centers to the point,
 18 |                     IntTensor[b, 3, n]
 19 | */
 20 | __global__ void three_nearest_neighbors_kernel(
 21 |     int b, int n, int m, const float *__restrict__ points_coords,
 22 |     const float *__restrict__ centers_coords, float *__restrict__ weights,
 23 |     int *__restrict__ indices) {
 24 |   int batch_index = blockIdx.x;
 25 |   int index = threadIdx.x;
 26 |   int stride = blockDim.x;
 27 |   points_coords += batch_index * 3 * n;
 28 |   weights += batch_index * 3 * n;
 29 |   indices += batch_index * 3 * n;
 30 |   centers_coords += batch_index * 3 * m;
 31 | 
 32 |   for (int j = index; j < n; j += stride) {
 33 |     float ux = points_coords[j];
 34 |     float uy = points_coords[j + n];
 35 |     float uz = points_coords[j + n + n];
 36 | 
 37 |     double best0 = 1e40, best1 = 1e40, best2 = 1e40;
 38 |     int besti0 = 0, besti1 = 0, besti2 = 0;
 39 |     for (int k = 0; k < m; ++k) {
 40 |       float x = centers_coords[k];
 41 |       float y = centers_coords[k + m];
 42 |       float z = centers_coords[k + m + m];
 43 |       float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
 44 |       if (d < best2) {
 45 |         best2 = d;
 46 |         besti2 = k;
 47 |         if (d < best1) {
 48 |           best2 = best1;
 49 |           besti2 = besti1;
 50 |           best1 = d;
 51 |           besti1 = k;
 52 |           if (d < best0) {
 53 |             best1 = best0;
 54 |             besti1 = besti0;
 55 |             best0 = d;
 56 |             besti0 = k;
 57 |           }
 58 |         }
 59 |       }
 60 |     }
 61 |     best0 = max(min(1e10f, best0), 1e-10f);
 62 |     best1 = max(min(1e10f, best1), 1e-10f);
 63 |     best2 = max(min(1e10f, best2), 1e-10f);
 64 |     float d0d1 = best0 * best1;
 65 |     float d0d2 = best0 * best2;
 66 |     float d1d2 = best1 * best2;
 67 |     float d0d1d2 = 1.0f / (d0d1 + d0d2 + d1d2);
 68 |     weights[j] = d1d2 * d0d1d2;
 69 |     indices[j] = besti0;
 70 |     weights[j + n] = d0d2 * d0d1d2;
 71 |     indices[j + n] = besti1;
 72 |     weights[j + n + n] = d0d1 * d0d1d2;
 73 |     indices[j + n + n] = besti2;
 74 |   }
 75 | }
 76 | 
 77 | /*
 78 |   Function: interpolate three nearest neighbors (forward)
 79 |   Args:
 80 |     b   : batch size
 81 |     c   : #channels of features
 82 |     m   : number of query centers
 83 |     n   : number of points in point clouds
 84 |     centers_features: features of centers, FloatTensor[b, c, m]
 85 |     indices         : indices of nearest 3 centers to the point,
 86 |                       IntTensor[b, 3, n]
 87 |     weights         : weights for interpolation, FloatTensor[b, 3, n]
 88 |     out             : features of points, FloatTensor[b, c, n]
 89 | */
 90 | __global__ void three_nearest_neighbors_interpolate_kernel(
 91 |     int b, int c, int m, int n, const float *__restrict__ centers_features,
 92 |     const int *__restrict__ indices, const float *__restrict__ weights,
 93 |     float *__restrict__ out) {
 94 |   int batch_index = blockIdx.x;
 95 |   centers_features += batch_index * m * c;
 96 |   indices += batch_index * n * 3;
 97 |   weights += batch_index * n * 3;
 98 |   out += batch_index * n * c;
 99 | 
100 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
101 |   const int stride = blockDim.y * blockDim.x;
102 |   for (int i = index; i < c * n; i += stride) {
103 |     const int l = i / n;
104 |     const int j = i % n;
105 |     float w1 = weights[j];
106 |     float w2 = weights[j + n];
107 |     float w3 = weights[j + n + n];
108 |     int i1 = indices[j];
109 |     int i2 = indices[j + n];
110 |     int i3 = indices[j + n + n];
111 | 
112 |     out[i] = centers_features[l * m + i1] * w1 +
113 |              centers_features[l * m + i2] * w2 +
114 |              centers_features[l * m + i3] * w3;
115 |   }
116 | }
117 | 
118 | void three_nearest_neighbors_interpolate(int b, int c, int m, int n,
119 |                                          const float *points_coords,
120 |                                          const float *centers_coords,
121 |                                          const float *centers_features,
122 |                                          int *indices, float *weights,
123 |                                          float *out) {
124 |   three_nearest_neighbors_kernel<<<b, optimal_num_threads(n), 0,
125 |                                    at::cuda::getCurrentCUDAStream()>>>(
126 |       b, n, m, points_coords, centers_coords, weights, indices);
127 |   three_nearest_neighbors_interpolate_kernel<<<
128 |       b, optimal_block_config(n, c), 0, at::cuda::getCurrentCUDAStream()>>>(
129 |       b, c, m, n, centers_features, indices, weights, out);
130 |   CUDA_CHECK_ERRORS();
131 | }
132 | 
133 | /*
134 |   Function: interpolate three nearest neighbors (backward)
135 |   Args:
136 |     b   : batch size
137 |     c   : #channels of features
138 |     m   : number of query centers
139 |     n   : number of points in point clouds
140 |     grad_y  : grad of features of points, FloatTensor[b, c, n]
141 |     indices : indices of nearest 3 centers to the point, IntTensor[b, 3, n]
142 |     weights : weights for interpolation, FloatTensor[b, 3, n]
143 |     grad_x  : grad of features of centers, FloatTensor[b, c, m]
144 | */
145 | __global__ void three_nearest_neighbors_interpolate_grad_kernel(
146 |     int b, int c, int n, int m, const float *__restrict__ grad_y,
147 |     const int *__restrict__ indices, const float *__restrict__ weights,
148 |     float *__restrict__ grad_x) {
149 |   int batch_index = blockIdx.x;
150 |   grad_y += batch_index * n * c;
151 |   indices += batch_index * n * 3;
152 |   weights += batch_index * n * 3;
153 |   grad_x += batch_index * m * c;
154 | 
155 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
156 |   const int stride = blockDim.y * blockDim.x;
157 |   for (int i = index; i < c * n; i += stride) {
158 |     const int l = i / n;
159 |     const int j = i % n;
160 |     float w1 = weights[j];
161 |     float w2 = weights[j + n];
162 |     float w3 = weights[j + n + n];
163 |     int i1 = indices[j];
164 |     int i2 = indices[j + n];
165 |     int i3 = indices[j + n + n];
166 |     atomicAdd(grad_x + l * m + i1, grad_y[i] * w1);
167 |     atomicAdd(grad_x + l * m + i2, grad_y[i] * w2);
168 |     atomicAdd(grad_x + l * m + i3, grad_y[i] * w3);
169 |   }
170 | }
171 | 
172 | void three_nearest_neighbors_interpolate_grad(int b, int c, int n, int m,
173 |                                               const float *grad_y,
174 |                                               const int *indices,
175 |                                               const float *weights,
176 |                                               float *grad_x) {
177 |   three_nearest_neighbors_interpolate_grad_kernel<<<
178 |       b, optimal_block_config(n, c), 0, at::cuda::getCurrentCUDAStream()>>>(
179 |       b, c, n, m, grad_y, indices, weights, grad_x);
180 |   CUDA_CHECK_ERRORS();
181 | }
182 | 


--------------------------------------------------------------------------------
/grasp_ldm/losses/loss.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import random
  3 | from typing import Any
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | from torch import nn
  8 | 
  9 | from grasp_ldm.utils.rotations import tmrp_to_H
 10 | 
 11 | __all__ = [
 12 |     "VAEReconstructionLoss",
 13 |     "VAELatentLoss",
 14 |     "ClassificationLoss",
 15 |     "QualityLoss",
 16 |     "GraspReconstructionLoss",
 17 |     "GraspControlPointsReconstructionLoss",
 18 | ]
 19 | 
 20 | 
 21 | # From: https://github.com/haofuml/cyclical_annealing
 22 | def linear_cyclical_anneling(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5):
 23 |     L = np.ones(n_iter) * stop
 24 |     period = n_iter / n_cycle
 25 |     step = (stop - start) / (period * ratio)  # linear schedule
 26 | 
 27 |     for c in range(n_cycle):
 28 |         v, i = start, 0
 29 |         while v <= stop and (int(i + c * period) < n_iter):
 30 |             L[int(i + c * period)] = v
 31 |             v += step
 32 |             i += 1
 33 |     return L
 34 | 
 35 | 
 36 | class VAEReconstructionLoss(nn.Module):
 37 |     def __init__(self, weight=1, name="reconstruction_loss") -> None:
 38 |         super().__init__()
 39 |         self.name = name
 40 |         self.criterion = nn.MSELoss()
 41 |         self.weight = weight
 42 | 
 43 |     def forward(self, input, output):
 44 |         return self.weight * self.criterion(input, output)
 45 | 
 46 | 
 47 | class GraspReconstructionLoss(VAEReconstructionLoss):
 48 |     def __init__(
 49 |         self, translation_weight=10, rotation_weight=1, name="reconstruction_loss"
 50 |     ) -> None:
 51 |         super().__init__(weight=1, name=name)
 52 | 
 53 |         self.translation_weight = translation_weight
 54 |         self.rotation_weight = rotation_weight
 55 | 
 56 |     def forward(self, x_out, x_in, **kwargs):
 57 |         """Forward
 58 | 
 59 |         Args:
 60 |             x_out (Tensor): [B, 6] Predicted pose- (t(3), mrp(3))
 61 |             x_in (Tensor): [B, 6] Ground truth pose- (t(3), mrp(3))
 62 | 
 63 |         Returns:
 64 |             _type_: _description_
 65 |         """
 66 |         x_pred = x_out.clone()
 67 |         x_pred[..., :3] = x_pred[..., :3] * self.translation_weight
 68 |         x_pred[..., 3:] = x_pred[..., 3:] * self.rotation_weight
 69 | 
 70 |         x_gt = x_in.clone()
 71 |         x_gt[..., :3] = x_gt[..., :3] * self.translation_weight
 72 |         x_gt[..., 3:] = x_gt[..., 3:] * self.rotation_weight
 73 | 
 74 |         return super().forward(x_gt, x_pred)
 75 | 
 76 | 
 77 | class GraspControlPointsReconstructionLoss(VAEReconstructionLoss):
 78 |     def __init__(
 79 |         self,
 80 |         weight=1,
 81 |         name="reconstruction_loss",
 82 |         control_pts_file="grasp_ldm/dataset/acronym/gripper_ctrl_pts.json",
 83 |     ) -> None:
 84 |         super().__init__(weight=1, name=name)
 85 | 
 86 |         with open(control_pts_file) as f:
 87 |             _control_pts = np.array(json.load(f))
 88 | 
 89 |         # append 1 to the end of each control point
 90 |         self.control_pts = torch.from_numpy(
 91 |             np.concatenate(
 92 |                 [_control_pts, np.ones((_control_pts.shape[0], 1))],
 93 |                 axis=1,
 94 |             )
 95 |         )
 96 |         self.criterion = nn.MSELoss()
 97 |         self.weight = weight
 98 | 
 99 |     def forward(self, x_target, x_pred, **kwargs):
100 |         """Forward
101 | 
102 |         Args:
103 |             x_out (Tensor): [B, 6] Predicted pose- (t(3), mrp(3))
104 |             x_in (Tensor): [B, 6] Ground truth pose- (t(3), mrp(3))
105 | 
106 |         Returns:
107 |             _type_: _description_
108 |         """
109 |         metas = kwargs["metas"]
110 |         pc_batch_size = metas["grasp_std"].shape[0]
111 |         h_target = x_target.view((pc_batch_size, -1, 6)) * metas["grasp_std"].unsqueeze(
112 |             1
113 |         ) + metas["grasp_mean"].unsqueeze(1)
114 |         h_pred = x_pred.view((pc_batch_size, -1, 6)) * metas["grasp_std"].unsqueeze(
115 |             1
116 |         ) + metas["grasp_mean"].unsqueeze(1)
117 | 
118 |         ctrl_pts = self.control_pts.clone().to(h_target.device, h_target.dtype)
119 | 
120 |         H_target = tmrp_to_H(h_target.view((-1, 6)))
121 |         H_pred = tmrp_to_H(h_pred.view((-1, 6)))
122 | 
123 |         # Get the control points
124 |         control_pts_target = (H_target @ ctrl_pts.T).transpose(1, 2)
125 |         control_pts_pred = (H_pred @ ctrl_pts.T).transpose(1, 2)
126 | 
127 |         return self.weight * self.criterion(control_pts_target, control_pts_pred)
128 | 
129 | 
130 | class VAELatentLoss(nn.Module):
131 |     def __init__(
132 |         self,
133 |         weight=1,
134 |         name="kl_loss",
135 |         cyclical_annealing=False,
136 |         num_steps=None,
137 |         num_cycles=None,
138 |         start=1e-7,
139 |         stop=0.2,
140 |         ratio=0.25,
141 |     ) -> None:
142 |         super().__init__()
143 |         self.name = name
144 | 
145 |         if not cyclical_annealing:
146 |             self.weight = weight
147 |             self.schedule = None
148 |         else:
149 |             assert num_cycles is not None and num_steps is not None
150 |             self.weight = None
151 |             self.schedule = linear_cyclical_anneling(
152 |                 num_steps,
153 |                 start=start,
154 |                 stop=stop,
155 |                 n_cycle=num_cycles,
156 |                 ratio=ratio,
157 |             )
158 |         self.is_annealed = cyclical_annealing
159 | 
160 |     def forward(
161 |         self,
162 |         mu: torch.Tensor,
163 |         logvar: torch.Tensor,
164 |         return_unweighted: bool = False,
165 |         **kwargs,
166 |     ):
167 |         """Forward
168 |             B: Batch size
169 |             D: Dimensions of the latent
170 | 
171 |         Args:
172 |             mu (torch.Tensor): latent means [B, D]
173 |             logvar (torch.Tensor): latent logvars [B, D]
174 |             step (int, optional): step number for weight schedule.
175 |                         None, if no schedule. i.e. Constant weight
176 |             return_unweighted (bool, optional): Whether to also return unweighted loss
177 |                             Defaults to False
178 |         Returns:
179 |             torch.Tensor:  weighted kl loss [1,]  (if return_unweighted is False)
180 |             tuple(torch.Tensor, torch.Tensor): weighted_loss[1,], unweighted_kld[1,]
181 |         """
182 |         kl_d = -0.5 * torch.sum(1 + logvar - mu**2 - logvar.exp(), dim=1)
183 |         kl_d = torch.mean(kl_d, dim=0)
184 | 
185 |         if return_unweighted:
186 |             return self.weight * kl_d, kl_d
187 |         else:
188 |             return self.weight * kl_d
189 | 
190 |     def set_weight_from_schedule(self, step):
191 |         assert (
192 |             hasattr(self, "schedule") and self.schedule is not None
193 |         ), "No member schedule found in self, to set the loss weight from schedule."
194 |         f"Weight annealing was set to {self.is_annealed}"
195 | 
196 |         self.weight = (
197 |             self.schedule[step] if step < len(self.schedule) else self.schedule[-1]
198 |         )
199 |         return
200 | 
201 | 
202 | class ClassificationLoss(nn.Module):
203 |     def __init__(self, weight=1, name="classfication_loss") -> None:
204 |         super().__init__()
205 |         self.name = name
206 |         self.weight = weight
207 |         self.class_criterion = nn.BCEWithLogitsLoss(reduction="mean")
208 |         self.class_weight = weight
209 | 
210 |     def forward(self, output, targets, **kwargs):
211 |         classification_loss = self.class_criterion(output, targets)
212 |         return self.weight * classification_loss
213 | 
214 | 
215 | class QualityLoss(nn.Module):
216 |     def __init__(self, weight=1, name="quality_loss") -> None:
217 |         super().__init__()
218 |         self.name = name
219 |         self.weight = weight
220 |         self.criterion = nn.SmoothL1Loss()
221 | 
222 |     def forward(self, quals_in, quals_target, **kwargs):
223 |         confidence_loss = self.criterion(quals_in, quals_target)
224 | 
225 |         return self.weight * confidence_loss
226 | 


--------------------------------------------------------------------------------
/grasp_ldm/utils/camera.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import json
  3 | import os
  4 | import warnings
  5 | 
  6 | import numpy as np
  7 | import torch
  8 | 
  9 | from .utils import load_json
 10 | 
 11 | try:
 12 |     import pyrender
 13 | except:
 14 |     warnings.warn("pyrender was not found. Rendering modules will not work.")
 15 | 
 16 | 
 17 | def read_csv_realsense(csv_file_path):
 18 |     with open(csv_file_path, "r") as csv_file:
 19 |         csv_reader = csv.reader(csv_file, delimiter=",")
 20 |         data = {row[0]: row[1] for row in csv_reader if len(row) > 1}
 21 | 
 22 |     frame_info = {
 23 |         key: data[key]
 24 |         for key in [
 25 |             "Type",
 26 |             "Depth",
 27 |             "Format",
 28 |             "Frame Number",
 29 |             "Timestamp (ms)",
 30 |             "Resolution x",
 31 |             "Resolution y",
 32 |             "Bytes per pixel",
 33 |         ]
 34 |     }
 35 |     intrinsic_info = {
 36 |         key: data[key] for key in ["Fx", "Fy", "PPx", "PPy", "Distorsion"]
 37 |     }
 38 | 
 39 |     cam_json = {
 40 |         "hfov": 2 * np.arctan2(data["Resolution_x"] / (2 * data["Fx"])) * 180 / np.pi,
 41 |         "vfov": 2 * np.arctan2(data["Resolution_y"] / (2 * data["Fy"])) * 180 / np.pi,
 42 |         "width": int(data["Resolution_x"]),
 43 |         "height": int(data["Resolution_y"]),
 44 |         "cameraMatrix": [
 45 |             [float(data["Fx"]), 0, float(data["PPx"])],
 46 |             [0, float(data["Fy"]), float(data["PPy"])],
 47 |             [0, 0, 1],
 48 |         ],
 49 |         "distCoeffs": [],
 50 |     }
 51 |     return cam_json
 52 | 
 53 | 
 54 | def calculate_view_frustum(start_point, end_point, fov):
 55 |     """
 56 |     Calculate the coordinates of the view frustum given the boresight line and FOV.
 57 | 
 58 |     Args:
 59 |     start_point (tuple): The starting point of the boresight line.
 60 |     end_point (tuple): The ending point of the boresight line.
 61 |     fov (float): The field of view of the camera in degrees.
 62 | 
 63 |     Returns:
 64 |     view_frustum (list): A list of tuples containing the coordinates of the view frustum.
 65 |     """
 66 | 
 67 |     # Convert the FOV from degrees to radians
 68 |     fov_rad = np.radians(fov)
 69 | 
 70 |     # Calculate the distance between the two points
 71 |     distance = np.sqrt(
 72 |         sum([(end - start) ** 2 for start, end in zip(start_point, end_point)])
 73 |     )
 74 | 
 75 |     # Calculate the half-angle of the FOV
 76 |     half_angle = np.tan(fov_rad / 2)
 77 | 
 78 |     # Calculate the coordinates of the view frustum
 79 |     view_frustum = []
 80 |     for i in range(-1, 2, 2):  # Iterate twice: -1 for near plane, +1 for far plane
 81 |         x = start_point[0] + i * distance * half_angle
 82 |         y = start_point[1] + i * distance * half_angle
 83 |         z = start_point[2] + i * distance
 84 |         view_frustum.append((x, y, z))
 85 | 
 86 |     return view_frustum
 87 | 
 88 | 
 89 | class Camera:
 90 |     """Camera model using a user json file"""
 91 | 
 92 |     def __init__(
 93 |         self,
 94 |         camera_json_path: str,
 95 |         z_near: float = 0.05,
 96 |         z_far: float = 20,
 97 |     ) -> None:
 98 |         """
 99 |         Args:
100 |             camera_json_path (str): camera json file path
101 |             camera_name (str): camera name from the json.
102 |         """
103 |         self.name = os.path.basename(camera_json_path)
104 |         self.data = load_json(camera_json_path)
105 | 
106 |         # Intrinsics and distortion matrix
107 |         self.K = np.array(self.data["cameraMatrix"])
108 |         self.dists = np.array(self.data["distCoeffs"])
109 | 
110 |         # Focal Length in px
111 |         self._fx = self.K[0, 0]
112 |         self._fy = self.K[1, 1]
113 | 
114 |         # Principal centers
115 |         self._cx = self.K[0, 2]
116 |         self._cy = self.K[1, 2]
117 | 
118 |         # Near/Far limits in boresight
119 |         self.z_near = z_near
120 |         self.z_far = z_far
121 | 
122 |         # Image size in px
123 |         self.width = self.data["width"]
124 |         self.height = self.data["height"]
125 | 
126 |         # FOV
127 |         self.xfov = self.data["hfov"]  # HFOV
128 |         self.yfov = self.data["vfov"]  # VFOV
129 | 
130 |     def to_pyrender_camera(self):
131 |         return pyrender.IntrinsicsCamera(
132 |             self._fx, self._fy, self._cx, self._cy, self.z_near, self.z_far
133 |         )
134 | 
135 |     def depth_to_pointcloud(
136 |         self, depth: np.ndarray, rgb: np.ndarray = None
137 |     ) -> np.ndarray:
138 |         """Convert depth image to pointcloud given camera intrinsics.
139 |         Args:
140 |             depth (np.ndarray): Depth image.
141 |         Returns:
142 |             np.ndarray: [nx4] (x, y, z, 1) Point cloud.
143 |         """
144 | 
145 |         height = depth.shape[0]
146 |         width = depth.shape[1]
147 | 
148 |         assert (
149 |             height == self.height
150 |         ), "Something went wrong. height of the depth image does not match the camera model."
151 |         assert (
152 |             width == self.width
153 |         ), "Something went wrong. width of the depth image does not match the camera model."
154 | 
155 |         mask = np.where(depth > 0)
156 |         x, y = mask[1], mask[0]
157 | 
158 |         normalized_x = x.astype(np.float32) - self._cx
159 |         normalized_y = y.astype(np.float32) - self._cy
160 | 
161 |         world_x = normalized_x * depth[y, x] / self._fx
162 |         world_y = normalized_y * depth[y, x] / self._fy
163 |         world_z = depth[y, x]
164 | 
165 |         if rgb is not None:
166 |             rgb = rgb[y, x, :]
167 | 
168 |         pc = np.vstack((world_x, world_y, world_z)).T
169 | 
170 |         if rgb is not None:
171 |             rgb = rgb[y, x, :]
172 |             return pc, rgb
173 |         else:
174 |             return pc
175 | 
176 |     def depth_to_pointcloud_torch(
177 |         self, depth: torch.Tensor, rgb: torch.Tensor = None
178 |     ) -> torch.Tensor:
179 |         """Convert depth image to pointcloud given camera intrinsics.
180 |         Args:
181 |             depth (torch.Tensor): Depth image.
182 |         Returns:
183 |             torch.Tensor: [nx4] (x, y, z, 1) Point cloud.
184 |         """
185 | 
186 |         height = depth.shape[0]
187 |         width = depth.shape[1]
188 | 
189 |         assert (
190 |             height == self.height
191 |         ), "Something went wrong. height of the depth image does not match the camera model."
192 |         assert (
193 |             width == self.width
194 |         ), "Something went wrong. width of the depth image does not match the camera model."
195 | 
196 |         mask = torch.where(depth > 0)
197 |         x, y = mask[1], mask[0]
198 | 
199 |         normalized_x = x.to(torch.float32) - self._cx
200 |         normalized_y = y.to(torch.float32) - self._cy
201 | 
202 |         world_x = normalized_x * depth[y, x] / self._fx
203 |         world_y = normalized_y * depth[y, x] / self._fy
204 |         world_z = depth[y, x]
205 | 
206 |         if rgb is not None:
207 |             rgb = rgb[y, x, :]
208 | 
209 |         pc = torch.vstack((world_x, world_y, world_z)).T
210 | 
211 |         if rgb is not None:
212 |             rgb = rgb[y, x, :]
213 |             return pc, rgb
214 |         else:
215 |             return pc
216 | 
217 |     def write_to_dir(self, out_dir):
218 |         json_fp = os.path.join(out_dir, f"camera_{self.name}.json")
219 | 
220 |         print(f"Writing camera model {self.name} to {json_fp}.")
221 |         with json_fp as fileobj:
222 |             json.dump(self.data, fileobj)
223 |         return
224 | 
225 |     # def get_trimesh_camera(self):
226 |     #     """Get a trimesh object representing the camera intrinsics.
227 |     #     Returns:
228 |     #         trimesh.scene.cameras.Camera: Intrinsic parameters of the camera model
229 |     #     """
230 |     #     return trimesh.scene.cameras.Camera(
231 |     #         fov=(np.rad2deg(self._fov), np.rad2deg(self._fov)),
232 |     #         resolution=(self._height, self._width),
233 |     #         z_near=self._z_near,
234 |     #     )
235 | 


--------------------------------------------------------------------------------
/grasp_ldm/trainers/grasp_classification_trainer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import warnings
  3 | 
  4 | import einops
  5 | import torch
  6 | import torch.nn as nn
  7 | import torcheval.metrics.functional as Metrics
  8 | from pytorch_lightning.callbacks import (
  9 |     DeviceStatsMonitor,
 10 |     LearningRateMonitor,
 11 |     ModelCheckpoint,
 12 |     ModelSummary,
 13 |     StochasticWeightAveraging,
 14 | )
 15 | from pytorch_lightning.loggers import CSVLogger, Logger, TensorBoardLogger, WandbLogger
 16 | from torch.utils.data import Dataset
 17 | from utils.rotations import tmrp_to_H
 18 | 
 19 | from grasp_ldm.dataset.builder import build_dataset_from_cfg
 20 | from grasp_ldm.models.builder import build_model_from_cfg
 21 | from grasp_ldm.utils.config import Config, ConfigDict
 22 | 
 23 | from .experiment import Experiment
 24 | from .trainer import LightningTrainer
 25 | 
 26 | 
 27 | class GraspClassificationTrainer(LightningTrainer):
 28 |     CLS_PRED_THRESHOLD = 0.5
 29 | 
 30 |     def __init__(self, config: Config = None):
 31 |         """Grasp Classification Trainer"""
 32 | 
 33 |         # Split main sub-configs
 34 |         model_config = config.model
 35 |         data_config = config.data
 36 |         trainer_config = config.trainer
 37 | 
 38 |         # Experiment and config
 39 |         self._config = config
 40 |         self._experiment = Experiment(config.filename)
 41 | 
 42 |         # Checkpointing
 43 |         self._checkpointing_freq = (
 44 |             trainer_config.checkpointing_freq
 45 |             if hasattr(trainer_config, "checkpointing_freq")
 46 |             else 1000
 47 |         )
 48 |         trainer_config.default_root_dir = self._experiment.ckpt_dir
 49 | 
 50 |         # Initialize parent trainer class
 51 |         super().__init__(model_config, data_config, trainer_config)
 52 | 
 53 |         self.resume_from_checkpoint = self._experiment.default_resume_checkpoint
 54 | 
 55 |     def _build_dataset(self, data_config, split):
 56 |         """Custom routine for building dataset"""
 57 |         dataset = build_dataset_from_cfg(data_config, split)
 58 | 
 59 |         # dataset.pre_load() should define any pre-loading operations before workers are spawned
 60 |         dataset.pre_load()
 61 | 
 62 |         return dataset
 63 | 
 64 |     def _build_model(self, model_config):
 65 |         """Custom routine for building model"""
 66 |         model = build_model_from_cfg(ConfigDict(model=model_config))
 67 | 
 68 |         ## TODO: custom model initialization, if any
 69 |         # model.initialize()
 70 | 
 71 |         return model
 72 | 
 73 |     def training_step(self, batch_data, batch_idx):
 74 |         """Training step"""
 75 | 
 76 |         # Inputs
 77 |         pc = batch_data["pc"]
 78 |         grasps = batch_data["grasps"]
 79 | 
 80 |         # TODO: verify this reshape consistency
 81 |         success_labels = batch_data["success"].view(-1)
 82 |         num_grasps = grasps.shape[1]
 83 | 
 84 |         # Repeat pc and grasp so there is a 1-1 pairing
 85 |         pc = pc.repeat_interleave(num_grasps, dim=0)
 86 |         grasps = einops.rearrange(grasps, "b n c d -> (b n) c d")
 87 | 
 88 |         # Metas
 89 |         metas = batch_data["metas"]
 90 | 
 91 |         # Forward
 92 |         loss, _ = self.model(pc, grasps, cls_target=success_labels, compute_loss=True)
 93 | 
 94 |         # Log Loss
 95 |         self.log("loss", loss, sync_dist=True)
 96 |         return loss
 97 | 
 98 |     def validation_step(self, batch_data, batch_idx):
 99 |         """Validation step"""
100 | 
101 |         # Inputs
102 |         pc = batch_data["pc"]
103 |         grasps = batch_data["grasps"]
104 | 
105 |         # TODO: verify this reshape consistency
106 |         success_labels = batch_data["success"].view(-1)
107 |         num_grasps = grasps.shape[1]
108 | 
109 |         # Repeat pc and grasp so there is a 1-1 pairing
110 |         pc = pc.repeat_interleave(num_grasps, dim=0)
111 |         grasps = einops.rearrange(grasps, "b n c d -> (b n) c d")
112 | 
113 |         # Metas
114 |         metas = batch_data["metas"]
115 | 
116 |         # Forward
117 |         loss, preds = self.model(
118 |             pc, grasps, cls_target=success_labels, compute_loss=True
119 |         )
120 | 
121 |         # Convert probs to binary preds
122 |         preds = preds.detach()
123 |         preds[preds > self.CLS_PRED_THRESHOLD] = 1
124 |         preds[preds <= self.CLS_PRED_THRESHOLD] = 0
125 | 
126 |         # Accumulate preds in cache
127 |         self._update_cache("validation", "epoch", "cls_preds", preds.long())
128 |         self._update_cache("validation", "epoch", "cls_targets", success_labels.long())
129 | 
130 |         # Log Loss
131 |         self.log("val_loss", loss, sync_dist=True, prog_bar=True)
132 |         return
133 | 
134 |     def on_validation_epoch_end(self) -> None:
135 |         eval_metrics = self._compute_metrics()
136 |         self.log_dict({"validation_metrics": eval_metrics}, sync_dist=True)
137 |         self.log(
138 |             "val_accuracy", eval_metrics["accuracy"], prog_bar=True, sync_dist=True
139 |         )
140 |         return
141 | 
142 |     def _get_callbacks(self) -> list:
143 |         """Custom callbacks to be used by the trainer."""
144 | 
145 |         checkpoint_callback1 = ModelCheckpoint(
146 |             save_top_k=3,
147 |             monitor="loss",
148 |             mode="min",
149 |             dirpath=self._experiment.ckpt_dir,
150 |             filename="epoch_{epoch:02d}-step_{step}-loss_{loss:.2f}",
151 |             save_last=True,
152 |             every_n_train_steps=self._checkpointing_freq,
153 |         )
154 | 
155 |         checkpoint_callback2 = ModelCheckpoint(
156 |             save_top_k=1,
157 |             monitor="loss",
158 |             mode="min",
159 |             dirpath=self._experiment.ckpt_dir,
160 |             filename="best",
161 |             save_last=True,
162 |             every_n_train_steps=1000,
163 |         )
164 | 
165 |         lr_monitor_callback = LearningRateMonitor(logging_interval="step")
166 | 
167 |         callbacks = [checkpoint_callback1, checkpoint_callback2, lr_monitor_callback]
168 | 
169 |         return callbacks
170 | 
171 |     def _get_logger(self) -> Logger:
172 |         """Custom logger to be used by the trainer."""
173 |         if hasattr(self.trainer_config, "logger"):
174 |             logger_config = self.trainer_config.logger
175 | 
176 |             if logger_config.type == "WandbLogger":
177 |                 assert hasattr(
178 |                     logger_config, "project"
179 |                 ), "WandbLogger requires a project name to be specified in the config."
180 | 
181 |                 logger = WandbLogger(
182 |                     name=self._experiment.name,
183 |                     project=logger_config.project,
184 |                     save_dir=self._experiment.log_dir,
185 |                     config=self._config,
186 |                 )
187 |             elif logger_config.type == "TensorBoardLogger":
188 |                 logger = TensorBoardLogger(
189 |                     save_dir=self._experiment.log_dir,
190 |                     name=self._experiment.name,
191 |                 )
192 |         else:
193 |             logger = CSVLogger(
194 |                 save_dir=self._experiment.log_dir,
195 |                 name=self._experiment.name,
196 |             )
197 |         return logger
198 | 
199 |     def _compute_metrics(self):
200 |         """Compute metrics on validation set"""
201 | 
202 |         # Collect preds and targets from cache
203 |         cls_preds = torch.cat(self._validation_cache["epoch"]["cls_preds"])
204 |         cls_targets = torch.cat(self._validation_cache["epoch"]["cls_targets"])
205 | 
206 |         # Compute binary classification metrics
207 |         metrics = dict(
208 |             accuracy=Metrics.binary_accuracy(cls_preds, cls_targets),
209 |             precision=Metrics.binary_precision(cls_preds, cls_targets),
210 |             recall=Metrics.binary_recall(cls_preds, cls_targets),
211 |             f1=Metrics.binary_f1_score(cls_preds, cls_targets),
212 |             aP=Metrics.binary_auprc(cls_preds, cls_targets),
213 |             # confusion_matrix=Metrics.binary_confusion_matrix(cls_preds, cls_targets),
214 |         )
215 | 
216 |         return metrics
217 | 


--------------------------------------------------------------------------------
/configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | ## --------------------  Most frequently changed params here  --------------------
  4 | 
  5 | resume_training_from_last = True
  6 | 
  7 | max_steps = 180000
  8 | batch_size = 10
  9 | 
 10 | num_gpus = 1
 11 | num_workers_per_gpu = 7
 12 | 
 13 | # During training, if a ckpt is provided here, it overrides resume_training_from_last and instead resumes training from this ckpt
 14 | vae_ckpt_path = None  # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt"
 15 | ddm_ckpt_path = None
 16 | 
 17 | max_scenes = None
 18 | 
 19 | 
 20 | root_data_dir = "data/ACRONYM"
 21 | 
 22 | ## -------------------- Inputs/Shapes ------------------------
 23 | # Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)]
 24 | 
 25 | pc_num_points = 1024
 26 | pc_latent_dims = 64
 27 | pc_latent_channels = 3
 28 | 
 29 | grasp_pose_dims = 6
 30 | num_output_qualities = 0
 31 | grasp_latent_dims = 4
 32 | 
 33 | grasp_representation_dims = (
 34 |     grasp_pose_dims + num_output_qualities + 1
 35 |     if num_output_qualities is not None
 36 |     else grasp_pose_dims + 1
 37 | )
 38 | 
 39 | ## ----------------------- Model -----------------------
 40 | 
 41 | dropout = 0.1  # or None
 42 | 
 43 | pc_encoder_config = dict(
 44 |     type="PVCNNEncoder",
 45 |     args=dict(
 46 |         in_features=3,
 47 |         n_points=pc_num_points,
 48 |         scale_channels=0.75,
 49 |         scale_voxel_resolution=0.75,
 50 |         num_blocks=(1, 1, 1, 1),
 51 |         out_channels=pc_latent_channels,
 52 |         use_global_attention=False,
 53 |     ),
 54 | )
 55 | 
 56 | grasp_encoder_config = dict(
 57 |     type="ResNet1D",
 58 |     args=dict(
 59 |         in_features=grasp_representation_dims,
 60 |         block_channels=(32, 64, 128, 256),
 61 |         input_conditioning_dims=pc_latent_dims,
 62 |         resnet_block_groups=4,
 63 |         dropout=dropout,
 64 |     ),
 65 | )
 66 | 
 67 | decoder_config = dict(
 68 |     type="ResNet1D",
 69 |     args=dict(
 70 |         block_channels=(32, 64, 128, 256),
 71 |         # out_dim=grasp_pose_dims,
 72 |         input_conditioning_dims=pc_latent_dims,
 73 |         resnet_block_groups=4,
 74 |         dropout=dropout,
 75 |     ),
 76 | )
 77 | 
 78 | loss_config = dict(
 79 |     reconstruction_loss=dict(
 80 |         type="GraspReconstructionLoss",
 81 |         name="reconstruction_loss",
 82 |         args=dict(translation_weight=1, rotation_weight=1),
 83 |     ),
 84 |     latent_loss=dict(
 85 |         type="VAELatentLoss",
 86 |         args=dict(
 87 |             name="grasp_latent",
 88 |             cyclical_annealing=True,
 89 |             num_steps=max_steps,
 90 |             num_cycles=1,
 91 |             ratio=0.5,
 92 |             start=1e-7,
 93 |             stop=0.1,
 94 |         ),
 95 |     ),
 96 |     classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)),
 97 |     # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)),
 98 | )
 99 | 
100 | denoiser_model = dict(
101 |     type="TimeConditionedResNet1D",
102 |     args=dict(
103 |         dim=grasp_latent_dims,
104 |         channels=1,
105 |         block_channels=(32, 64, 128, 256),
106 |         input_conditioning_dims=pc_latent_dims,
107 |         resnet_block_groups=4,
108 |         dropout=dropout,
109 |         is_time_conditioned=True,
110 |         learned_variance=False,
111 |         learned_sinusoidal_cond=False,
112 |         random_fourier_features=True,
113 |         # learned_sinusoidal_dim=16,
114 |     ),
115 | )
116 | # Use `model` for single module to be built. If a list of modules are required to be built, use `models` to make sure the outer
117 | # See models/builder.py for more info.
118 | model = dict(
119 |     vae=dict(
120 |         model=dict(
121 |             type="GraspCVAE",
122 |             args=dict(
123 |                 grasp_latent_size=grasp_latent_dims,
124 |                 pc_latent_size=pc_latent_dims,
125 |                 pc_encoder_config=pc_encoder_config,
126 |                 grasp_encoder_config=grasp_encoder_config,
127 |                 decoder_config=decoder_config,
128 |                 loss_config=loss_config,
129 |                 num_output_qualities=num_output_qualities,
130 |                 intermediate_feature_resolution=16,
131 |             ),
132 |         ),
133 |         ckpt_path=vae_ckpt_path,
134 |     ),
135 |     ddm=dict(
136 |         model=dict(
137 |             type="GraspLatentDDM",
138 |             args=dict(
139 |                 model=denoiser_model,
140 |                 latent_in_features=grasp_latent_dims,
141 |                 diffusion_timesteps=1000,
142 |                 noise_scheduler_type="ddpm",
143 |                 diffusion_loss="l2",
144 |                 beta_schedule="linear",
145 |                 is_conditioned=True,
146 |                 joint_training=False,
147 |                 denoising_loss_weight=1,
148 |                 variance_type="fixed_large",
149 |                 elucidated_diffusion=False,
150 |                 beta_start=0.00005,
151 |                 beta_end=0.001,
152 |             ),
153 |         ),
154 |         ckpt_path=ddm_ckpt_path,
155 |         use_vae_ema_model=True,
156 |     ),
157 | )
158 | ## -- Data --
159 | augs_config = [
160 |     dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)),
161 |     dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)),
162 |     dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)),
163 | ]
164 | 
165 | object_categories = [
166 |     "Cup",
167 |     "Mug",
168 |     "Fork",
169 |     "Hat",
170 |     "Bottle",
171 |     "Bowl",
172 |     "Car",
173 |     "Donut",
174 |     "Laptop",
175 |     "MousePad",
176 |     "Pencil",
177 |     "Plate",
178 |     "ScrewDriver",
179 |     "WineBottle",
180 |     "Backpack",
181 |     "Bag",
182 |     "Banana",
183 |     "Battery",
184 |     "BeanBag",
185 |     "Bear",
186 |     "Book",
187 |     "Books",
188 |     "Camera",
189 |     "CerealBox",
190 |     "Cookie",
191 |     "Hammer",
192 |     "Hanger",
193 |     "Knife",
194 |     "MilkCarton",
195 |     "Painting",
196 |     "PillBottle",
197 |     "Plant",
198 |     "PowerSocket",
199 |     "PowerStrip",
200 |     "PS3",
201 |     "PSP",
202 |     "Ring",
203 |     "Scissors",
204 |     "Shampoo",
205 |     "Shoes",
206 |     "Sheep",
207 |     "Shower",
208 |     "Sink",
209 |     "SoapBottle",
210 |     "SodaCan",
211 |     "Spoon",
212 |     "Statue",
213 |     "Teacup",
214 |     "Teapot",
215 |     "ToiletPaper",
216 |     "ToyFigure",
217 |     "Wallet",
218 |     "WineGlass",
219 |     "Cow",
220 |     "Sheep",
221 |     "Cat",
222 |     "Dog",
223 |     "Pizza",
224 |     "Elephant",
225 |     "Donkey",
226 |     "RubiksCube",
227 |     "Tank",
228 |     "Truck",
229 |     "USBStick",
230 | ]
231 | 
232 | train_data = dict(
233 |     type="AcronymShapenetPointclouds",
234 |     args=dict(
235 |         data_root_dir=root_data_dir,
236 |         batch_num_points_per_pc=pc_num_points,
237 |         batch_num_grasps_per_pc=100,
238 |         rotation_repr="mrp",
239 |         augs_config=augs_config,
240 |         split="train",
241 |         batch_failed_grasps_ratio=0,
242 |         use_dataset_statistics_for_norm=False,
243 |         filter_categories=object_categories,
244 |         load_fixed_subset_grasps_per_obj=None,
245 |         num_repeat_dataset=10,
246 |     ),
247 | )
248 | 
249 | data = dict(
250 |     train=train_data,
251 | )
252 | 
253 | # Patch: Mesh Categories. Used for simulation
254 | mesh_root = root_data_dir
255 | mesh_categories = object_categories
256 | 
257 | ## --------------------  Trainer  --------------------
258 | ## Logger
259 | logger = dict(type="WandbLogger", project="full-pc-ema-63c")
260 | 
261 | optimizer = dict(
262 |     initial_lr=0.001,
263 |     scheduler=dict(
264 |         type="MultiStepLR",
265 |         args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1),
266 |     ),
267 | )
268 | 
269 | trainer = dict(
270 |     max_steps=max_steps,
271 |     batch_size=batch_size,
272 |     num_workers=num_workers_per_gpu * num_gpus,
273 |     accelerator="gpu",
274 |     devices=num_gpus,
275 |     strategy="ddp",
276 |     logger=logger,
277 |     log_every_n_steps=100,
278 |     optimizer=optimizer,
279 |     resume_training_from_last=resume_training_from_last,
280 |     check_val_every_n_epoch=1,
281 |     ema=dict(
282 |         beta=0.990,
283 |         update_after_step=1000,
284 |     ),
285 |     deterministic=True,
286 | )
287 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## <p align="center">GraspLDM: Generative 6-DoF Grasp Synthesis using Latent Diffusion Models<br> </p>
  2 | 
  3 | <div align="center">
  4 |   <a href="https://kuldeepbrd1.github.io" target="_blank">Kuldeep&nbsp;Barad</a> &emsp; <b>&middot;</b> &emsp;
  5 |   <a href="https://github.com/AndrejOrsula" target="_blank">Andrej&nbsp;Orsula</a> &emsp; <b>&middot;</b> &emsp;
  6 |   <a href="https://wwwen.uni.lu/layout/set/print/snt/people/antoine_richard" target="_blank">Antoine&nbsp;Richard</a> &emsp; <b>&middot;</b> &emsp;
  7 |   <a href="https://scholar.google.com/citations?user=qDJjBAIAAAAJ&hl=en" target="_blank">Jan&nbsp;Dentler</a> &emsp; <b>&middot;</b> &emsp;
  8 |   <a href="https://www.spacer.lu/" target="_blank">Miguel&nbsp;Olivares-Mendez</a> &emsp; <b>&middot;</b> &emsp;
  9 |   <a href="https://carolmartinez.github.io/" target="_blank">Carol&nbsp;Martinez</a> &emsp;
 10 |   <br>
 11 | <!--   <a href="https://arxiv.org/abs/xxx" target="_blank">Paper</a> &emsp; -->
 12 | <!--   <a href="https://x" target="_blank">Project&nbsp;Page</a>  -->
 13 |   <h3><a href="" target="_blank"> ArXiv</a> &nbsp; | &nbsp; <a href="https://kuldeepbrd1.github.io/grasp_ldm/" target="https://youtu.be/z3-otAp28XA">Video</a></h3>
 14 | 
 15 | <p align="center">
 16 |   <img src="doc/img/arch_graspldm.png" width="75%" />
 17 | </p>
 18 | </div>
 19 | 
 20 | Vision-based grasping of unknown objects in unstructured environments is a key challenge for autonomous robotic manipulation. A practical grasp synthesis system is required to generate a diverse set of 6-DoF grasps from which a task-relevant grasp can be executed. Although generative models are suitable for learning such complex data distributions, existing models have limitations in grasp quality, long training times, and a lack of flexibility for task-specific generation. In this work, we present GraspLDM, a modular generative framework for 6-DoF grasp synthesis that uses diffusion models as priors in the latent space of a VAE. GraspLDM learns a generative model of object-centric SE(3) grasp poses conditioned on point clouds. GraspLDM's architecture enables us to train task-specific models efficiently by only re-training a small denoising network in the low-dimensional latent space, as opposed to existing models that need expensive re-training. Our framework provides robust and scalable models on both full and partial point clouds. GraspLDM models trained with simulation data transfer well to the real world without any further fine-tuning. Our models provide an 80% success rate for 80 grasp attempts of diverse test objects across two real-world robotic setups.
 21 | 
 22 | ## Pre-requisites
 23 | 
 24 | 1. Python >= 3.8
 25 | 1. CUDA > 11.1 and compatible Nvidia driver
 26 | 1. (Only for Docker) Nvidia container toolkit
 27 | 
 28 | ## Setup
 29 | 
 30 | You can setup a python environment using **Conda** or **virtualenv**.
 31 | Alternatively, to avoid issues with system libraries, you can use a **Docker** container or a **VSCode** Devcontainer.
 32 | 
 33 | 1. **Conda**
 34 | 
 35 |    ```
 36 |    conda create env -f environment.yml
 37 |    conda activate grasp_ldm
 38 |    ```
 39 | 
 40 | 1. **virtualenv**
 41 | 
 42 |    ```
 43 |    python -m venv grasp_ldm
 44 |    source grasp_ldm/bin/activate
 45 |    pip install -r requirements.txt
 46 |    ```
 47 | 
 48 | 1. **Docker**
 49 | 
 50 |    - Use the helper scripts to build a docker image and run the container.
 51 | 
 52 |    NOTE: Executing bash scripts may not always be safe. Double check before executing.
 53 | 
 54 |    ```
 55 |    cd .docker
 56 |    chmod +x build.sh run.sh
 57 | 
 58 |    # Build the image
 59 |    ./build.sh
 60 | 
 61 |    # Run a container
 62 |    ./run.sh
 63 |    ```
 64 | 
 65 | 1. **Devcontainer**
 66 | 
 67 |    - Use the editor commands (`Ctrl+Shft+P`) and start typing `Dev Containers: Reopen in Container` and select.
 68 | 
 69 |    - Generally, use `Dev Containers: Reopen in Container` to start the devcontainer. When you wish to rebuild after change use  `Dev Containers: Rebuild and Reopen ion Container.`
 70 | 
 71 |    - For more info on Devcontainers, refer to : ...
 72 | 
 73 | ## Prepare Data
 74 | 
 75 | 1. Download the ACRONYM dataset using the instructions given in [`nvlabs/acronym`](https://github.com/NVlabs/acronym?tab=readme-ov-file#using-the-full-acronym-dataset).
 76 | 
 77 | 1. Download the train/test splits data from the 🤗 HuggingFace [`kuldeepbarad/GraspLDM/splits`](https://huggingface.co/kuldeepbarad/GraspLDM/tree/main/splits)
 78 | 
 79 | ## Run Generation Demo on ShapeNet Point Clouds
 80 | 
 81 | 1. Download the pretrained models from 🤗 HuggingFace repository [`kuldeepbarad/GraspLDM`](https://huggingface.co/kuldeepbarad/GraspLDM).
 82 | 
 83 | 1. Run the demo script using pretrained model:
 84 | 
 85 |    ```bash
 86 |    python tools/generate_grasps.py --exp_path <path-to-experiment-folder> --mode VAE --visualize
 87 | 
 88 |    # Example
 89 |    python tools/generate_grasps.py --exp_path checkpoints/generation/fpc_1a_latentc3_z4_pc64_simple_140k_noatt --mode VAE --visualize
 90 |    ```
 91 | 
 92 |    <details>
 93 |     <summary>All options</summary>
 94 | 
 95 |    - `--exp_path`: Path to the experiment checkpoint
 96 |      ```bash
 97 |      python generate_grasps.py --exp_path checkpoints/generation/fpc_1a_latentc3_z4_pc64_simple_140k_noatt
 98 |      ```
 99 |    - `--data_root`: Root directory for data (default: "data/ACRONYM")
100 |    - `--mode`: Model type to use, either 'VAE' or 'LDM' (default: 'VAE')
101 |    - `--split`: Data split to use (default: "test")
102 |    - `--num_grasps`: Number of grasps to generate (default: 20)
103 |    - `--visualize`: Enable visualization
104 |    - `--no_ema`: Disable EMA model usage
105 |    - `--num_samples`: Number of samples to generate (default: 11)
106 |    - `--inference_steps`: Number of inference steps for LDM (default: 100)
107 | 
108 |    </details>
109 | 
110 | ## Run Training on ACRONYM Dataset
111 | 
112 | Train grasp sampling models (VAE, DDM) with multi-GPU support.
113 | 
114 | NOTE: The training is done in two stages. First the VAE encoders are trained and then the latent space denoising diffusion model. 
115 | 
116 | ```bash
117 | # Basic usage
118 | ## 1. First train the VAE
119 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model vae
120 | ## 2. Then train the DDM once VAE checkpoints are available.
121 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model ddm
122 | ```
123 | 
124 | Optional usage examples:
125 | ```bash
126 | # Multi-GPU training
127 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model vae --num-gpus 4 --batch-size 32
128 | 
129 | # DDM training - NOTE: DDM training can only be done once the VAE model for this experiment has been trained
130 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model ddm --seed 42
131 | ```
132 | 
133 | <details>
134 | <summary> All options </summary>
135 | 
136 | - `--config`, `-c`: Path to config file
137 | - `--model`, `-m`: Model type (`classifier`, `vae`, `ddm`)
138 | - `--root-dir`, `-d`: Data root directory
139 | - `--num-gpus`, `-g`: Number of GPUs
140 | - `--batch-size`, `-b`: Batch size per device
141 | - `--deterministic`: Enable deterministic training
142 | - `--seed`: Random seed
143 | - `-debug`: Disable wandb logging
144 | 
145 | </details>
146 | 
147 | ## Attribution
148 | 
149 | If you find this code useful, please cite our work:
150 | 
151 | ```
152 | @article{barad2023graspldm,
153 |   title={GraspLDM: Generative 6-DoF Grasp Synthesis using Latent Diffusion Models},
154 |   author={Barad, Kuldeep R and Orsula, Andrej and Richard, Antoine and Dentler, Jan and Olivares-Mendez, Miguel and Martinez, Carol},
155 |   journal={arXiv preprint arXiv:2312.11243},
156 |   year={2023}
157 | }
158 | ```
159 | 
160 | ## License
161 | 
162 | Apache 2.0 License. See [LICENSE](LICENSE) for more details.
163 | 
164 | ## Acknowledgements/External Resources
165 | 
166 | - Acronym tools and helpers are adapted from [https://github.com/NVlabs/acronym](https://github.com/NVlabs/acronym)
167 | 
168 | - PVCNN implementation and CUDA kernel are taken from [https://github.com/mit-han-lab/pvcnn](https://github.com/mit-han-lab/pvcnn)
169 | 
170 | - [`grasp_vdm/utils/config.py`](grasp_vdm/utils/config.py) is adapted from [https://github.com/open-mmlab/mmcv](https://github.com/open-mmlab/mmcv)
171 | 
172 | - Resnet models for DDM implementation is adapted from [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch) and [https://github.com/openai/improved-diffusion](https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/unet.py). Elucidated Diffusion Model is adapted from [https://github.com/NVlabs/edm](https://github.com/NVlabs/edm).
173 | 


--------------------------------------------------------------------------------
/configs/generation/partial_pc/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | ## --------------------  Most frequently changed params here  --------------------
  4 | 
  5 | resume_training_from_last = True
  6 | 
  7 | max_steps = 180000
  8 | batch_size = 60
  9 | 
 10 | num_gpus = 1
 11 | num_workers_per_gpu = 7
 12 | 
 13 | # During training, if a ckpt is provided here, it overrides resume_training_from_last and instead resumes training from this ckpt
 14 | vae_ckpt_path = None  # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt"
 15 | ddm_ckpt_path = None
 16 | 
 17 | max_scenes = None
 18 | 
 19 | 
 20 | root_data_dir = "data/acronym/renders/objects_filtered_grasps_63cat_8k/"
 21 | camera_json = "grasp_ldm/dataset/cameras/camera_d435i_dummy.json"
 22 | 
 23 | ## -------------------- Inputs/Shapes ------------------------
 24 | # Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)]
 25 | 
 26 | pc_num_points = 1024
 27 | pc_latent_dims = 256
 28 | pc_latent_channels = 3
 29 | 
 30 | grasp_pose_dims = 6
 31 | num_output_qualities = 0
 32 | grasp_latent_dims = 16
 33 | 
 34 | grasp_representation_dims = (
 35 |     grasp_pose_dims + num_output_qualities + 1
 36 |     if num_output_qualities is not None
 37 |     else grasp_pose_dims + 1
 38 | )
 39 | 
 40 | ## ----------------------- Model -----------------------
 41 | 
 42 | dropout = 0.1  # or None
 43 | 
 44 | pc_encoder_config = dict(
 45 |     type="PVCNNEncoder",
 46 |     args=dict(
 47 |         in_features=3,
 48 |         n_points=pc_num_points,
 49 |         scale_channels=0.75,
 50 |         scale_voxel_resolution=0.75,
 51 |         num_blocks=(1, 1, 1, 1),
 52 |         out_channels=pc_latent_channels,
 53 |         use_global_attention=False,
 54 |     ),
 55 | )
 56 | 
 57 | grasp_encoder_config = dict(
 58 |     type="ResNet1D",
 59 |     args=dict(
 60 |         in_features=grasp_representation_dims,
 61 |         block_channels=(32, 64, 128, 256),
 62 |         input_conditioning_dims=pc_latent_dims,
 63 |         resnet_block_groups=4,
 64 |         dropout=dropout,
 65 |     ),
 66 | )
 67 | 
 68 | decoder_config = dict(
 69 |     type="ResNet1D",
 70 |     args=dict(
 71 |         block_channels=(32, 64, 128, 256),
 72 |         # out_dim=grasp_pose_dims,
 73 |         input_conditioning_dims=pc_latent_dims,
 74 |         resnet_block_groups=4,
 75 |         dropout=dropout,
 76 |     ),
 77 | )
 78 | 
 79 | loss_config = dict(
 80 |     reconstruction_loss=dict(
 81 |         type="GraspReconstructionLoss",
 82 |         name="reconstruction_loss",
 83 |         args=dict(translation_weight=1, rotation_weight=1),
 84 |     ),
 85 |     latent_loss=dict(
 86 |         type="VAELatentLoss",
 87 |         args=dict(
 88 |             name="grasp_latent",
 89 |             cyclical_annealing=True,
 90 |             num_steps=max_steps,
 91 |             num_cycles=1,
 92 |             ratio=0.5,
 93 |             start=1e-7,
 94 |             stop=0.1,
 95 |         ),
 96 |     ),
 97 |     classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)),
 98 |     # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)),
 99 | )
100 | 
101 | denoiser_model = dict(
102 |     type="TimeConditionedResNet1D",
103 |     args=dict(
104 |         dim=grasp_latent_dims,
105 |         channels=1,
106 |         block_channels=(32, 64, 128, 256),
107 |         input_conditioning_dims=pc_latent_dims,
108 |         resnet_block_groups=4,
109 |         dropout=dropout,
110 |         is_time_conditioned=True,
111 |         learned_variance=False,
112 |         learned_sinusoidal_cond=False,
113 |         random_fourier_features=True,
114 |         # learned_sinusoidal_dim=16,
115 |     ),
116 | )
117 | # Use `model` for single module to be built. If a list of modules are required to be built, use `models` to make sure the outer
118 | # See models/builder.py for more info.
119 | model = dict(
120 |     vae=dict(
121 |         model=dict(
122 |             type="GraspCVAE",
123 |             args=dict(
124 |                 grasp_latent_size=grasp_latent_dims,
125 |                 pc_latent_size=pc_latent_dims,
126 |                 pc_encoder_config=pc_encoder_config,
127 |                 grasp_encoder_config=grasp_encoder_config,
128 |                 decoder_config=decoder_config,
129 |                 loss_config=loss_config,
130 |                 num_output_qualities=num_output_qualities,
131 |                 intermediate_feature_resolution=16,
132 |             ),
133 |         ),
134 |         ckpt_path=vae_ckpt_path,
135 |     ),
136 |     ddm=dict(
137 |         model=dict(
138 |             type="GraspLatentDDM",
139 |             args=dict(
140 |                 model=denoiser_model,
141 |                 latent_in_features=grasp_latent_dims,
142 |                 diffusion_timesteps=1000,
143 |                 noise_scheduler_type="ddpm",
144 |                 diffusion_loss="l2",
145 |                 beta_schedule="linear",
146 |                 is_conditioned=True,
147 |                 joint_training=False,
148 |                 denoising_loss_weight=1,
149 |                 variance_type="fixed_large",
150 |                 elucidated_diffusion=False,
151 |                 beta_start=0.00005,
152 |                 beta_end=0.001,
153 |             ),
154 |         ),
155 |         ckpt_path=ddm_ckpt_path,
156 |         use_vae_ema_model=True,
157 |     ),
158 | )
159 | ## -- Data --
160 | augs_config = [
161 |     dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)),
162 |     dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)),
163 |     dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)),
164 | ]
165 | 
166 | 
167 | train_data = dict(
168 |     type="AcronymPartialPointclouds",
169 |     args=dict(
170 |         data_root_dir=root_data_dir,
171 |         max_scenes=max_scenes,
172 |         camera_json=camera_json,
173 |         num_points_per_pc=pc_num_points,
174 |         num_grasps_per_obj=100,
175 |         rotation_repr="mrp",
176 |         augs_config=augs_config,
177 |         split="train",
178 |         depth_px_scale=10000,
179 |         scene_prefix="scene_",
180 |         min_usable_pc_points=1024,
181 |         preempt_load_data=True,
182 |         use_failed_grasps=False,
183 |         failed_grasp_ratio=0.3,
184 |         load_fixed_grasp_transforms=None,
185 |         is_input_dataset_normalized=False,
186 |         num_repeat_dataset=10,
187 |     ),
188 |     batch_size=batch_size,
189 | )
190 | 
191 | data = dict(
192 |     train=train_data,
193 | )
194 | 
195 | # Patch: Mesh Categories. Used for simulation
196 | mesh_root = root_data_dir
197 | mesh_categories = [
198 |     "Cup",
199 |     "Mug",
200 |     "Fork",
201 |     "Hat",
202 |     "Bottle",
203 |     "Bowl",
204 |     "Car",
205 |     "Donut",
206 |     "Laptop",
207 |     "MousePad",
208 |     "Pencil",
209 |     "Plate",
210 |     "ScrewDriver",
211 |     "WineBottle",
212 |     "Backpack",
213 |     "Bag",
214 |     "Banana",
215 |     "Battery",
216 |     "BeanBag",
217 |     "Bear",
218 |     "Book",
219 |     "Books",
220 |     "Camera",
221 |     "CerealBox",
222 |     "Cookie",
223 |     "Hammer",
224 |     "Hanger",
225 |     "Knife",
226 |     "MilkCarton",
227 |     "Painting",
228 |     "PillBottle",
229 |     "Plant",
230 |     "PowerSocket",
231 |     "PowerStrip",
232 |     "PS3",
233 |     "PSP",
234 |     "Ring",
235 |     "Scissors",
236 |     "Shampoo",
237 |     "Shoes",
238 |     "Sheep",
239 |     "Shower",
240 |     "Sink",
241 |     "SoapBottle",
242 |     "SodaCan",
243 |     "Spoon",
244 |     "Statue",
245 |     "Teacup",
246 |     "Teapot",
247 |     "ToiletPaper",
248 |     "ToyFigure",
249 |     "Wallet",
250 |     "WineGlass",
251 |     "Cow",
252 |     "Sheep",
253 |     "Cat",
254 |     "Dog",
255 |     "Pizza",
256 |     "Elephant",
257 |     "Donkey",
258 |     "RubiksCube",
259 |     "Tank",
260 |     "Truck",
261 |     "USBStick",
262 | ]
263 | 
264 | ## --------------------  Trainer  --------------------
265 | ## Logger
266 | logger = dict(type="WandbLogger", project="partial-pc-63c-ema")
267 | 
268 | optimizer = dict(
269 |     initial_lr=0.001,
270 |     scheduler=dict(
271 |         type="MultiStepLR",
272 |         args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1),
273 |     ),
274 | )
275 | 
276 | trainer = dict(
277 |     max_steps=max_steps,
278 |     batch_size=batch_size,
279 |     num_workers=num_workers_per_gpu * num_gpus,
280 |     accelerator="gpu",
281 |     devices=num_gpus,
282 |     strategy="ddp",
283 |     logger=logger,
284 |     log_every_n_steps=100,
285 |     optimizer=optimizer,
286 |     resume_training_from_last=resume_training_from_last,
287 |     check_val_every_n_epoch=1,
288 |     ema=dict(
289 |         beta=0.990,
290 |         update_after_step=1000,
291 |     ),
292 |     deterministic=True,
293 | )
294 | 


--------------------------------------------------------------------------------
/grasp_ldm/models/modules/ext/pvcnn/utils.py:
--------------------------------------------------------------------------------
  1 | # Adapted from PVCNN and PVD
  2 | import functools
  3 | 
  4 | import torch.nn as nn
  5 | 
  6 | from .modules import (
  7 |     PointNetAModule,
  8 |     PointNetFPModule,
  9 |     PointNetSAModule,
 10 |     PVConv,
 11 |     SharedMLP,
 12 | )
 13 | 
 14 | __all__ = [
 15 |     "create_mlp_components",
 16 |     "create_pointnet_components",
 17 |     "create_pointnet2_sa_components",
 18 |     "create_pointnet2_fp_modules",
 19 | ]
 20 | 
 21 | 
 22 | def _linear_bn_relu(in_channels, out_channels):
 23 |     return nn.Sequential(
 24 |         nn.Linear(in_channels, out_channels),
 25 |         nn.BatchNorm1d(out_channels),
 26 |         nn.ReLU(True),
 27 |     )
 28 | 
 29 | 
 30 | def create_mlp_components(
 31 |     in_channels, out_channels, classifier=False, dim=2, width_multiplier=1
 32 | ):
 33 |     r = width_multiplier
 34 | 
 35 |     if dim == 1:
 36 |         block = _linear_bn_relu
 37 |     else:
 38 |         block = SharedMLP
 39 |     if not isinstance(out_channels, (list, tuple)):
 40 |         out_channels = [out_channels]
 41 |     if len(out_channels) == 0 or (len(out_channels) == 1 and out_channels[0] is None):
 42 |         return nn.Sequential(), in_channels, in_channels
 43 | 
 44 |     layers = []
 45 |     for oc in out_channels[:-1]:
 46 |         if oc < 1:
 47 |             layers.append(nn.Dropout(oc))
 48 |         else:
 49 |             oc = int(r * oc)
 50 |             layers.append(block(in_channels, oc))
 51 |             in_channels = oc
 52 |     if dim == 1:
 53 |         if classifier:
 54 |             layers.append(nn.Linear(in_channels, out_channels[-1]))
 55 |         else:
 56 |             layers.append(_linear_bn_relu(in_channels, int(r * out_channels[-1])))
 57 |     else:
 58 |         if classifier:
 59 |             layers.append(nn.Conv1d(in_channels, out_channels[-1], 1))
 60 |         else:
 61 |             layers.append(SharedMLP(in_channels, int(r * out_channels[-1])))
 62 |     return layers, out_channels[-1] if classifier else int(r * out_channels[-1])
 63 | 
 64 | 
 65 | def create_pointnet_components(
 66 |     blocks,
 67 |     in_channels,
 68 |     with_se=False,
 69 |     normalize=True,
 70 |     eps=0,
 71 |     width_multiplier=1,
 72 |     voxel_resolution_multiplier=1,
 73 | ):
 74 |     r, vr = width_multiplier, voxel_resolution_multiplier
 75 | 
 76 |     layers, concat_channels = [], 0
 77 |     for out_channels, num_blocks, voxel_resolution in blocks:
 78 |         out_channels = int(r * out_channels)
 79 |         if voxel_resolution is None:
 80 |             block = SharedMLP
 81 |         else:
 82 |             block = functools.partial(
 83 |                 PVConv,
 84 |                 kernel_size=3,
 85 |                 resolution=int(vr * voxel_resolution),
 86 |                 with_se=with_se,
 87 |                 normalize=normalize,
 88 |                 eps=eps,
 89 |             )
 90 |         for _ in range(num_blocks):
 91 |             layers.append(block(in_channels, out_channels))
 92 |             in_channels = out_channels
 93 |             concat_channels += out_channels
 94 |     return layers, in_channels, concat_channels
 95 | 
 96 | 
 97 | def create_pointnet2_sa_components(
 98 |     sa_blocks,
 99 |     extra_feature_channels,
100 |     embed_dim=0,
101 |     use_attention=False,
102 |     dropout=0.1,
103 |     with_se=False,
104 |     voxelization_normalize=True,
105 |     eps=0,
106 |     width_multiplier=1,
107 |     voxel_resolution_multiplier=1,
108 | ):
109 |     r, vr = width_multiplier, voxel_resolution_multiplier
110 |     in_channels = extra_feature_channels + 3
111 | 
112 |     sa_layers, sa_in_channels = [], []
113 |     c = 0
114 |     for conv_configs, sa_configs in sa_blocks:
115 |         k = 0
116 |         sa_in_channels.append(in_channels)
117 |         sa_blocks = []
118 | 
119 |         if conv_configs is not None:
120 |             out_channels, num_blocks, voxel_resolution = conv_configs
121 |             out_channels = int(r * out_channels)
122 |             for p in range(num_blocks):
123 |                 attention = (c + 1) % 2 == 0 and use_attention and p == 0
124 |                 if voxel_resolution is None:
125 |                     block = SharedMLP
126 |                 else:
127 |                     block = functools.partial(
128 |                         PVConv,
129 |                         kernel_size=3,
130 |                         resolution=int(vr * voxel_resolution),
131 |                         use_attention=attention,
132 |                         dropout=dropout,
133 |                         with_se=with_se,
134 |                         with_se_relu=True,
135 |                         normalize=voxelization_normalize,
136 |                         eps=eps,
137 |                     )
138 | 
139 |                 if c == 0:
140 |                     sa_blocks.append(block(in_channels, out_channels))
141 |                 elif k == 0:
142 |                     sa_blocks.append(block(in_channels + embed_dim, out_channels))
143 |                 in_channels = out_channels
144 |                 k += 1
145 |             extra_feature_channels = in_channels
146 |         num_centers, radius, num_neighbors, out_channels = sa_configs
147 |         _out_channels = []
148 |         for oc in out_channels:
149 |             if isinstance(oc, (list, tuple)):
150 |                 _out_channels.append([int(r * _oc) for _oc in oc])
151 |             else:
152 |                 _out_channels.append(int(r * oc))
153 |         out_channels = _out_channels
154 |         if num_centers is None:
155 |             block = PointNetAModule
156 |         else:
157 |             block = functools.partial(
158 |                 PointNetSAModule,
159 |                 num_centers=num_centers,
160 |                 radius=radius,
161 |                 num_neighbors=num_neighbors,
162 |             )
163 |         sa_blocks.append(
164 |             block(
165 |                 in_channels=extra_feature_channels + (embed_dim if k == 0 else 0),
166 |                 out_channels=out_channels,
167 |                 include_coordinates=True,
168 |             )
169 |         )
170 |         c += 1
171 |         in_channels = extra_feature_channels = sa_blocks[-1].out_channels
172 |         if len(sa_blocks) == 1:
173 |             sa_layers.append(sa_blocks[0])
174 |         else:
175 |             sa_layers.append(nn.Sequential(*sa_blocks))
176 | 
177 |     return (
178 |         sa_layers,
179 |         sa_in_channels,
180 |         in_channels,
181 |         1 if num_centers is None else num_centers,
182 |     )
183 | 
184 | 
185 | def create_pointnet2_fp_modules(
186 |     fp_blocks,
187 |     in_channels,
188 |     sa_in_channels,
189 |     embed_dim=0,
190 |     use_attention=False,
191 |     dropout=0.1,
192 |     with_se=False,
193 |     normalize=True,
194 |     eps=0,
195 |     width_multiplier=1,
196 |     voxel_resolution_multiplier=1,
197 | ):
198 |     r, vr = width_multiplier, voxel_resolution_multiplier
199 | 
200 |     fp_layers = []
201 |     c = 0
202 |     for fp_idx, (fp_configs, conv_configs) in enumerate(fp_blocks):
203 |         fp_blocks = []
204 |         out_channels = tuple(int(r * oc) for oc in fp_configs)
205 |         fp_blocks.append(
206 |             PointNetFPModule(
207 |                 in_channels=in_channels + sa_in_channels[-1 - fp_idx] + embed_dim,
208 |                 out_channels=out_channels,
209 |             )
210 |         )
211 |         in_channels = out_channels[-1]
212 | 
213 |         if conv_configs is not None:
214 |             out_channels, num_blocks, voxel_resolution = conv_configs
215 |             out_channels = int(r * out_channels)
216 |             for p in range(num_blocks):
217 |                 attention = (
218 |                     (c + 1) % 2 == 0
219 |                     and c < len(fp_blocks) - 1
220 |                     and use_attention
221 |                     and p == 0
222 |                 )
223 |                 if voxel_resolution is None:
224 |                     block = SharedMLP
225 |                 else:
226 |                     block = functools.partial(
227 |                         PVConv,
228 |                         kernel_size=3,
229 |                         resolution=int(vr * voxel_resolution),
230 |                         use_attention=attention,
231 |                         dropout=dropout,
232 |                         with_se=with_se,
233 |                         with_se_relu=True,
234 |                         normalize=normalize,
235 |                         eps=eps,
236 |                     )
237 | 
238 |                 fp_blocks.append(block(in_channels, out_channels))
239 |                 in_channels = out_channels
240 |         if len(fp_blocks) == 1:
241 |             fp_layers.append(fp_blocks[0])
242 |         else:
243 |             fp_layers.append(nn.Sequential(*fp_blocks))
244 | 
245 |         c += 1
246 | 
247 |     return fp_layers, in_channels
248 | 


--------------------------------------------------------------------------------