├── tools ├── __init__.py ├── train_generator.py └── generate_grasps.py ├── .dockerignore ├── grasp_ldm ├── utils │ ├── __init__.py │ ├── torch_utils.py │ ├── utils.py │ ├── vis.py │ └── camera.py ├── models │ ├── modules │ │ ├── __init__.py │ │ ├── ext │ │ │ ├── __init__.py │ │ │ └── pvcnn │ │ │ │ ├── __init__.py │ │ │ │ ├── modules │ │ │ │ ├── loss.py │ │ │ │ ├── functional │ │ │ │ │ ├── src │ │ │ │ │ │ ├── ball_query │ │ │ │ │ │ │ ├── ball_query.cuh │ │ │ │ │ │ │ ├── ball_query.hpp │ │ │ │ │ │ │ ├── ball_query.cpp │ │ │ │ │ │ │ └── ball_query.cu │ │ │ │ │ │ ├── grouping │ │ │ │ │ │ │ ├── grouping.hpp │ │ │ │ │ │ │ ├── grouping.cuh │ │ │ │ │ │ │ ├── grouping.cpp │ │ │ │ │ │ │ └── grouping.cu │ │ │ │ │ │ ├── voxelization │ │ │ │ │ │ │ ├── vox.cuh │ │ │ │ │ │ │ ├── vox.hpp │ │ │ │ │ │ │ ├── vox.cpp │ │ │ │ │ │ │ └── vox.cu │ │ │ │ │ │ ├── sampling │ │ │ │ │ │ │ ├── sampling.hpp │ │ │ │ │ │ │ ├── sampling.cuh │ │ │ │ │ │ │ ├── sampling.cpp │ │ │ │ │ │ │ └── sampling.cu │ │ │ │ │ │ ├── interpolate │ │ │ │ │ │ │ ├── trilinear_devox.cuh │ │ │ │ │ │ │ ├── trilinear_devox.hpp │ │ │ │ │ │ │ ├── neighbor_interpolate.hpp │ │ │ │ │ │ │ ├── neighbor_interpolate.cuh │ │ │ │ │ │ │ ├── neighbor_interpolate.cpp │ │ │ │ │ │ │ ├── trilinear_devox.cpp │ │ │ │ │ │ │ ├── trilinear_devox.cu │ │ │ │ │ │ │ └── neighbor_interpolate.cu │ │ │ │ │ │ ├── utils.hpp │ │ │ │ │ │ ├── cuda_utils.cuh │ │ │ │ │ │ └── bindings.cpp │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── loss.py │ │ │ │ │ ├── ball_query.py │ │ │ │ │ ├── backend.py │ │ │ │ │ ├── grouping.py │ │ │ │ │ ├── voxelization.py │ │ │ │ │ ├── interpolatation.py │ │ │ │ │ ├── devoxelization.py │ │ │ │ │ └── sampling.py │ │ │ │ ├── __init__.py │ │ │ │ ├── se.py │ │ │ │ ├── shared_mlp.py │ │ │ │ ├── voxelization.py │ │ │ │ ├── ball_query.py │ │ │ │ ├── pointnet.py │ │ │ │ └── pvconv.py │ │ │ │ ├── README.md │ │ │ │ ├── pointnet2.py │ │ │ │ └── utils.py │ │ ├── base_network.py │ │ ├── modules.py │ │ └── class_conditioned_resnet.py │ ├── __init__.py │ ├── diffusion │ │ └── __init__.py │ ├── builder.py │ └── grasp_classifier.py ├── __init__.py ├── losses │ ├── __init__.py │ ├── builder.py │ └── loss.py ├── dataset │ ├── __init__.py │ ├── cameras │ │ └── camera_d435i_dummy.json │ ├── acronym │ │ ├── __init__.py │ │ └── gripper_ctrl_pts.json │ ├── builder.py │ └── pl_wrapper.py ├── inference │ └── __init__.py └── trainers │ ├── __init__.py │ ├── mixins.py │ ├── experiment.py │ └── grasp_classification_trainer.py ├── doc └── img │ └── arch_graspldm.png ├── .gitignore ├── NOTICE ├── environment.yml ├── LICENSE ├── requirements.txt ├── .docker ├── build.sh ├── gpu_env.Dockerfile └── run.sh ├── .devcontainer └── devcontainer.json ├── setup.py ├── .pre-commit-config.yaml ├── configs └── generation │ ├── fpc │ └── fpc_1a_latentc3_z4_pc64_180k.py │ └── partial_pc │ └── ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k.py └── README.md /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | output 2 | -------------------------------------------------------------------------------- /grasp_ldm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /grasp_ldm/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /grasp_ldm/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .loss import * 2 | -------------------------------------------------------------------------------- /grasp_ldm/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import * 2 | -------------------------------------------------------------------------------- /doc/img/arch_graspldm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuldeepbrd1/graspLDM/HEAD/doc/img/arch_graspldm.png -------------------------------------------------------------------------------- /grasp_ldm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .grasp_ldm import GraspLatentDDM 2 | from .grasp_vae import GraspCVAE 3 | -------------------------------------------------------------------------------- /grasp_ldm/inference/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import InferenceLDM, InferenceVAE 2 | from .inference_base import Conditioning, ModelType 3 | -------------------------------------------------------------------------------- /grasp_ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .elucidated_diffusion import ElucidatedDiffusion 2 | from .gaussian_diffusion import GaussianDiffusion1D 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | wandb 3 | checkpoints/* 4 | output/* 5 | **/__pycache__ 6 | *.cpython 7 | *.pyc 8 | *.pt.trace.* 9 | *.ckpt 10 | *.out 11 | *.swp 12 | *.pt 13 | output/* 14 | *.gif 15 | *.crt 16 | *.pkl 17 | **/*.egg-info 18 | .vscode 19 | data 20 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from . 
import functional as F 4 | 5 | __all__ = ["KLLoss"] 6 | 7 | 8 | class KLLoss(nn.Module): 9 | def forward(self, x, y): 10 | return F.kl_loss(x, y) 11 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Kuldeep Rambhai Barad, University of Luxembourg and Redwire Space Europe 2 | 3 | This software was developed at the Interdisciplinary Center for Security, Reliability and Trust (SnT) of the University of Luxembourg in partnership with Redwire Space Europe (Made In Space Europe Sarl.). 4 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: grasp_ldm 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.8 6 | - numpy 7 | # You need numpy from conda to avoid difficulties with 8 | # glibc and opengl issueas for visualization with trimesh/pyglet 9 | - pip 10 | - pip: 11 | - -r file:requirements.txt 12 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_CUH 2 | #define _BALL_QUERY_CUH 3 | 4 | void ball_query(int b, int n, int m, float r2, int u, 5 | const float *centers_coords, const float *points_coords, 6 | int *neighbors_indices); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .ball_query import BallQuery 2 | from .frustum import FrustumPointNetLoss 3 | from .loss import KLLoss 4 | from .pointnet import PointNetAModule, PointNetFPModule, PointNetSAModule 5 | from .pvconv import PVConv 6 | from .se import SE3d 7 | from .shared_mlp import SharedMLP 8 | from .voxelization import Voxelization 9 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_HPP 2 | #define _GROUPING_HPP 3 | 4 | #include 5 | 6 | at::Tensor grouping_forward(at::Tensor features, at::Tensor indices); 7 | at::Tensor grouping_backward(at::Tensor grad_y, at::Tensor indices, 8 | const int n); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/__init__.py: -------------------------------------------------------------------------------- 1 | from .ball_query import ball_query 2 | from .devoxelization import trilinear_devoxelize 3 | from .grouping import grouping 4 | from .interpolatation import nearest_neighbor_interpolate 5 | from .loss import huber_loss, kl_loss 6 | from .sampling import furthest_point_sample, gather, logits_mask 7 | from .voxelization import avg_voxelize 8 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_HPP 2 | #define _BALL_QUERY_HPP 3 | 4 | #include 5 | 6 | at::Tensor 
ball_query_forward(at::Tensor centers_coords, 7 | at::Tensor points_coords, const float radius, 8 | const int num_neighbors); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _GROUPING_CUH 2 | #define _GROUPING_CUH 3 | 4 | void grouping(int b, int c, int n, int m, int u, const float *features, 5 | const int *indices, float *out); 6 | void grouping_grad(int b, int c, int n, int m, int u, const float *grad_y, 7 | const int *indices, float *grad_x); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _VOX_CUH 2 | #define _VOX_CUH 3 | 4 | // CUDA function declarations 5 | void avg_voxelize(int b, int c, int n, int r, int r2, int r3, const int *coords, 6 | const float *feat, int *ind, int *cnt, float *out); 7 | void avg_voxelize_grad(int b, int c, int n, int s, const int *idx, 8 | const int *cnt, const float *grad_y, float *grad_x); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /grasp_ldm/dataset/cameras/camera_d435i_dummy.json: -------------------------------------------------------------------------------- 1 | { 2 | "hfov": 87, 3 | "vfov": 58, 4 | "width": 640, 5 | "height": 480, 6 | "cameraMatrix": [ 7 | [ 8 | 904.7, 9 | 0, 10 | 320.0 11 | ], 12 | [ 13 | 0, 14 | 904.7, 15 | 240.0 16 | ], 17 | [ 18 | 0, 19 | 0, 20 | 1 21 | ] 22 | ], 23 | "distCoeffs": [] 24 | } 25 | -------------------------------------------------------------------------------- /grasp_ldm/losses/builder.py: -------------------------------------------------------------------------------- 1 | from .loss import * 2 | 3 | ALL_LOSSES = { 4 | "VAEReconstructionLoss": VAEReconstructionLoss, 5 | "VAELatentLoss": VAELatentLoss, 6 | "GraspReconstructionLoss": GraspReconstructionLoss, 7 | "QualityLoss": QualityLoss, 8 | "ClassificationLoss": ClassificationLoss, 9 | "GraspControlPointsReconstructionLoss": GraspControlPointsReconstructionLoss, 10 | } 11 | 12 | 13 | def build_loss_from_cfg(loss_cfg): 14 | return ALL_LOSSES[loss_cfg.type](**loss_cfg.args) 15 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_HPP 2 | #define _SAMPLING_HPP 3 | 4 | #include 5 | 6 | at::Tensor gather_features_forward(at::Tensor features, at::Tensor indices); 7 | at::Tensor gather_features_backward(at::Tensor grad_y, at::Tensor indices, 8 | const int n); 9 | at::Tensor furthest_point_sampling_forward(at::Tensor coords, 10 | const int num_samples); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_CUH 2 | #define _SAMPLING_CUH 3 | 4 | void gather_features(int b, int c, int n, int m, const float *features, 5 | const int *indices, float *out); 6 | void gather_features_grad(int b, int c, int n, int m, const float *grad_y, 
7 | const int *indices, float *grad_x); 8 | void furthest_point_sampling(int b, int n, int m, const float *coords, 9 | float *distances, int *indices); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _VOX_HPP 2 | #define _VOX_HPP 3 | 4 | #include 5 | #include 6 | 7 | std::vector avg_voxelize_forward(const at::Tensor features, 8 | const at::Tensor coords, 9 | const int resolution); 10 | 11 | at::Tensor avg_voxelize_backward(const at::Tensor grad_y, 12 | const at::Tensor indices, 13 | const at::Tensor cnt); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | __all__ = ["kl_loss", "huber_loss"] 5 | 6 | 7 | def kl_loss(x, y): 8 | x = F.softmax(x.detach(), dim=1) 9 | y = F.log_softmax(y, dim=1) 10 | return torch.mean(torch.sum(x * (torch.log(x) - y), dim=1)) 11 | 12 | 13 | def huber_loss(error, delta): 14 | abs_error = torch.abs(error) 15 | quadratic = torch.min(abs_error, torch.full_like(abs_error, fill_value=delta)) 16 | losses = 0.5 * (quadratic**2) + delta * (abs_error - quadratic) 17 | return torch.mean(losses) 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Kuldeep Rambhai Barad, University of Luxembourg and Redwire Space Europe 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _TRILINEAR_DEVOX_CUH 2 | #define _TRILINEAR_DEVOX_CUH 3 | 4 | // CUDA function declarations 5 | void trilinear_devoxelize(int b, int c, int n, int r, int r2, int r3, 6 | bool is_training, const float *coords, 7 | const float *feat, int *inds, float *wgts, 8 | float *outs); 9 | void trilinear_devoxelize_grad(int b, int c, int n, int r3, const int *inds, 10 | const float *wgts, const float *grad_y, 11 | float *grad_x); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _TRILINEAR_DEVOX_HPP 2 | #define _TRILINEAR_DEVOX_HPP 3 | 4 | #include 5 | #include 6 | 7 | std::vector trilinear_devoxelize_forward(const int r, 8 | const bool is_training, 9 | const at::Tensor coords, 10 | const at::Tensor features); 11 | 12 | at::Tensor trilinear_devoxelize_backward(const at::Tensor grad_y, 13 | const at::Tensor indices, 14 | const at::Tensor weights, const int r); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | addict>=2.4.0 2 | # black>=22.0.0 # Formatting 3 | certifi 4 | diffusers[torch] 5 | einops 6 | h5py>=3.7.0 7 | matplotlib>=3.6.0 8 | ninja 9 | opencv-python-headless 10 | pandas>=1.5.1 11 | Pillow>=9.2.0 12 | pyglet==1.5.27 13 | pyrender 14 | pytorch-lightning==1.8.0 15 | scikit-learn==1.2.2 16 | scipy>=1.9.0 17 | seaborn>=0.12.1 18 | shapely>=2.0.0 19 | six>=1.16.0 20 | torch==1.13.1 -e https://download.pytorch.org/whl/cu117 21 | torchvision==0.14.1 -e https://download.pytorch.org/whl/cu117 22 | tqdm==4.64.1 23 | trimesh==3.17.1 24 | # wandb==0.13.6 # Logging 25 | yapf==0.32.0 26 | 27 | ## Optional 28 | # ipykernel 29 | # iprogress 30 | # jupyter 31 | # glooey 32 | # torcheval 33 | # pytorch3d @ git+https://github.com/facebookresearch/pytorch3d.git@v0.7.4 34 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/README.md: -------------------------------------------------------------------------------- 1 | # PVCNN: Point-Voxel CNN for Efficient 3D Deep Learning 2 | 3 | Source: [https://github.com/mit-han-lab/pvcnn](https://github.com/mit-han-lab/pvcnn) 4 | 5 | See [grasp_ldm/models/modules/ext/pvcnn/benchmark.py](grasp_ldm/models/modules/ext/pvcnn/benchmark.py) for the preliminary comparison between PVCNN and PointNet++. 6 | 7 | ``` 8 | @inproceedings{liu2019pvcnn, 9 | title={Point-Voxel CNN for Efficient 3D Deep Learning}, 10 | author={Liu, Zhijian and Tang, Haotian and Lin, Yujun and Han, Song}, 11 | booktitle={Advances in Neural Information Processing Systems}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## License 17 | 18 | This repository is released under the MIT license. See [LICENSE](LICENSE) for additional details. 
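(Editorial sketch, not part of the upstream PVCNN README.) The voxelize/devoxelize round trip at the core of PVConv can be sanity-checked in a few lines using the modules in this directory. The snippet below is a minimal, hypothetical shape check; it assumes a CUDA device, since the functional backend is JIT-compiled from the CUDA sources under `modules/functional/src` on first import.

```python
import torch

from grasp_ldm.models.modules.ext.pvcnn.modules import Voxelization
from grasp_ldm.models.modules.ext.pvcnn.modules import functional as PF

B, C, N, R = 2, 16, 1024, 32                    # batch, channels, points, voxel resolution
features = torch.randn(B, C, N, device="cuda")  # per-point features [B, C, N]
coords = torch.randn(B, 3, N, device="cuda")    # per-point xyz      [B, 3, N]

# Voxelize: average point features into an R^3 grid; also returns voxel-space coords
voxelize = Voxelization(resolution=R, normalize=True)
voxel_features, voxel_coords = voxelize(features, coords)  # [B, C, R, R, R], [B, 3, N]

# Devoxelize: trilinearly interpolate voxel features back onto the points
point_features = PF.trilinear_devoxelize(voxel_features, voxel_coords, R, True)  # [B, C, N]
assert point_features.shape == (B, C, N)
```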
19 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _NEIGHBOR_INTERPOLATE_HPP 2 | #define _NEIGHBOR_INTERPOLATE_HPP 3 | 4 | #include 5 | #include 6 | 7 | std::vector 8 | three_nearest_neighbors_interpolate_forward(at::Tensor points_coords, 9 | at::Tensor centers_coords, 10 | at::Tensor centers_features); 11 | at::Tensor three_nearest_neighbors_interpolate_backward(at::Tensor grad_y, 12 | at::Tensor indices, 13 | at::Tensor weights, 14 | const int m); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/ball_query.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .backend import _backend 4 | 5 | __all__ = ["ball_query"] 6 | 7 | 8 | def ball_query(centers_coords, points_coords, radius, num_neighbors): 9 | """ 10 | :param centers_coords: coordinates of centers, FloatTensor[B, 3, M] 11 | :param points_coords: coordinates of points, FloatTensor[B, 3, N] 12 | :param radius: float, radius of ball query 13 | :param num_neighbors: int, maximum number of neighbors 14 | :return: 15 | neighbor_indices: indices of neighbors, IntTensor[B, M, U] 16 | """ 17 | centers_coords = centers_coords.contiguous() 18 | points_coords = points_coords.contiguous() 19 | return _backend.ball_query(centers_coords, points_coords, radius, num_neighbors) 20 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/se.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | __all__ = ["SE3d"] 5 | 6 | 7 | class Swish(nn.Module): 8 | def forward(self, x): 9 | return x * torch.sigmoid(x) 10 | 11 | 12 | class SE3d(nn.Module): 13 | def __init__(self, channel, reduction=8, use_relu=False): 14 | super().__init__() 15 | self.fc = nn.Sequential( 16 | nn.Linear(channel, channel // reduction, bias=False), 17 | nn.ReLU(True) if use_relu else Swish(), 18 | nn.Linear(channel // reduction, channel, bias=False), 19 | nn.Sigmoid(), 20 | ) 21 | 22 | def forward(self, inputs): 23 | return inputs * self.fc(inputs.mean(-1).mean(-1).mean(-1)).view( 24 | inputs.shape[0], inputs.shape[1], 1, 1, 1 25 | ) 26 | -------------------------------------------------------------------------------- /.docker/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMAGE_NAME="kuldeepbrd1/grasp_ldm:latest" 4 | 5 | SCRIPT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" &>/dev/null && pwd)" 6 | REPOSITORY_DIR="$(dirname "${SCRIPT_DIR}")" 7 | 8 | ## Parse TAG and forward additional build arguments 9 | if [ "${#}" -gt "0" ]; then 10 | if [[ "${1}" != "-"* ]]; then 11 | IMAGE_NAME="${IMAGE_NAME}:${1}" 12 | BUILD_ARGS=${*:2} 13 | else 14 | BUILD_ARGS=${*:1} 15 | fi 16 | fi 17 | 18 | ## Build the image 19 | DOCKER_BUILD_CMD=( 20 | docker build 21 | "${REPOSITORY_DIR}" 22 | --file "${REPOSITORY_DIR}/.docker/Dockerfile" 23 | --tag "${IMAGE_NAME}" 24 | "${BUILD_ARGS}" 25 | ) 26 | echo -e "\033[1;30m${DOCKER_BUILD_CMD[*]}\033[0m" | xargs 27 | # shellcheck disable=SC2048 28 | exec ${DOCKER_BUILD_CMD[*]} 29 | 
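# Usage sketch (editorial note, not part of the original script): a first
# positional argument that does not start with "-" is appended to IMAGE_NAME as
# a tag, and all remaining arguments are forwarded to `docker build`, e.g.
#   bash .docker/build.sh --no-cache
# Two things worth checking before relying on it: IMAGE_NAME above already
# carries the ":latest" tag, so passing a tag yields a doubly-tagged reference
# such as "kuldeepbrd1/grasp_ldm:latest:v0.1", and --file points at
# ".docker/Dockerfile" while the tree above lists only ".docker/gpu_env.Dockerfile".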
-------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/utils.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _UTILS_HPP 2 | #define _UTILS_HPP 3 | 4 | #include 5 | #include 6 | 7 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") 8 | 9 | #define CHECK_CONTIGUOUS(x) \ 10 | TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor") 11 | 12 | #define CHECK_IS_INT(x) \ 13 | TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, \ 14 | #x " must be an int tensor") 15 | 16 | #define CHECK_IS_FLOAT(x) \ 17 | TORCH_CHECK(x.scalar_type() == at::ScalarType::Float, \ 18 | #x " must be a float tensor") 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | // "image": "image_name", 3 | "build": { 4 | "dockerfile": "../.docker/gpu_env.Dockerfile", 5 | "context": "..", 6 | "args": {}, 7 | "target": "" // 8 | }, 9 | "containerEnv": { 10 | "DISPLAY": "${localEnv:DISPLAY}", 11 | "QT_X11_NO_MITSHM": "1" 12 | }, 13 | "runArgs": [ 14 | "--network=host", 15 | "--volume=/tmp/.X11-unix/:/tmp/.X11-unix/", 16 | // "--volume=:/workspaces/data", 17 | "--device=/dev/dri:/dev/dri", 18 | "--gpus", 19 | "all", 20 | "--privileged" 21 | ], 22 | "customizations": { 23 | "vscode": { 24 | "extensions": [ 25 | "ms-python.python", 26 | "njpwerner.autodocstring", 27 | "ms-toolsai.jupyter" 28 | ] 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import find_namespace_packages, find_packages, setup 4 | 5 | from grasp_ldm import __version__ 6 | 7 | # here = os.path.abspath(os.path.dirname(__file__)) 8 | # requires_list = [] 9 | # with open(os.path.join(here, 'requirements.txt'), encoding='utf-8') as f: 10 | # for line in f: 11 | # requires_list.append(str(line)) 12 | 13 | setup( 14 | name="grasp_ldm", 15 | version=__version__, 16 | author="Kuldeep Barad", 17 | # TODO: Improve grasp_ldm_utils module by combining internal and external utils 18 | packages=["grasp_ldm", "grasp_ldm.tools", "grasp_ldm_utils"], 19 | # packages=find_packages(), 20 | package_dir={ 21 | "grasp_ldm": "grasp_ldm", 22 | "grasp_ldm.tools": "tools", 23 | "grasp_ldm_utils": "utils", 24 | }, 25 | python_requires=">=3.8.0, <3.10", 26 | # install_requires=requires_list, 27 | ) 28 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _NEIGHBOR_INTERPOLATE_CUH 2 | #define _NEIGHBOR_INTERPOLATE_CUH 3 | 4 | void three_nearest_neighbors_interpolate(int b, int c, int m, int n, 5 | const float *points_coords, 6 | const float *centers_coords, 7 | const float *centers_features, 8 | int *indices, float *weights, 9 | float *out); 10 | void three_nearest_neighbors_interpolate_grad(int b, int c, int n, int m, 11 | const float *grad_y, 12 | const int *indices, 13 | const float *weights, 14 | float *grad_x); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- 
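# Editorial note (not a repository file): backend.py below JIT-compiles the
# CUDA/C++ sources listed above into the `_pvcnn_backend` extension via
# torch.utils.cpp_extension.load, so the first import needs nvcc and ninja on
# the path. A minimal, hypothetical smoke test of two of the wrapped ops, using
# the shapes documented in ball_query.py and grouping.py:
import torch

from grasp_ldm.models.modules.ext.pvcnn.modules import functional as PF

B, N, M, U = 2, 1024, 128, 16
points = torch.randn(B, 3, N, device="cuda")   # point coordinates [B, 3, N]
centers = torch.randn(B, 3, M, device="cuda")  # query centers     [B, 3, M]

idx = PF.ball_query(centers, points, radius=0.2, num_neighbors=U)  # IntTensor   [B, M, U]
grouped = PF.grouping(points, idx)                                 # FloatTensor [B, 3, M, U]
assert idx.shape == (B, M, U) and grouped.shape == (B, 3, M, U)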
/grasp_ldm/models/modules/ext/pvcnn/modules/functional/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from torch.utils.cpp_extension import load 4 | 5 | _src_path = os.path.dirname(os.path.abspath(__file__)) 6 | _backend = load( 7 | name="_pvcnn_backend", 8 | extra_cflags=["-O3", "-std=c++17"], 9 | sources=[ 10 | os.path.join(_src_path, "src", f) 11 | for f in [ 12 | "ball_query/ball_query.cpp", 13 | "ball_query/ball_query.cu", 14 | "grouping/grouping.cpp", 15 | "grouping/grouping.cu", 16 | "interpolate/neighbor_interpolate.cpp", 17 | "interpolate/neighbor_interpolate.cu", 18 | "interpolate/trilinear_devox.cpp", 19 | "interpolate/trilinear_devox.cu", 20 | "sampling/sampling.cpp", 21 | "sampling/sampling.cu", 22 | "voxelization/vox.cpp", 23 | "voxelization/vox.cu", 24 | "bindings.cpp", 25 | ] 26 | ], 27 | ) 28 | 29 | __all__ = ["_backend"] 30 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include "ball_query.hpp" 2 | #include "ball_query.cuh" 3 | 4 | #include "../utils.hpp" 5 | 6 | at::Tensor ball_query_forward(at::Tensor centers_coords, 7 | at::Tensor points_coords, const float radius, 8 | const int num_neighbors) { 9 | CHECK_CUDA(centers_coords); 10 | CHECK_CUDA(points_coords); 11 | CHECK_CONTIGUOUS(centers_coords); 12 | CHECK_CONTIGUOUS(points_coords); 13 | CHECK_IS_FLOAT(centers_coords); 14 | CHECK_IS_FLOAT(points_coords); 15 | 16 | int b = centers_coords.size(0); 17 | int m = centers_coords.size(2); 18 | int n = points_coords.size(2); 19 | 20 | at::Tensor neighbors_indices = torch::zeros( 21 | {b, m, num_neighbors}, 22 | at::device(centers_coords.device()).dtype(at::ScalarType::Int)); 23 | 24 | ball_query(b, n, m, radius * radius, num_neighbors, 25 | centers_coords.data_ptr(), 26 | points_coords.data_ptr(), 27 | neighbors_indices.data_ptr()); 28 | 29 | return neighbors_indices; 30 | } 31 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/shared_mlp.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | __all__ = ["SharedMLP"] 4 | 5 | 6 | class SharedMLP(nn.Module): 7 | def __init__(self, in_channels, out_channels, dim=1): 8 | super().__init__() 9 | if dim == 1: 10 | conv = nn.Conv1d 11 | bn = nn.BatchNorm1d 12 | elif dim == 2: 13 | conv = nn.Conv2d 14 | bn = nn.BatchNorm2d 15 | else: 16 | raise ValueError 17 | if not isinstance(out_channels, (list, tuple)): 18 | out_channels = [out_channels] 19 | layers = [] 20 | for oc in out_channels: 21 | layers.extend( 22 | [ 23 | conv(in_channels, oc, 1), 24 | bn(oc), 25 | nn.ReLU(True), 26 | ] 27 | ) 28 | in_channels = oc 29 | self.layers = nn.Sequential(*layers) 30 | 31 | def forward(self, inputs): 32 | if isinstance(inputs, (list, tuple)): 33 | return (self.layers(inputs[0]), *inputs[1:]) 34 | else: 35 | return self.layers(inputs) 36 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/grouping.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .backend import _backend 4 | 5 | __all__ = ["grouping"] 6 | 7 | 8 | class Grouping(Function): 9 | @staticmethod 10 | def 
forward(ctx, features, indices): 11 | """ 12 | :param ctx: 13 | :param features: features of points, FloatTensor[B, C, N] 14 | :param indices: neighbor indices of centers, IntTensor[B, M, U], M is #centers, U is #neighbors 15 | :return: 16 | grouped_features: grouped features, FloatTensor[B, C, M, U] 17 | """ 18 | features = features.contiguous() 19 | indices = indices.contiguous() 20 | ctx.save_for_backward(indices) 21 | ctx.num_points = features.size(-1) 22 | return _backend.grouping_forward(features, indices) 23 | 24 | @staticmethod 25 | def backward(ctx, grad_output): 26 | (indices,) = ctx.saved_tensors 27 | grad_features = _backend.grouping_backward( 28 | grad_output.contiguous(), indices, ctx.num_points 29 | ) 30 | return grad_features, None 31 | 32 | 33 | grouping = Grouping.apply 34 | -------------------------------------------------------------------------------- /grasp_ldm/dataset/acronym/__init__.py: -------------------------------------------------------------------------------- 1 | FILTER_63_CATEGORIES = [ 2 | "Cup", 3 | "Mug", 4 | "Fork", 5 | "Hat", 6 | "Bottle", 7 | "Bowl", 8 | "Car", 9 | "Donut", 10 | "Laptop", 11 | "MousePad", 12 | "Pencil", 13 | "Plate", 14 | "ScrewDriver", 15 | "WineBottle", 16 | "Backpack", 17 | "Bag", 18 | "Banana", 19 | "Battery", 20 | "BeanBag", 21 | "Bear", 22 | "Book", 23 | "Books", 24 | "Camera", 25 | "CerealBox", 26 | "Cookie", 27 | "Hammer", 28 | "Hanger", 29 | "Knife", 30 | "MilkCarton", 31 | "Painting", 32 | "PillBottle", 33 | "Plant", 34 | "PowerSocket", 35 | "PowerStrip", 36 | "PS3", 37 | "PSP", 38 | "Ring", 39 | "Scissors", 40 | "Shampoo", 41 | "Shoes", 42 | "Sheep", 43 | "Shower", 44 | "Sink", 45 | "SoapBottle", 46 | "SodaCan", 47 | "Spoon", 48 | "Statue", 49 | "Teacup", 50 | "Teapot", 51 | "ToiletPaper", 52 | "ToyFigure", 53 | "Wallet", 54 | "WineGlass", 55 | "Cow", 56 | "Sheep", 57 | "Cat", 58 | "Dog", 59 | "Pizza", 60 | "Elephant", 61 | "Donkey", 62 | "RubiksCube", 63 | "Tank", 64 | "Truck", 65 | "USBStick", 66 | ] 67 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/base_network.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import Optional 3 | 4 | from torch import Tensor, nn 5 | 6 | 7 | class BaseGraspSampler(nn.Module): 8 | """Base abstract class for Grasp Samplers""" 9 | 10 | def __init__(self): 11 | super(BaseGraspSampler, self).__init__() 12 | 13 | @property 14 | def _type(self) -> str: 15 | return self.__class__.__name__ 16 | 17 | @abstractmethod 18 | def generate_grasps( 19 | self, z: Optional[Tensor] = None, z_cond: Optional[Tensor] = None 20 | ) -> Tensor: 21 | """Abstract method for generating grasp poses given latents (optional: None) 22 | and conditioning input z_cond 23 | """ 24 | raise NotImplementedError 25 | 26 | 27 | class BaseGraspClassifier(nn.Module): 28 | """Base abstract class for Grasp Samplers""" 29 | 30 | def __init__(self): 31 | super(BaseGraspClassifier, self).__init__() 32 | 33 | @property 34 | def _type(self) -> str: 35 | return self.__class__.__name__ 36 | 37 | @abstractmethod 38 | def classify_grasps( 39 | self, grasp_poses: Optional[Tensor] = None, pc: Optional[Tensor] = None 40 | ) -> Tensor: 41 | """Abstract method for generating grasp poses given latents (optional: None) 42 | and conditioning input z_cond 43 | """ 44 | raise NotImplementedError 45 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.3.0 7 | hooks: 8 | - id: check-added-large-files 9 | - id: check-case-conflict 10 | - id: check-executables-have-shebangs 11 | - id: check-merge-conflict 12 | - id: check-shebang-scripts-are-executable 13 | - id: check-symlinks 14 | - id: check-xml 15 | - id: check-yaml 16 | - id: debug-statements 17 | - id: destroyed-symlinks 18 | - id: detect-private-key 19 | - id: end-of-file-fixer 20 | - id: mixed-line-ending 21 | - id: requirements-txt-fixer 22 | - id: trailing-whitespace 23 | 24 | - repo: https://github.com/pycqa/isort 25 | rev: 5.12.0 26 | hooks: 27 | - id: isort 28 | args: ["--profile", "black"] 29 | 30 | - repo: https://github.com/psf/black 31 | rev: 23.7.0 32 | hooks: 33 | - id: black 34 | 35 | - repo: https://github.com/lovesegfault/beautysh 36 | rev: v6.2.1 37 | hooks: 38 | - id: beautysh 39 | 40 | - repo: https://github.com/executablebooks/mdformat 41 | rev: 0.7.15 42 | hooks: 43 | - id: mdformat 44 | 45 | - repo: https://github.com/codespell-project/codespell 46 | rev: v2.1.0 47 | hooks: 48 | - id: codespell 49 | -------------------------------------------------------------------------------- /grasp_ldm/dataset/builder.py: -------------------------------------------------------------------------------- 1 | from .acronym.acronym_grasp_points import ( 2 | AcronymFullPcGraspPointsClassification, 3 | AcronymPartialPcGraspPointsClassification, 4 | ) 5 | from .acronym.acronym_partial_pointclouds import AcronymPartialPointclouds 6 | from .acronym.acronym_pointclouds import AcronymShapenetPointclouds 7 | 8 | POINTCLOUD_GRASP_DATASETS = { 9 | "AcronymShapenetPointclouds": AcronymShapenetPointclouds, 10 | "AcronymPartialPointclouds": AcronymPartialPointclouds, 11 | } 12 | 13 | 14 | POINTCLOUD_GRASP_CLASIFICATION_DATASETS = { 15 | "AcronymFullPcGraspPointsClassification": AcronymFullPcGraspPointsClassification, 16 | "AcronymPartialPcGraspPointsClassification": AcronymPartialPcGraspPointsClassification, 17 | } 18 | 19 | ALL_DATASETS = { 20 | **POINTCLOUD_GRASP_DATASETS, 21 | **POINTCLOUD_GRASP_CLASIFICATION_DATASETS, 22 | } 23 | 24 | 25 | def build_dataset_from_cfg(data_cfg, split): 26 | """Build dataset from config 27 | 28 | Args: 29 | data_cfg (dict): data config 30 | split (str): split name 31 | 32 | Raises: 33 | KeyError: if split not found in data config 34 | 35 | Returns: 36 | Dataset: dataset 37 | """ 38 | if split not in data_cfg: 39 | raise KeyError(f"Could not find split:`{split}` in the data config dict") 40 | 41 | split_cfg = data_cfg[split] 42 | return ALL_DATASETS[split_cfg.type](**split_cfg.args) 43 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/voxelization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from . 
import functional as F 5 | 6 | __all__ = ["Voxelization"] 7 | 8 | 9 | class Voxelization(nn.Module): 10 | def __init__(self, resolution, normalize=True, eps=0): 11 | super().__init__() 12 | self.r = int(resolution) 13 | self.normalize = normalize 14 | self.eps = eps 15 | 16 | def forward(self, features, coords): 17 | coords = coords.detach() 18 | norm_coords = coords - coords.mean(2, keepdim=True) 19 | if self.normalize: 20 | norm_coords = ( 21 | norm_coords 22 | / ( 23 | norm_coords.norm(dim=1, keepdim=True) 24 | .max(dim=2, keepdim=True) 25 | .values 26 | * 2.0 27 | + self.eps 28 | ) 29 | + 0.5 30 | ) 31 | else: 32 | norm_coords = (norm_coords + 1) / 2.0 33 | norm_coords = torch.clamp(norm_coords * self.r, 0, self.r - 1) 34 | vox_coords = torch.round(norm_coords).to(torch.int32) 35 | return F.avg_voxelize(features, vox_coords, self.r), norm_coords 36 | 37 | def extra_repr(self): 38 | return "resolution={}{}".format( 39 | self.r, ", normalized eps = {}".format(self.eps) if self.normalize else "" 40 | ) 41 | -------------------------------------------------------------------------------- /grasp_ldm/dataset/pl_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import pytorch_lightning as pl 4 | from torch.utils.data import DataLoader, Dataset 5 | 6 | 7 | class GraspDataModule(pl.LightningDataModule): 8 | def __init__( 9 | self, 10 | train_dataset: Dataset, 11 | train_batch_size: int = 8, 12 | val_batch_size: int = 8, 13 | num_workers: int = 0, 14 | pin_memory: bool = True, 15 | persistent_workers: bool = True, 16 | **kwargs, 17 | ): 18 | super().__init__() 19 | 20 | self.train_dataset = train_dataset 21 | 22 | self.train_batch_size = train_batch_size 23 | self.val_batch_size = val_batch_size 24 | self.num_workers = num_workers 25 | self.pin_memory = pin_memory 26 | self.persistent_workers = persistent_workers 27 | 28 | def train_dataloader(self): 29 | return DataLoader( 30 | self.train_dataset, 31 | batch_size=self.train_batch_size, 32 | shuffle=True, 33 | num_workers=self.num_workers, 34 | pin_memory=self.pin_memory, 35 | persistent_workers=self.persistent_workers, 36 | ) 37 | 38 | def val_dataloader(self): 39 | raise NotImplementedError 40 | 41 | def test_dataloader(self): 42 | raise NotImplementedError 43 | 44 | def predict_dataloader(self): 45 | raise NotImplementedError 46 | -------------------------------------------------------------------------------- /.docker/gpu_env.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cudagl:11.4.2-devel-ubuntu20.04 2 | 3 | # ENV 4 | ENV HOME_DIR=/root/ 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | 8 | # REQUIREMENTS & CERTS 9 | ADD requirements.txt /tmp/ 10 | 11 | SHELL ["/bin/bash", "-c"] 12 | 13 | # hotfix- cuda source error on ubuntu 20.04 14 | RUN echo "deb [by-hash=no] http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/cuda.list 15 | 16 | # APT 17 | RUN apt-get update -y\ 18 | && apt-get upgrade -y \ 19 | && DEBIAN_FRONTEND=noninteractive \ 20 | apt-get install -q -y --no-install-recommends \ 21 | build-essential \ 22 | cmake \ 23 | dirmngr \ 24 | gnupg2 \ 25 | git \ 26 | iputils-ping \ 27 | ca-certificates \ 28 | nano \ 29 | net-tools \ 30 | python3-dev \ 31 | python3-pip \ 32 | python3-wheel \ 33 | python3-opengl \ 34 | tree \ 35 | unzip \ 36 | wget \ 37 | && rm -rf /var/lib/apt/lists/* \ 38 | && update-ca-certificates \ 39 | && echo "alias 
python=python3" >> /root/.bashrc\ 40 | && echo "alias pip=pip3" >> /root/.bashrc 41 | 42 | # PIP 43 | ENV ACRONYM_INSTALL_PATH=/tmp/acronym 44 | RUN git clone https://github.com/NVlabs/acronym.git ${ACRONYM_INSTALL_PATH} \ 45 | && pip install -r ${ACRONYM_INSTALL_PATH}/requirements.txt \ 46 | && pip install ${ACRONYM_INSTALL_PATH} \ 47 | && rm -r ${ACRONYM_INSTALL_PATH} \ 48 | && pip install -r /tmp/requirements.txt \ 49 | && rm /tmp/requirements.txt 50 | 51 | CMD ["/bin/bash"] 52 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.cpp: -------------------------------------------------------------------------------- 1 | #include "grouping.hpp" 2 | #include "grouping.cuh" 3 | 4 | #include "../utils.hpp" 5 | 6 | at::Tensor grouping_forward(at::Tensor features, at::Tensor indices) { 7 | CHECK_CUDA(features); 8 | CHECK_CUDA(indices); 9 | CHECK_CONTIGUOUS(features); 10 | CHECK_CONTIGUOUS(indices); 11 | CHECK_IS_FLOAT(features); 12 | CHECK_IS_INT(indices); 13 | 14 | int b = features.size(0); 15 | int c = features.size(1); 16 | int n = features.size(2); 17 | int m = indices.size(1); 18 | int u = indices.size(2); 19 | at::Tensor output = torch::zeros( 20 | {b, c, m, u}, at::device(features.device()).dtype(at::ScalarType::Float)); 21 | grouping(b, c, n, m, u, features.data_ptr(), indices.data_ptr(), 22 | output.data_ptr()); 23 | return output; 24 | } 25 | 26 | at::Tensor grouping_backward(at::Tensor grad_y, at::Tensor indices, 27 | const int n) { 28 | CHECK_CUDA(grad_y); 29 | CHECK_CUDA(indices); 30 | CHECK_CONTIGUOUS(grad_y); 31 | CHECK_CONTIGUOUS(indices); 32 | CHECK_IS_FLOAT(grad_y); 33 | CHECK_IS_INT(indices); 34 | 35 | int b = grad_y.size(0); 36 | int c = grad_y.size(1); 37 | int m = indices.size(1); 38 | int u = indices.size(2); 39 | at::Tensor grad_x = torch::zeros( 40 | {b, c, n}, at::device(grad_y.device()).dtype(at::ScalarType::Float)); 41 | grouping_grad(b, c, n, m, u, grad_y.data_ptr(), 42 | indices.data_ptr(), grad_x.data_ptr()); 43 | return grad_x; 44 | } 45 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/cuda_utils.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #define MAXIMUM_THREADS 512 14 | 15 | inline int optimal_num_threads(int work_size) { 16 | const int pow_2 = std::log2(static_cast(work_size)); 17 | return max(min(1 << pow_2, MAXIMUM_THREADS), 1); 18 | } 19 | 20 | inline dim3 optimal_block_config(int x, int y) { 21 | const int x_threads = optimal_num_threads(x); 22 | const int y_threads = 23 | max(min(optimal_num_threads(y), MAXIMUM_THREADS / x_threads), 1); 24 | dim3 block_config(x_threads, y_threads, 1); 25 | return block_config; 26 | } 27 | 28 | #define CUDA_CHECK_ERRORS() \ 29 | { \ 30 | cudaError_t err = cudaGetLastError(); \ 31 | if (cudaSuccess != err) { \ 32 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 33 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 34 | __FILE__); \ 35 | exit(-1); \ 36 | } \ 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/voxelization.py: 
-------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .backend import _backend 4 | 5 | __all__ = ["avg_voxelize"] 6 | 7 | 8 | class AvgVoxelization(Function): 9 | @staticmethod 10 | def forward(ctx, features, coords, resolution): 11 | """ 12 | :param ctx: 13 | :param features: Features of the point cloud, FloatTensor[B, C, N] 14 | :param coords: Voxelized Coordinates of each point, IntTensor[B, 3, N] 15 | :param resolution: Voxel resolution 16 | :return: 17 | Voxelized Features, FloatTensor[B, C, R, R, R] 18 | """ 19 | features = features.contiguous() 20 | coords = coords.int().contiguous() 21 | b, c, _ = features.shape 22 | out, indices, counts = _backend.avg_voxelize_forward( 23 | features, coords, resolution 24 | ) 25 | ctx.save_for_backward(indices, counts) 26 | return out.view(b, c, resolution, resolution, resolution) 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | """ 31 | :param ctx: 32 | :param grad_output: gradient of output, FloatTensor[B, C, R, R, R] 33 | :return: 34 | gradient of inputs, FloatTensor[B, C, N] 35 | """ 36 | b, c = grad_output.shape[:2] 37 | indices, counts = ctx.saved_tensors 38 | grad_features = _backend.avg_voxelize_backward( 39 | grad_output.contiguous().view(b, c, -1), indices, counts 40 | ) 41 | return grad_features, None, None 42 | 43 | 44 | avg_voxelize = AvgVoxelization.apply 45 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/ball_query.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from . import functional as F 5 | 6 | __all__ = ["BallQuery"] 7 | 8 | 9 | class BallQuery(nn.Module): 10 | def __init__(self, radius, num_neighbors, include_coordinates=True): 11 | super().__init__() 12 | self.radius = radius 13 | self.num_neighbors = num_neighbors 14 | self.include_coordinates = include_coordinates 15 | 16 | def forward(self, points_coords, centers_coords, points_features=None): 17 | points_coords = points_coords.contiguous() 18 | centers_coords = centers_coords.contiguous() 19 | neighbor_indices = F.ball_query( 20 | centers_coords, points_coords, self.radius, self.num_neighbors 21 | ) 22 | neighbor_coordinates = F.grouping(points_coords, neighbor_indices) 23 | neighbor_coordinates = neighbor_coordinates - centers_coords.unsqueeze(-1) 24 | 25 | if points_features is None: 26 | assert self.include_coordinates, "No Features For Grouping" 27 | neighbor_features = neighbor_coordinates 28 | else: 29 | neighbor_features = F.grouping(points_features, neighbor_indices) 30 | if self.include_coordinates: 31 | neighbor_features = torch.cat( 32 | [neighbor_coordinates, neighbor_features], dim=1 33 | ) 34 | return neighbor_features 35 | 36 | def extra_repr(self): 37 | return "radius={}, num_neighbors={}{}".format( 38 | self.radius, 39 | self.num_neighbors, 40 | ", include coordinates" if self.include_coordinates else "", 41 | ) 42 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/interpolatation.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .backend import _backend 4 | 5 | __all__ = ["nearest_neighbor_interpolate"] 6 | 7 | 8 | class NeighborInterpolation(Function): 9 | @staticmethod 10 | def forward(ctx, points_coords, centers_coords, 
centers_features): 11 | """ 12 | :param ctx: 13 | :param points_coords: coordinates of points, FloatTensor[B, 3, N] 14 | :param centers_coords: coordinates of centers, FloatTensor[B, 3, M] 15 | :param centers_features: features of centers, FloatTensor[B, C, M] 16 | :return: 17 | points_features: features of points, FloatTensor[B, C, N] 18 | """ 19 | centers_coords = centers_coords.contiguous() 20 | points_coords = points_coords.contiguous() 21 | centers_features = centers_features.contiguous() 22 | ( 23 | points_features, 24 | indices, 25 | weights, 26 | ) = _backend.three_nearest_neighbors_interpolate_forward( 27 | points_coords, centers_coords, centers_features 28 | ) 29 | ctx.save_for_backward(indices, weights) 30 | ctx.num_centers = centers_coords.size(-1) 31 | return points_features 32 | 33 | @staticmethod 34 | def backward(ctx, grad_output): 35 | indices, weights = ctx.saved_tensors 36 | grad_centers_features = _backend.three_nearest_neighbors_interpolate_backward( 37 | grad_output.contiguous(), indices, weights, ctx.num_centers 38 | ) 39 | return None, None, grad_centers_features 40 | 41 | 42 | nearest_neighbor_interpolate = NeighborInterpolation.apply 43 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/devoxelization.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .backend import _backend 4 | 5 | __all__ = ["trilinear_devoxelize"] 6 | 7 | 8 | class TrilinearDevoxelization(Function): 9 | @staticmethod 10 | def forward(ctx, features, coords, resolution, is_training=True): 11 | """ 12 | :param ctx: 13 | :param coords: the coordinates of points, FloatTensor[B, 3, N] 14 | :param features: FloatTensor[B, C, R, R, R] 15 | :param resolution: int, the voxel resolution 16 | :param is_training: bool, training mode 17 | :return: 18 | FloatTensor[B, C, N] 19 | """ 20 | B, C = features.shape[:2] 21 | features = features.contiguous().view(B, C, -1) 22 | coords = coords.contiguous() 23 | outs, inds, wgts = _backend.trilinear_devoxelize_forward( 24 | resolution, is_training, coords, features 25 | ) 26 | if is_training: 27 | ctx.save_for_backward(inds, wgts) 28 | ctx.r = resolution 29 | return outs 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | """ 34 | :param ctx: 35 | :param grad_output: gradient of outputs, FloatTensor[B, C, N] 36 | :return: 37 | gradient of inputs, FloatTensor[B, C, R, R, R] 38 | """ 39 | inds, wgts = ctx.saved_tensors 40 | grad_inputs = _backend.trilinear_devoxelize_backward( 41 | grad_output.contiguous(), inds, wgts, ctx.r 42 | ) 43 | return ( 44 | grad_inputs.view( 45 | grad_output.size(0), grad_output.size(1), ctx.r, ctx.r, ctx.r 46 | ), 47 | None, 48 | None, 49 | None, 50 | ) 51 | 52 | 53 | trilinear_devoxelize = TrilinearDevoxelization.apply 54 | -------------------------------------------------------------------------------- /grasp_ldm/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | from pytorch_lightning.loggers import CSVLogger, Logger, TensorBoardLogger, WandbLogger 4 | 5 | LOGGERS = { 6 | "WandbLogger": WandbLogger, 7 | "TensorBoardLogger": TensorBoardLogger, 8 | "CSVLogger": CSVLogger, 9 | } 10 | 11 | 12 | class E_Trainers(enum.Enum): 13 | CLASSIFIER = "classifier" 14 | VAE = "vae" 15 | DDM = "ddm" 16 | 17 | def __repr__(self): 18 | return f"{self.__class__.__name__}.{self.name}" 19 | 20 | def 
_get_trainer(model_type: str): 21 | if model_type == E_Trainers.CLASSIFIER: 22 | from grasp_ldm.trainers.grasp_classification_trainer import ( 23 | GraspClassificationTrainer, 24 | ) 25 | 26 | return GraspClassificationTrainer 27 | elif model_type == E_Trainers.VAE: 28 | from grasp_ldm.trainers.grasp_generation_trainer import GraspVAETrainer 29 | 30 | return GraspVAETrainer 31 | elif model_type == E_Trainers.DDM: 32 | from grasp_ldm.trainers.grasp_generation_trainer import GraspLDMTrainer 33 | 34 | return GraspLDMTrainer 35 | else: 36 | raise NotImplementedError(f"Model type {model_type} not implemented") 37 | 38 | def get_trainer(self): 39 | return E_Trainers._get_trainer(self) 40 | 41 | def from_string(model_type: str): 42 | if model_type == "classifier": 43 | return E_Trainers.CLASSIFIER 44 | elif model_type == "vae": 45 | return E_Trainers.VAE 46 | elif model_type == "ddm": 47 | return E_Trainers.DDM 48 | else: 49 | raise NotImplementedError(f"Model type {model_type} not implemented") 50 | 51 | def get(model_type: str): 52 | enum_type = E_Trainers.from_string(model_type) 53 | return E_Trainers._get_trainer(enum_type) 54 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "ball_query/ball_query.hpp" 4 | #include "grouping/grouping.hpp" 5 | #include "interpolate/neighbor_interpolate.hpp" 6 | #include "interpolate/trilinear_devox.hpp" 7 | #include "sampling/sampling.hpp" 8 | #include "voxelization/vox.hpp" 9 | 10 | PYBIND11_MODULE(_pvcnn_backend, m) { 11 | m.def("gather_features_forward", &gather_features_forward, 12 | "Gather Centers' Features forward (CUDA)"); 13 | m.def("gather_features_backward", &gather_features_backward, 14 | "Gather Centers' Features backward (CUDA)"); 15 | m.def("furthest_point_sampling", &furthest_point_sampling_forward, 16 | "Furthest Point Sampling (CUDA)"); 17 | m.def("ball_query", &ball_query_forward, "Ball Query (CUDA)"); 18 | m.def("grouping_forward", &grouping_forward, 19 | "Grouping Features forward (CUDA)"); 20 | m.def("grouping_backward", &grouping_backward, 21 | "Grouping Features backward (CUDA)"); 22 | m.def("three_nearest_neighbors_interpolate_forward", 23 | &three_nearest_neighbors_interpolate_forward, 24 | "3 Nearest Neighbors Interpolate forward (CUDA)"); 25 | m.def("three_nearest_neighbors_interpolate_backward", 26 | &three_nearest_neighbors_interpolate_backward, 27 | "3 Nearest Neighbors Interpolate backward (CUDA)"); 28 | 29 | m.def("trilinear_devoxelize_forward", &trilinear_devoxelize_forward, 30 | "Trilinear Devoxelization forward (CUDA)"); 31 | m.def("trilinear_devoxelize_backward", &trilinear_devoxelize_backward, 32 | "Trilinear Devoxelization backward (CUDA)"); 33 | m.def("avg_voxelize_forward", &avg_voxelize_forward, 34 | "Voxelization forward with average pooling (CUDA)"); 35 | m.def("avg_voxelize_backward", &avg_voxelize_backward, 36 | "Voxelization backward (CUDA)"); 37 | } 38 | -------------------------------------------------------------------------------- /grasp_ldm/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def fix_state_dict_prefix(state_dict, prefix="model", ignore_all_others=False): 5 | """Fix state dict keys prefix 6 | 7 | Args: 8 | state_dict (dict): state dict 9 | prefix (str, optional): prefix to remove. Defaults to "model". 
10 | 11 | Returns: 12 | dict: state dict with prefix removed 13 | """ 14 | from collections import OrderedDict 15 | 16 | if isinstance(state_dict, dict): 17 | if ignore_all_others: 18 | return { 19 | k.partition(f"{prefix}.")[2]: v 20 | for k, v in state_dict.items() 21 | if k.startswith(prefix) 22 | } 23 | else: 24 | return {k.partition(f"{prefix}.")[2]: v for k, v in state_dict.items()} 25 | elif isinstance(state_dict, OrderedDict): 26 | if ignore_all_others: 27 | return OrderedDict( 28 | [ 29 | (k.partition(f"{prefix}.")[2], v) 30 | for k, v in state_dict.items() 31 | if k.startswith(prefix) 32 | ] 33 | ) 34 | else: 35 | return OrderedDict( 36 | [(k.partition(f"{prefix}.")[2], v) for k, v in state_dict.items()] 37 | ) 38 | 39 | 40 | def minmax_normalize( 41 | t: torch.Tensor, dim: int, v_min: float = 0.0, v_max: float = 1.0, keepdim=True 42 | ) -> torch.Tensor: 43 | """min-max normalization in [0,1] 44 | 45 | Args: 46 | t (Tensor): tensor [B, D1, D2 ... Dn] 47 | dim (int): dimension to normalize on 48 | min (float, optional): min value. Defaults to 0.0. 49 | max (float, optional): max value. Defaults to 1.0. 50 | 51 | Returns: 52 | Tensor: [B, D1, D2 ... Dn] 53 | """ 54 | t -= t.min(dim, keepdim=keepdim)[0] 55 | t /= t.max(dim, keepdim=keepdim)[0] 56 | 57 | t = t * (v_max - v_min) + v_min 58 | return t 59 | -------------------------------------------------------------------------------- /grasp_ldm/trainers/mixins.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import pytorch_lightning as pl 4 | from pytorch_lightning import LightningModule, Trainer 5 | from pytorch_lightning.callbacks import ModelCheckpoint 6 | 7 | 8 | class TrainerEMAMixin: 9 | """Mixin for EMA model management in trainer 10 | 11 | The idea is to have all this functionality completely hidden and disconnected 12 | from the main trainer class. 
Only activated when specified in trainer config 13 | """ 14 | 15 | def configure_ema(self, trainer_config): 16 | from ema_pytorch import EMA 17 | 18 | if hasattr(trainer_config, "ema"): 19 | if trainer_config.ema: 20 | ema_config = self.get_ema_config(trainer_config) 21 | self.ema_model = EMA(self.model, **ema_config).to(self.device) 22 | else: 23 | self.ema_model = None 24 | 25 | def get_ema_config(self, trainer_config): 26 | """Get EMA config 27 | 28 | Args: 29 | trainer_config (dict): trainer config 30 | 31 | Returns: 32 | dict: EMA config 33 | """ 34 | 35 | def check_key(q_dict, q_key): 36 | if key in q_dict: 37 | if q_dict[key] is not None: 38 | return True 39 | return False 40 | 41 | ema_config = dict( 42 | beta=0.990, 43 | update_after_step=1000, 44 | update_every=5, 45 | ) 46 | 47 | for key in list(ema_config): 48 | if check_key(trainer_config.ema, key): 49 | ema_config[key] = getattr(trainer_config.ema, key) 50 | 51 | return ema_config 52 | 53 | def get_ema_callback(self): 54 | # Unused because this requires additional checkpoint to be saved 55 | # No good way to disconnect from how we implement normal checkpoints in derived class 56 | return self.EMAModelCheckpoint( 57 | save_top_k=1, 58 | monitor="loss", 59 | mode="min", 60 | dirpath=self._experiment.ckpt_dir, 61 | filename="ema-{step}", 62 | save_weights_only=True, 63 | every_n_train_steps=1000, 64 | ) 65 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include "sampling.hpp" 2 | #include "sampling.cuh" 3 | 4 | #include "../utils.hpp" 5 | 6 | at::Tensor gather_features_forward(at::Tensor features, at::Tensor indices) { 7 | CHECK_CUDA(features); 8 | CHECK_CUDA(indices); 9 | CHECK_CONTIGUOUS(features); 10 | CHECK_CONTIGUOUS(indices); 11 | CHECK_IS_FLOAT(features); 12 | CHECK_IS_INT(indices); 13 | 14 | int b = features.size(0); 15 | int c = features.size(1); 16 | int n = features.size(2); 17 | int m = indices.size(1); 18 | at::Tensor output = torch::zeros( 19 | {b, c, m}, at::device(features.device()).dtype(at::ScalarType::Float)); 20 | gather_features(b, c, n, m, features.data_ptr(), 21 | indices.data_ptr(), output.data_ptr()); 22 | return output; 23 | } 24 | 25 | at::Tensor gather_features_backward(at::Tensor grad_y, at::Tensor indices, 26 | const int n) { 27 | CHECK_CUDA(grad_y); 28 | CHECK_CUDA(indices); 29 | CHECK_CONTIGUOUS(grad_y); 30 | CHECK_CONTIGUOUS(indices); 31 | CHECK_IS_FLOAT(grad_y); 32 | CHECK_IS_INT(indices); 33 | 34 | int b = grad_y.size(0); 35 | int c = grad_y.size(1); 36 | at::Tensor grad_x = torch::zeros( 37 | {b, c, n}, at::device(grad_y.device()).dtype(at::ScalarType::Float)); 38 | gather_features_grad(b, c, n, indices.size(1), grad_y.data_ptr(), 39 | indices.data_ptr(), grad_x.data_ptr()); 40 | return grad_x; 41 | } 42 | 43 | at::Tensor furthest_point_sampling_forward(at::Tensor coords, 44 | const int num_samples) { 45 | CHECK_CUDA(coords); 46 | CHECK_CONTIGUOUS(coords); 47 | CHECK_IS_FLOAT(coords); 48 | 49 | int b = coords.size(0); 50 | int n = coords.size(2); 51 | at::Tensor indices = torch::zeros( 52 | {b, num_samples}, at::device(coords.device()).dtype(at::ScalarType::Int)); 53 | at::Tensor distances = torch::full( 54 | {b, n}, 1e38f, at::device(coords.device()).dtype(at::ScalarType::Float)); 55 | furthest_point_sampling(b, n, num_samples, coords.data_ptr(), 56 | distances.data_ptr(), indices.data_ptr()); 
57 | return indices; 58 | } 59 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/ball_query/ball_query.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../cuda_utils.cuh" 6 | 7 | /* 8 | Function: ball query 9 | Args: 10 | b : batch size 11 | n : number of points in point clouds 12 | m : number of query centers 13 | r2 : ball query radius ** 2 14 | u : maximum number of neighbors 15 | centers_coords: coordinates of centers, FloatTensor[b, 3, m] 16 | points_coords : coordinates of points, FloatTensor[b, 3, n] 17 | neighbors_indices : neighbor indices in points, IntTensor[b, m, u] 18 | */ 19 | __global__ void ball_query_kernel(int b, int n, int m, float r2, int u, 20 | const float *__restrict__ centers_coords, 21 | const float *__restrict__ points_coords, 22 | int *__restrict__ neighbors_indices) { 23 | int batch_index = blockIdx.x; 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | points_coords += batch_index * n * 3; 27 | centers_coords += batch_index * m * 3; 28 | neighbors_indices += batch_index * m * u; 29 | 30 | for (int j = index; j < m; j += stride) { 31 | float center_x = centers_coords[j]; 32 | float center_y = centers_coords[j + m]; 33 | float center_z = centers_coords[j + m + m]; 34 | for (int k = 0, cnt = 0; k < n && cnt < u; ++k) { 35 | float dx = center_x - points_coords[k]; 36 | float dy = center_y - points_coords[k + n]; 37 | float dz = center_z - points_coords[k + n + n]; 38 | float d2 = dx * dx + dy * dy + dz * dz; 39 | if (d2 < r2) { 40 | if (cnt == 0) { 41 | for (int v = 0; v < u; ++v) { 42 | neighbors_indices[j * u + v] = k; 43 | } 44 | } 45 | neighbors_indices[j * u + cnt] = k; 46 | ++cnt; 47 | } 48 | } 49 | } 50 | } 51 | 52 | void ball_query(int b, int n, int m, float r2, int u, 53 | const float *centers_coords, const float *points_coords, 54 | int *neighbors_indices) { 55 | ball_query_kernel<<>>( 57 | b, n, m, r2, u, centers_coords, points_coords, neighbors_indices); 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /grasp_ldm/utils/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import multiprocessing 3 | 4 | # from ptflops import get_model_complexity_info 5 | from typing import Tuple 6 | 7 | import torch 8 | from scipy.spatial.transform import Rotation as R 9 | 10 | 11 | def get_param_count(model: torch.nn.Module): 12 | trainable = sum(p.numel() for p in model.parameters() if p.requires_grad) 13 | total = sum(p.numel() for p in model.parameters()) 14 | print(f"Trainable: {trainable/1e6:0.3f} M \n Total: {total/1e6:0.3f}") 15 | 16 | 17 | def load_json(path: str) -> dict: 18 | """load json helper 19 | 20 | Args: 21 | path (str): json_path 22 | 23 | Returns: 24 | dict: data 25 | """ 26 | with open(path, "r") as jf: 27 | data = json.load(jf) 28 | return data 29 | 30 | 31 | def spawn_multiple_processes(n_proc, target_fn, process_args): 32 | assert ( 33 | len(process_args) == n_proc 34 | ), f"Number of processes ({n_proc}) does not match the length of process_args ({len(process_args)})" 35 | 36 | read_processes = [] 37 | 38 | for idx in range(n_proc): 39 | try: 40 | if isinstance(process_args[idx], list): 41 | p = multiprocessing.Process(target=target_fn, args=process_args[idx]) 42 | elif isinstance(process_args[idx], dict): 43 | p = 
multiprocessing.Process(target=target_fn, kwargs=process_args[idx]) 44 | else: 45 | raise TypeError("process_args entries must be lists or dicts") 46 | 47 | p.start() 48 | read_processes.append(p) 49 | except Exception: 50 | for p in read_processes: 51 | p.join() 52 | 53 | for p in read_processes: 54 | p.join() 55 | 56 | return 57 | 58 | 59 | def split_list(lst, n): 60 | """Split a list into n sublists of approximately equal length 61 | 62 | Args: 63 | lst (list): list to split 64 | n (int): number of sublists 65 | 66 | Returns: 67 | list: list of sublists 68 | """ 69 | # divisor, modulo for n splits of list length 70 | div, mod = divmod(len(lst), n) 71 | 72 | # Length of each sublist 73 | lengths = [div + 1 if i < mod else div for i in range(n)] 74 | 75 | # Split the original list into sublists 76 | # sum(lengths[:i]) is 0 for i=0, so the first sublist starts at 0 77 | sublists = [lst[sum(lengths[:i]) : sum(lengths[: i + 1])] for i in range(n)] 78 | 79 | # Remove empty sublists 80 | sublists = [sublist for sublist in sublists if sublist] 81 | 82 | return sublists 83 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include "neighbor_interpolate.hpp" 2 | #include "neighbor_interpolate.cuh" 3 | 4 | #include "../utils.hpp" 5 | 6 | std::vector<at::Tensor> 7 | three_nearest_neighbors_interpolate_forward(at::Tensor points_coords, 8 | at::Tensor centers_coords, 9 | at::Tensor centers_features) { 10 | CHECK_CUDA(points_coords); 11 | CHECK_CUDA(centers_coords); 12 | CHECK_CUDA(centers_features); 13 | CHECK_CONTIGUOUS(points_coords); 14 | CHECK_CONTIGUOUS(centers_coords); 15 | CHECK_CONTIGUOUS(centers_features); 16 | CHECK_IS_FLOAT(points_coords); 17 | CHECK_IS_FLOAT(centers_coords); 18 | CHECK_IS_FLOAT(centers_features); 19 | 20 | int b = centers_features.size(0); 21 | int c = centers_features.size(1); 22 | int m = centers_features.size(2); 23 | int n = points_coords.size(2); 24 | 25 | at::Tensor indices = torch::zeros( 26 | {b, 3, n}, at::device(points_coords.device()).dtype(at::ScalarType::Int)); 27 | at::Tensor weights = torch::zeros( 28 | {b, 3, n}, 29 | at::device(points_coords.device()).dtype(at::ScalarType::Float)); 30 | at::Tensor output = torch::zeros( 31 | {b, c, n}, 32 | at::device(centers_features.device()).dtype(at::ScalarType::Float)); 33 | 34 | three_nearest_neighbors_interpolate( 35 | b, c, m, n, points_coords.data_ptr<float>(), 36 | centers_coords.data_ptr<float>(), centers_features.data_ptr<float>(), 37 | indices.data_ptr<int>(), weights.data_ptr<float>(), 38 | output.data_ptr<float>()); 39 | return {output, indices, weights}; 40 | } 41 | 42 | at::Tensor three_nearest_neighbors_interpolate_backward(at::Tensor grad_y, 43 | at::Tensor indices, 44 | at::Tensor weights, 45 | const int m) { 46 | CHECK_CUDA(grad_y); 47 | CHECK_CUDA(indices); 48 | CHECK_CUDA(weights); 49 | CHECK_CONTIGUOUS(grad_y); 50 | CHECK_CONTIGUOUS(indices); 51 | CHECK_CONTIGUOUS(weights); 52 | CHECK_IS_FLOAT(grad_y); 53 | CHECK_IS_INT(indices); 54 | CHECK_IS_FLOAT(weights); 55 | 56 | int b = grad_y.size(0); 57 | int c = grad_y.size(1); 58 | int n = grad_y.size(2); 59 | at::Tensor grad_x = torch::zeros( 60 | {b, c, m}, at::device(grad_y.device()).dtype(at::ScalarType::Float)); 61 | three_nearest_neighbors_interpolate_grad( 62 | b, c, n, m, grad_y.data_ptr<float>(), indices.data_ptr<int>(), 63 | weights.data_ptr<float>(), grad_x.data_ptr<float>()); 64 | return grad_x; 65 | } 66 | 
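The forward binding above returns (output, indices, weights) and the backward binding consumes the same (indices, weights) pair together with the number of centers m, which is what makes the interpolation differentiable from Python. The sketch below shows how such a forward/backward pair is typically wrapped in a torch.autograd.Function. It is illustrative only, not the repo's own wrapper (modules/functional/interpolatation.py, not shown here), and it assumes the compiled `_backend` extension exposes the two C++ functions under the same names as above.

from torch.autograd import Function

from .backend import _backend  # assumed: the JIT-compiled extension exposing the bindings above


class NeighborInterpolation(Function):
    @staticmethod
    def forward(ctx, points_coords, centers_coords, centers_features):
        # points_coords: [B, 3, N], centers_coords: [B, 3, M], centers_features: [B, C, M]
        output, indices, weights = _backend.three_nearest_neighbors_interpolate_forward(
            points_coords.contiguous(),
            centers_coords.contiguous(),
            centers_features.contiguous(),
        )
        # Cache what the backward binding needs: the 3-NN indices, their weights and M
        ctx.save_for_backward(indices, weights)
        ctx.num_centers = centers_coords.size(-1)
        return output  # [B, C, N]

    @staticmethod
    def backward(ctx, grad_output):
        indices, weights = ctx.saved_tensors
        grad_centers_features = _backend.three_nearest_neighbors_interpolate_backward(
            grad_output.contiguous(), indices, weights, ctx.num_centers
        )
        # No gradients flow back into either set of coordinates
        return None, None, grad_centers_features


nearest_neighbor_interpolate = NeighborInterpolation.apply

PointNetFPModule further below calls F.nearest_neighbor_interpolate with exactly this (points_coords, centers_coords, centers_features) signature, which is what such a wrapper provides.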
-------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.cpp: -------------------------------------------------------------------------------- 1 | #include "vox.hpp" 2 | #include "vox.cuh" 3 | 4 | #include "../utils.hpp" 5 | 6 | /* 7 | Function: average pool voxelization (forward) 8 | Args: 9 | features: features, FloatTensor[b, c, n] 10 | coords : coords of each point, IntTensor[b, 3, n] 11 | resolution : voxel resolution 12 | Return: 13 | out : outputs, FloatTensor[b, c, s], s = r ** 3 14 | ind : voxel index of each point, IntTensor[b, n] 15 | cnt : #points in each voxel index, IntTensor[b, s] 16 | */ 17 | std::vector avg_voxelize_forward(const at::Tensor features, 18 | const at::Tensor coords, 19 | const int resolution) { 20 | CHECK_CUDA(features); 21 | CHECK_CUDA(coords); 22 | CHECK_CONTIGUOUS(features); 23 | CHECK_CONTIGUOUS(coords); 24 | CHECK_IS_FLOAT(features); 25 | CHECK_IS_INT(coords); 26 | 27 | int b = features.size(0); 28 | int c = features.size(1); 29 | int n = features.size(2); 30 | int r = resolution; 31 | int r2 = r * r; 32 | int r3 = r2 * r; 33 | at::Tensor ind = torch::zeros( 34 | {b, n}, at::device(features.device()).dtype(at::ScalarType::Int)); 35 | at::Tensor out = torch::zeros( 36 | {b, c, r3}, at::device(features.device()).dtype(at::ScalarType::Float)); 37 | at::Tensor cnt = torch::zeros( 38 | {b, r3}, at::device(features.device()).dtype(at::ScalarType::Int)); 39 | avg_voxelize(b, c, n, r, r2, r3, coords.data_ptr(), 40 | features.data_ptr(), ind.data_ptr(), 41 | cnt.data_ptr(), out.data_ptr()); 42 | return {out, ind, cnt}; 43 | } 44 | 45 | /* 46 | Function: average pool voxelization (backward) 47 | Args: 48 | grad_y : grad outputs, FloatTensor[b, c, s] 49 | indices: voxel index of each point, IntTensor[b, n] 50 | cnt : #points in each voxel index, IntTensor[b, s] 51 | Return: 52 | grad_x : grad inputs, FloatTensor[b, c, n] 53 | */ 54 | at::Tensor avg_voxelize_backward(const at::Tensor grad_y, 55 | const at::Tensor indices, 56 | const at::Tensor cnt) { 57 | CHECK_CUDA(grad_y); 58 | CHECK_CUDA(indices); 59 | CHECK_CUDA(cnt); 60 | CHECK_CONTIGUOUS(grad_y); 61 | CHECK_CONTIGUOUS(indices); 62 | CHECK_CONTIGUOUS(cnt); 63 | CHECK_IS_FLOAT(grad_y); 64 | CHECK_IS_INT(indices); 65 | CHECK_IS_INT(cnt); 66 | 67 | int b = grad_y.size(0); 68 | int c = grad_y.size(1); 69 | int s = grad_y.size(2); 70 | int n = indices.size(1); 71 | at::Tensor grad_x = torch::zeros( 72 | {b, c, n}, at::device(grad_y.device()).dtype(at::ScalarType::Float)); 73 | avg_voxelize_grad(b, c, n, s, indices.data_ptr(), cnt.data_ptr(), 74 | grad_y.data_ptr(), grad_x.data_ptr()); 75 | return grad_x; 76 | } 77 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/grouping/grouping.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../cuda_utils.cuh" 5 | 6 | /* 7 | Function: grouping features of neighbors (forward) 8 | Args: 9 | b : batch size 10 | c : #channles of features 11 | n : number of points in point clouds 12 | m : number of query centers 13 | u : maximum number of neighbors 14 | features: points' features, FloatTensor[b, c, n] 15 | indices : neighbor indices in points, IntTensor[b, m, u] 16 | out : gathered features, FloatTensor[b, c, m, u] 17 | */ 18 | __global__ void grouping_kernel(int b, int c, int n, int m, int u, 19 | const float 
*__restrict__ features, 20 | const int *__restrict__ indices, 21 | float *__restrict__ out) { 22 | int batch_index = blockIdx.x; 23 | features += batch_index * n * c; 24 | indices += batch_index * m * u; 25 | out += batch_index * m * u * c; 26 | 27 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 28 | const int stride = blockDim.y * blockDim.x; 29 | for (int i = index; i < c * m; i += stride) { 30 | const int l = i / m; 31 | const int j = i % m; 32 | for (int k = 0; k < u; ++k) { 33 | out[(l * m + j) * u + k] = features[l * n + indices[j * u + k]]; 34 | } 35 | } 36 | } 37 | 38 | void grouping(int b, int c, int n, int m, int u, const float *features, 39 | const int *indices, float *out) { 40 | grouping_kernel<<>>(b, c, n, m, u, features, 42 | indices, out); 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | /* 47 | Function: grouping features of neighbors (backward) 48 | Args: 49 | b : batch size 50 | c : #channles of features 51 | n : number of points in point clouds 52 | m : number of query centers 53 | u : maximum number of neighbors 54 | grad_y : grad of gathered features, FloatTensor[b, c, m, u] 55 | indices : neighbor indices in points, IntTensor[b, m, u] 56 | grad_x: grad of points' features, FloatTensor[b, c, n] 57 | */ 58 | __global__ void grouping_grad_kernel(int b, int c, int n, int m, int u, 59 | const float *__restrict__ grad_y, 60 | const int *__restrict__ indices, 61 | float *__restrict__ grad_x) { 62 | int batch_index = blockIdx.x; 63 | grad_y += batch_index * m * u * c; 64 | indices += batch_index * m * u; 65 | grad_x += batch_index * n * c; 66 | 67 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 68 | const int stride = blockDim.y * blockDim.x; 69 | for (int i = index; i < c * m; i += stride) { 70 | const int l = i / m; 71 | const int j = i % m; 72 | for (int k = 0; k < u; ++k) { 73 | atomicAdd(grad_x + l * n + indices[j * u + k], 74 | grad_y[(l * m + j) * u + k]); 75 | } 76 | } 77 | } 78 | 79 | void grouping_grad(int b, int c, int n, int m, int u, const float *grad_y, 80 | const int *indices, float *grad_x) { 81 | grouping_grad_kernel<<>>( 83 | b, c, n, m, u, grad_y, indices, grad_x); 84 | CUDA_CHECK_ERRORS(); 85 | } 86 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Swish(nn.Module): 6 | def forward(self, x): 7 | return x * torch.sigmoid(x) 8 | 9 | 10 | class Attention(nn.Module): 11 | # Adapted from https://github.com/alexzhou907/PVD 12 | # Used for global attention over context vectors like pc shape latent 13 | def __init__(self, in_ch, num_groups, D=3): 14 | super(Attention, self).__init__() 15 | assert in_ch % num_groups == 0 16 | if D == 3: 17 | self.q = nn.Conv3d(in_ch, in_ch, 1) 18 | self.k = nn.Conv3d(in_ch, in_ch, 1) 19 | self.v = nn.Conv3d(in_ch, in_ch, 1) 20 | 21 | self.out = nn.Conv3d(in_ch, in_ch, 1) 22 | elif D == 1: 23 | self.q = nn.Conv1d(in_ch, in_ch, 1) 24 | self.k = nn.Conv1d(in_ch, in_ch, 1) 25 | self.v = nn.Conv1d(in_ch, in_ch, 1) 26 | 27 | self.out = nn.Conv1d(in_ch, in_ch, 1) 28 | 29 | self.norm = nn.GroupNorm(num_groups, in_ch) 30 | self.nonlin = Swish() 31 | 32 | self.sm = nn.Softmax(-1) 33 | 34 | def forward(self, x): 35 | B, C = x.shape[:2] 36 | h = x 37 | 38 | q = self.q(h).reshape(B, C, -1) 39 | k = self.k(h).reshape(B, C, -1) 40 | v = self.v(h).reshape(B, C, -1) 41 | 42 | qk = torch.matmul(q.permute(0, 2, 1), k) # * 
(int(C) ** (-0.5)) 43 | 44 | w = self.sm(qk) 45 | 46 | h = torch.matmul(v, w.permute(0, 2, 1)).reshape(B, C, *x.shape[2:]) 47 | 48 | h = self.out(h) 49 | 50 | x = h + x 51 | 52 | x = self.nonlin(self.norm(x)) 53 | 54 | return x 55 | 56 | 57 | class FCLayers(nn.Module): 58 | def __init__( 59 | self, 60 | in_features, 61 | layer_outs_specs=[128, 256, 512], 62 | layer_normalization=True, 63 | ) -> None: 64 | super().__init__() 65 | 66 | self.in_features = in_features 67 | self.out_features = layer_outs_specs[-1] 68 | 69 | self.layer_specs = layer_outs_specs 70 | self.layer_normalization = layer_normalization 71 | 72 | self.num_layers = len(layer_outs_specs) 73 | self.layers = self._build_layers() 74 | 75 | def _build_layers(self): 76 | module_list = [] 77 | 78 | for idx, layer_out_features in enumerate(self.layer_specs): 79 | in_feats = self.in_features if idx == 0 else self.layer_specs[idx - 1] 80 | out_feats = layer_out_features 81 | 82 | if self.layer_normalization: 83 | module_list.append( 84 | nn.Sequential( 85 | nn.Linear( 86 | in_feats, out_feats, bias=not self.layer_normalization 87 | ), 88 | nn.LayerNorm(out_feats), 89 | nn.ReLU(), 90 | ) 91 | ) 92 | else: 93 | module_list.append( 94 | nn.Sequential( 95 | nn.Linear( 96 | in_feats, out_feats, bias=not self.layer_normalization 97 | ), 98 | nn.ReLU(), 99 | ) 100 | ) 101 | 102 | return nn.Sequential(*module_list) 103 | 104 | def forward(self, x): 105 | return self.layers(x) 106 | -------------------------------------------------------------------------------- /grasp_ldm/utils/vis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import trimesh 4 | 5 | 6 | # TODO: To be removed in favor of gripper.py 7 | def create_gripper_marker(color=[0, 0, 255], tube_radius=0.001, sections=6): 8 | """Create a 3D mesh visualizing a parallel yaw gripper. It consists of four cylinders. 9 | 10 | From: https://github.com/NVlabs/acronym/blob/main/acronym_tools/acronym.py 11 | 12 | Args: 13 | color (list, optional): RGB values of marker. Defaults to [0, 0, 255]. 14 | tube_radius (float, optional): Radius of cylinders. Defaults to 0.001. 15 | sections (int, optional): Number of sections of each cylinder. Defaults to 6. 16 | 17 | Returns: 18 | trimesh.Trimesh: A mesh that represents a simple parallel yaw gripper. 
19 | """ 20 | cfl = trimesh.creation.cylinder( 21 | radius=0.002, 22 | sections=sections, 23 | segment=[ 24 | [4.10000000e-02, -7.27595772e-12, 6.59999996e-02], 25 | [4.10000000e-02, -7.27595772e-12, 1.12169998e-01], 26 | ], 27 | ) 28 | cfr = trimesh.creation.cylinder( 29 | radius=0.002, 30 | sections=sections, 31 | segment=[ 32 | [-4.100000e-02, -7.27595772e-12, 6.59999996e-02], 33 | [-4.100000e-02, -7.27595772e-12, 1.12169998e-01], 34 | ], 35 | ) 36 | cb1 = trimesh.creation.cylinder( 37 | radius=0.002, sections=sections, segment=[[0, 0, 0], [0, 0, 6.59999996e-02]] 38 | ) 39 | cb2 = trimesh.creation.cylinder( 40 | radius=0.002, 41 | sections=sections, 42 | segment=[[-4.100000e-02, 0, 6.59999996e-02], [4.100000e-02, 0, 6.59999996e-02]], 43 | ) 44 | 45 | tmp = trimesh.util.concatenate([cb1, cb2, cfr, cfl]) 46 | tmp.visual.face_colors = color 47 | 48 | return tmp 49 | 50 | 51 | def visualize_pc(pc): 52 | if isinstance(pc, torch.Tensor): 53 | pc = pc.squeeze().numpy() 54 | r = pc[..., 0] * 255 / max(pc[..., 0]) 55 | g = pc[..., 1] * 200 / max(pc[..., 1]) 56 | b = pc[..., 2] * 175 / max(pc[..., 2]) 57 | a = np.ones(pc.shape[0]) * 200 58 | 59 | colors = np.clip(np.vstack((r, g, b, a)).T, 0, 255) 60 | 61 | colors = colors if colors is not None else np.ones((pc.shape[0], 3)) * 85 62 | pc_trimesh = trimesh.points.PointCloud(pc, colors=colors) 63 | scene = trimesh.Scene(pc_trimesh).show(line_settings={"point_size": 5}) 64 | return scene 65 | 66 | 67 | def visualize_pc_grasps( 68 | pc: np.ndarray, grasps: np.ndarray, c: np.ndarray = None 69 | ) -> trimesh.Scene: 70 | # scene = visualize_pc(pc) 71 | r = pc[..., 0] * 255 / max(pc[..., 0]) 72 | g = pc[..., 1] * 200 / max(pc[..., 1]) 73 | b = pc[..., 2] * 175 / max(pc[..., 2]) 74 | a = np.ones(pc.shape[0]) * 200 75 | 76 | pc_colors = np.clip(np.vstack((r, g, b, a)).T, 0, 255) 77 | 78 | if c is not None: 79 | c = c.squeeze(1) if c.ndim == 2 else c 80 | 81 | if c is not None: 82 | gripper_marker = [ 83 | create_gripper_marker( 84 | color=[150, np.clip(255 * ci, 0, 255), 0, np.clip(255 * ci, 150, 255)] 85 | ) 86 | for ci in c 87 | ] 88 | else: 89 | gripper_marker = [create_gripper_marker(color=[0, 255, 0, 255])] * grasps.shape[ 90 | 0 91 | ] 92 | 93 | gripper_markers = [ 94 | gripper_marker[i].copy().apply_transform(t) for i, t in enumerate(grasps) 95 | ] 96 | 97 | scene = trimesh.Scene( 98 | [trimesh.points.PointCloud(pc, colors=pc_colors)] + gripper_markers 99 | ) 100 | return scene 101 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.cpp: -------------------------------------------------------------------------------- 1 | #include "trilinear_devox.hpp" 2 | #include "trilinear_devox.cuh" 3 | 4 | #include "../utils.hpp" 5 | 6 | /* 7 | Function: trilinear devoxelization (forward) 8 | Args: 9 | r : voxel resolution 10 | trainig : whether is training mode 11 | coords : the coordinates of points, FloatTensor[b, 3, n] 12 | features : features, FloatTensor[b, c, s], s = r ** 3 13 | Return: 14 | outs : outputs, FloatTensor[b, c, n] 15 | inds : the voxel coordinates of point cube, IntTensor[b, 8, n] 16 | wgts : weight for trilinear interpolation, FloatTensor[b, 8, n] 17 | */ 18 | std::vector 19 | trilinear_devoxelize_forward(const int r, const bool is_training, 20 | const at::Tensor coords, 21 | const at::Tensor features) { 22 | CHECK_CUDA(features); 23 | CHECK_CUDA(coords); 24 | CHECK_CONTIGUOUS(features); 25 | CHECK_CONTIGUOUS(coords); 26 | 
CHECK_IS_FLOAT(features); 27 | CHECK_IS_FLOAT(coords); 28 | 29 | int b = features.size(0); 30 | int c = features.size(1); 31 | int n = coords.size(2); 32 | int r2 = r * r; 33 | int r3 = r2 * r; 34 | at::Tensor outs = torch::zeros( 35 | {b, c, n}, at::device(features.device()).dtype(at::ScalarType::Float)); 36 | if (is_training) { 37 | at::Tensor inds = torch::zeros( 38 | {b, 8, n}, at::device(features.device()).dtype(at::ScalarType::Int)); 39 | at::Tensor wgts = torch::zeros( 40 | {b, 8, n}, at::device(features.device()).dtype(at::ScalarType::Float)); 41 | trilinear_devoxelize(b, c, n, r, r2, r3, true, coords.data_ptr(), 42 | features.data_ptr(), inds.data_ptr(), 43 | wgts.data_ptr(), outs.data_ptr()); 44 | return {outs, inds, wgts}; 45 | } else { 46 | at::Tensor inds = torch::zeros( 47 | {1}, at::device(features.device()).dtype(at::ScalarType::Int)); 48 | at::Tensor wgts = torch::zeros( 49 | {1}, at::device(features.device()).dtype(at::ScalarType::Float)); 50 | trilinear_devoxelize(b, c, n, r, r2, r3, false, coords.data_ptr(), 51 | features.data_ptr(), inds.data_ptr(), 52 | wgts.data_ptr(), outs.data_ptr()); 53 | return {outs, inds, wgts}; 54 | } 55 | } 56 | 57 | /* 58 | Function: trilinear devoxelization (backward) 59 | Args: 60 | grad_y : grad outputs, FloatTensor[b, c, n] 61 | indices : the voxel coordinates of point cube, IntTensor[b, 8, n] 62 | weights : weight for trilinear interpolation, FloatTensor[b, 8, n] 63 | r : voxel resolution 64 | Return: 65 | grad_x : grad inputs, FloatTensor[b, c, s], s = r ** 3 66 | */ 67 | at::Tensor trilinear_devoxelize_backward(const at::Tensor grad_y, 68 | const at::Tensor indices, 69 | const at::Tensor weights, 70 | const int r) { 71 | CHECK_CUDA(grad_y); 72 | CHECK_CUDA(weights); 73 | CHECK_CUDA(indices); 74 | CHECK_CONTIGUOUS(grad_y); 75 | CHECK_CONTIGUOUS(weights); 76 | CHECK_CONTIGUOUS(indices); 77 | CHECK_IS_FLOAT(grad_y); 78 | CHECK_IS_FLOAT(weights); 79 | CHECK_IS_INT(indices); 80 | 81 | int b = grad_y.size(0); 82 | int c = grad_y.size(1); 83 | int n = grad_y.size(2); 84 | int r3 = r * r * r; 85 | at::Tensor grad_x = torch::zeros( 86 | {b, c, r3}, at::device(grad_y.device()).dtype(at::ScalarType::Float)); 87 | trilinear_devoxelize_grad(b, c, n, r3, indices.data_ptr(), 88 | weights.data_ptr(), grad_y.data_ptr(), 89 | grad_x.data_ptr()); 90 | return grad_x; 91 | } 92 | -------------------------------------------------------------------------------- /tools/train_generator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 5 | import argparse 6 | 7 | from grasp_ldm.trainers import E_Trainers 8 | from grasp_ldm.utils.config import Config 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description="Runner for Training Grasp Samplers") 13 | parser.add_argument("--config", "-c", help="Path to config file", required=True) 14 | parser.add_argument( 15 | "--model", 16 | "-m", 17 | help="Model type", 18 | required=True, 19 | choices=["classifier", "vae", "ddm"], 20 | ) 21 | parser.add_argument("--root-dir", "-d", help="Root directory") 22 | parser.add_argument("--num-gpus", "-g", type=int, help="Number of GPUs to use") 23 | parser.add_argument("--batch-size", "-b", type=int, help="Batch size per device") 24 | parser.add_argument( 25 | "-debug", 26 | action="store_true", 27 | default=False, 28 | help="Setting this will disable wandb logger and ... 
TODO", 29 | ) 30 | parser.add_argument( 31 | "--deterministic", 32 | action="store_true", 33 | default=False, 34 | help="Make everything deterministic", 35 | ) 36 | parser.add_argument( 37 | "--seed", type=int, default=None, help="Make everything deterministic" 38 | ) 39 | 40 | return parser.parse_args() 41 | 42 | 43 | def set_deterministic(config, args): 44 | """Deterministic Run 45 | 46 | Mediate config and CLI args to set deterministic run. 47 | CLI args take priority and overwrite config. 48 | 49 | In config: 50 | 51 | config.trainer.deterministic =True 52 | config.seed = 123 53 | 54 | In CLI: 55 | --deterministic 56 | --seed 123 57 | """ 58 | config.trainer.deterministic = ( 59 | False if "deterministic" not in config.trainer else config.trainer.deterministic 60 | ) 61 | 62 | if args.deterministic: 63 | config.trainer.deterministic = True 64 | 65 | if config.trainer.deterministic: 66 | if not "seed" in config: 67 | config.seed = 42 68 | if args.seed is not None: 69 | config.seed = args.seed 70 | 71 | from pytorch_lightning import seed_everything 72 | 73 | seed_everything(config.seed, workers=True) 74 | print( 75 | "Training will be run in deterministic mode for reproducibility. This might be a bit slower." 76 | ) 77 | else: 78 | print( 79 | "Training is not deterministic. This is a bit faster and alright. If you want deterministic training, set `deterministic=True` in trainer config." 80 | ) 81 | 82 | return config 83 | 84 | 85 | def main(args): 86 | ## -- Config -- 87 | config = Config.fromfile(args.config) 88 | 89 | # Overwrite config with args 90 | ## Overwrite config with args 91 | # Num gpus 92 | if args.num_gpus: 93 | config.trainer.devices = args.num_gpus 94 | config.trainer.num_workers = args.num_gpus * config.num_workers_per_gpu 95 | 96 | # Batch size 97 | if args.batch_size: 98 | config.trainer.batch_size = args.batch_size 99 | config.data.train.batch_size = args.batch_size 100 | 101 | # Data Root 102 | if args.root_dir: 103 | for split in config.data: 104 | config.data[split].args.data_root_dir = args.root_dir 105 | 106 | # Deterministic 107 | config = set_deterministic(config=config, args=args) 108 | 109 | ## -- Trainer -- 110 | Trainer = E_Trainers.get(model_type=args.model) 111 | trainer = Trainer(config) 112 | trainer.run() 113 | 114 | 115 | if __name__ == "__main__": 116 | args = parse_args() 117 | main(args) 118 | -------------------------------------------------------------------------------- /grasp_ldm/dataset/acronym/gripper_ctrl_pts.json: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | 0.0, 4 | 0.0, 5 | 0.0 6 | ], 7 | [ 8 | 0.0, 9 | 0.0, 10 | 0.00659999996 11 | ], 12 | [ 13 | 0.0, 14 | 0.0, 15 | 0.01319999992 16 | ], 17 | [ 18 | 0.0, 19 | 0.0, 20 | 0.019799999880000002 21 | ], 22 | [ 23 | 0.0, 24 | 0.0, 25 | 0.02639999984 26 | ], 27 | [ 28 | 0.0, 29 | 0.0, 30 | 0.0329999998 31 | ], 32 | [ 33 | 0.0, 34 | 0.0, 35 | 0.039599999760000004 36 | ], 37 | [ 38 | 0.0, 39 | 0.0, 40 | 0.04619999972 41 | ], 42 | [ 43 | 0.0, 44 | 0.0, 45 | 0.05279999968 46 | ], 47 | [ 48 | 0.0, 49 | 0.0, 50 | 0.05939999964 51 | ], 52 | [ 53 | 0.0, 54 | 0.0, 55 | 0.0659999996 56 | ], 57 | [ 58 | 0.041, 59 | 0.0, 60 | 0.0659999996 61 | ], 62 | [ 63 | 0.0328, 64 | 0.0, 65 | 0.0659999996 66 | ], 67 | [ 68 | 0.0246, 69 | 0.0, 70 | 0.0659999996 71 | ], 72 | [ 73 | 0.016399999999999998, 74 | 0.0, 75 | 0.0659999996 76 | ], 77 | [ 78 | 0.008199999999999999, 79 | 0.0, 80 | 0.0659999996 81 | ], 82 | [ 83 | 0.0, 84 | 0.0, 85 | 0.0659999996 86 | ], 87 | [ 88 
| -0.008200000000000006, 89 | 0.0, 90 | 0.0659999996 91 | ], 92 | [ 93 | -0.016400000000000005, 94 | 0.0, 95 | 0.0659999996 96 | ], 97 | [ 98 | -0.024600000000000004, 99 | 0.0, 100 | 0.0659999996 101 | ], 102 | [ 103 | -0.0328, 104 | 0.0, 105 | 0.0659999996 106 | ], 107 | [ 108 | -0.041, 109 | 0.0, 110 | 0.0659999996 111 | ], 112 | [ 113 | 0.041, 114 | 0.0, 115 | 0.0659999996 116 | ], 117 | [ 118 | 0.041, 119 | 0.0, 120 | 0.07061699944 121 | ], 122 | [ 123 | 0.041, 124 | 0.0, 125 | 0.07523399928 126 | ], 127 | [ 128 | 0.041, 129 | 0.0, 130 | 0.07985099912 131 | ], 132 | [ 133 | 0.041, 134 | 0.0, 135 | 0.08446799896 136 | ], 137 | [ 138 | 0.041, 139 | 0.0, 140 | 0.08908499880000001 141 | ], 142 | [ 143 | 0.041, 144 | 0.0, 145 | 0.09370199864 146 | ], 147 | [ 148 | 0.041, 149 | 0.0, 150 | 0.09831899848 151 | ], 152 | [ 153 | 0.041, 154 | 0.0, 155 | 0.10293599832 156 | ], 157 | [ 158 | 0.041, 159 | 0.0, 160 | 0.10755299816000001 161 | ], 162 | [ 163 | 0.041, 164 | 0.0, 165 | 0.112169998 166 | ], 167 | [ 168 | -0.041, 169 | 0.0, 170 | 0.0659999996 171 | ], 172 | [ 173 | -0.041, 174 | 0.0, 175 | 0.07061699944 176 | ], 177 | [ 178 | -0.041, 179 | 0.0, 180 | 0.07523399928 181 | ], 182 | [ 183 | -0.041, 184 | 0.0, 185 | 0.07985099912 186 | ], 187 | [ 188 | -0.041, 189 | 0.0, 190 | 0.08446799896 191 | ], 192 | [ 193 | -0.041, 194 | 0.0, 195 | 0.08908499880000001 196 | ], 197 | [ 198 | -0.041, 199 | 0.0, 200 | 0.09370199864 201 | ], 202 | [ 203 | -0.041, 204 | 0.0, 205 | 0.09831899848 206 | ], 207 | [ 208 | -0.041, 209 | 0.0, 210 | 0.10293599832 211 | ], 212 | [ 213 | -0.041, 214 | 0.0, 215 | 0.10755299816000001 216 | ], 217 | [ 218 | -0.041, 219 | 0.0, 220 | 0.112169998 221 | ] 222 | ] 223 | -------------------------------------------------------------------------------- /grasp_ldm/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from grasp_ldm.utils.config import Config 4 | 5 | from . import GraspCVAE, GraspLatentDDM 6 | from .diffusion import GaussianDiffusion1D 7 | from .grasp_classifier import PointsBasedGraspClassifier 8 | from .modules.class_conditioned_resnet import ClassTimeConditionedResNet1D 9 | from .modules.resnets import ResNet1D, TimeConditionedResNet1D, Unet1D 10 | 11 | ## ----------------- Makeshift Model Registry ----------------- ## 12 | DIFFUSION_MODELS = { 13 | "GaussianDiffusion1D": GaussianDiffusion1D, 14 | "TimeConditionedResNet1D": TimeConditionedResNet1D, 15 | "ClassTimeConditionedResNet1D": ClassTimeConditionedResNet1D, 16 | } 17 | 18 | STANDARD_MODULES = { 19 | "ResNet1D": ResNet1D, 20 | "Unet1D": Unet1D, 21 | } 22 | 23 | CLASSIFIERS = { 24 | "PointsBasedGraspClassifier": PointsBasedGraspClassifier, 25 | } 26 | 27 | 28 | ALL_MODELS = { 29 | "GraspCVAE": GraspCVAE, 30 | "GraspLatentDDM": GraspLatentDDM, 31 | **CLASSIFIERS, 32 | **STANDARD_MODULES, 33 | **DIFFUSION_MODELS, 34 | } 35 | 36 | 37 | ## ----------------- Model Build methods ----------------- ## 38 | 39 | 40 | ### For now, user `build_model` for single model and `build_model_from_cfg` for multiple models specified in a composite model config 41 | def build_model(model_cfg: Config) -> nn.Module: 42 | """Build model from config 43 | 44 | Args: 45 | model_cfg (Config): model config 46 | 47 | Returns: 48 | (nn.Module): built model 49 | """ 50 | if model_cfg.type not in ALL_MODELS: 51 | raise KeyError( 52 | f"`{model_cfg.type}` in the model_registry. 
\n Supported models are: {list(ALL_MODELS)}" 53 | ) 54 | return ALL_MODELS[model_cfg.type](**model_cfg.args) 55 | 56 | 57 | def build_model_configs_recursive(model_cfg: Config) -> Config: 58 | """Build model configs recursively 59 | 60 | This allows building of nested models. For example, if we have a model that takes in a model as an argument, 61 | this can be handled in the config as in the example below: 62 | model = dict( 63 | type="SomeModel", 64 | args=dict( 65 | model=dict( 66 | type="SomeOtherModel", 67 | args=dict( 68 | ... 69 | ) 70 | ) 71 | ) 72 | ) 73 | 74 | Returns a dict with values for all "model" keys replaced with the built model. 75 | 76 | Args: 77 | model_cfg (Config): model config 78 | 79 | Returns: 80 | Config: model config 81 | """ 82 | # new_model_cfg = copy.deepcopy(cfg) 83 | if isinstance(model_cfg, dict) or isinstance(model_cfg, Config): 84 | for k, v in model_cfg.items(): 85 | if k == "args": 86 | if isinstance(v, dict): 87 | model_cfg[k] = build_model_configs_recursive(v) 88 | if k == "model": 89 | if isinstance(v, dict): 90 | model_cfg[k] = build_model_configs_recursive(v) 91 | model_cfg[k] = build_model(model_cfg[k]) 92 | 93 | return model_cfg 94 | 95 | 96 | def build_model_from_cfg(model_cfg: Config) -> nn.Module: 97 | """Build model from config 98 | # TODO: Rename this to indicate multiple models building 99 | 100 | This relies on a hacky model registry specified by ALL_MODELS and the `type` key in the config. 101 | The `type` key is used to look up the model class and the `args` key is used to pass in the 102 | arguments to the model class. 103 | 104 | Args: 105 | model_cfg (Config): model config 106 | 107 | Returns: 108 | (nn.Module): model 109 | """ 110 | 111 | # recursively build model configs for nested model configs 112 | built_model_cfg = build_model_configs_recursive(model_cfg) 113 | 114 | return ( 115 | built_model_cfg.model if hasattr(built_model_cfg, "model") else built_model_cfg 116 | ) 117 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Function 4 | 5 | from .backend import _backend 6 | 7 | __all__ = ["gather", "furthest_point_sample", "logits_mask"] 8 | 9 | 10 | class Gather(Function): 11 | @staticmethod 12 | def forward(ctx, features, indices): 13 | """ 14 | Gather 15 | :param ctx: 16 | :param features: features of points, FloatTensor[B, C, N] 17 | :param indices: centers' indices in points, IntTensor[b, m] 18 | :return: 19 | centers_coords: coordinates of sampled centers, FloatTensor[B, C, M] 20 | """ 21 | features = features.contiguous() 22 | indices = indices.int().contiguous() 23 | ctx.save_for_backward(indices) 24 | ctx.num_points = features.size(-1) 25 | return _backend.gather_features_forward(features, indices) 26 | 27 | @staticmethod 28 | def backward(ctx, grad_output): 29 | (indices,) = ctx.saved_tensors 30 | grad_features = _backend.gather_features_backward( 31 | grad_output.contiguous(), indices, ctx.num_points 32 | ) 33 | return grad_features, None 34 | 35 | 36 | gather = Gather.apply 37 | 38 | 39 | def furthest_point_sample(coords, num_samples): 40 | """ 41 | Uses iterative furthest point sampling to select a set of npoint features that have the largest 42 | minimum distance to the sampled point set 43 | :param coords: coordinates of points, FloatTensor[B, 3, N] 44 | :param 
num_samples: int, M 45 | :return: 46 | centers_coords: coordinates of sampled centers, FloatTensor[B, 3, M] 47 | """ 48 | coords = coords.contiguous() 49 | indices = _backend.furthest_point_sampling(coords, num_samples) 50 | return gather(coords, indices) 51 | 52 | 53 | def logits_mask(coords, logits, num_points_per_object): 54 | """ 55 | Use logits to sample points 56 | :param coords: coords of points, FloatTensor[B, 3, N] 57 | :param logits: binary classification logits, FloatTensor[B, 2, N] 58 | :param num_points_per_object: M, #points per object after masking, int 59 | :return: 60 | selected_coords: FloatTensor[B, 3, M] 61 | masked_coords_mean: mean coords of selected points, FloatTensor[B, 3] 62 | mask: mask to select points, BoolTensor[B, N] 63 | """ 64 | batch_size, _, num_points = coords.shape 65 | mask = torch.lt(logits[:, 0, :], logits[:, 1, :]) # [B, N] 66 | num_candidates = torch.sum(mask, dim=-1, keepdim=True) # [B, 1] 67 | masked_coords = coords * mask.view(batch_size, 1, num_points) # [B, C, N] 68 | masked_coords_mean = ( 69 | torch.sum(masked_coords, dim=-1) 70 | / torch.max(num_candidates, torch.ones_like(num_candidates)).float() 71 | ) # [B, C] 72 | selected_indices = torch.zeros( 73 | (batch_size, num_points_per_object), device=coords.device, dtype=torch.int32 74 | ) 75 | for i in range(batch_size): 76 | current_mask = mask[i] # [N] 77 | current_candidates = current_mask.nonzero().view(-1) 78 | current_num_candidates = current_candidates.numel() 79 | if current_num_candidates >= num_points_per_object: 80 | choices = np.random.choice( 81 | current_num_candidates, num_points_per_object, replace=False 82 | ) 83 | selected_indices[i] = current_candidates[choices] 84 | elif current_num_candidates > 0: 85 | choices = np.concatenate( 86 | [ 87 | np.arange(current_num_candidates).repeat( 88 | num_points_per_object // current_num_candidates 89 | ), 90 | np.random.choice( 91 | current_num_candidates, 92 | num_points_per_object % current_num_candidates, 93 | replace=False, 94 | ), 95 | ] 96 | ) 97 | np.random.shuffle(choices) 98 | selected_indices[i] = current_candidates[choices] 99 | selected_coords = gather( 100 | masked_coords - masked_coords_mean.view(batch_size, -1, 1), selected_indices 101 | ) 102 | return selected_coords, masked_coords_mean, mask 103 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/class_conditioned_resnet.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import torch 4 | from torch import nn 5 | 6 | from .resnets import TimeConditionedResNet1D, default 7 | 8 | 9 | class ClassTimeConditionedResNet1D(TimeConditionedResNet1D): 10 | def __init__( 11 | self, 12 | dim: int, 13 | init_dim: int = None, 14 | out_channels: int = None, 15 | block_channels: Sequence = ..., 16 | channels: int = 1, 17 | input_conditioning_dims: int = None, 18 | is_self_conditioned: bool = False, 19 | resnet_block_groups: int = 8, 20 | learned_variance: bool = False, 21 | dropout=None, 22 | is_time_conditioned: bool = True, 23 | learned_sinusoidal_cond: bool = False, 24 | random_fourier_features: bool = False, 25 | learned_sinusoidal_dim: int = 16, 26 | ) -> None: 27 | super().__init__( 28 | dim, 29 | init_dim, 30 | out_channels, 31 | block_channels, 32 | channels, 33 | input_conditioning_dims, 34 | is_self_conditioned, 35 | resnet_block_groups, 36 | learned_variance, 37 | dropout, 38 | is_time_conditioned, 39 | learned_sinusoidal_cond, 40 | 
random_fourier_features, 41 | learned_sinusoidal_dim, 42 | ) 43 | self.cls_embed = nn.Sequential( 44 | nn.Linear(1, self.emb_dim), 45 | nn.SiLU(), 46 | ) 47 | 48 | def forward( 49 | self, 50 | x: torch.Tensor, 51 | *, 52 | time: torch.Tensor = None, 53 | z_cond: torch.Tensor = None, 54 | x_self_cond: torch.Tensor = None, 55 | cls_cond: torch.Tensor = None, 56 | **kwargs 57 | ) -> torch.Tensor: 58 | """Forward 59 | 60 | Args: 61 | x (torch.Tensor): input 62 | time (torch.Tensor): timestep for diffusion 63 | Note: Set to None when using the architecture outside diffusion. 64 | i.e. self.is_time_conditioned = False 65 | z_cond (torch.Tensor, optional): conditioning latent. Defaults to None. 66 | x_self_cond (torch.Tensor, optional): self conditioning vector. Defaults to None. 67 | 68 | Returns: 69 | torch.Tensor: output 70 | """ 71 | 72 | # TODO: clean up this class-conditioning fallback 73 | if cls_cond is None: 74 | assert ( 75 | "mode_cls" in kwargs["metas"] 76 | ), "Class conditioning tensor is required" 77 | cls_cond = ( 78 | kwargs["metas"]["mode_cls"] 79 | .unsqueeze(-1) 80 | .reshape(-1, 1) 81 | .to(dtype=x.dtype) 82 | ) 83 | 84 | if self.is_self_conditioned: 85 | x_self_cond = default(x_self_cond, lambda: torch.zeros_like(x)) 86 | x = torch.cat((x_self_cond, x), dim=1) 87 | 88 | x = self.init_conv(x) 89 | # r = x.clone() 90 | 91 | # Time embedding for diffusion, None for non-diffusion 92 | if self.is_time_conditioned and self.time_mlp is not None: 93 | assert time is not None 94 | latent_emb = self.time_mlp(time) 95 | else: 96 | latent_emb = None 97 | 98 | # Class embedding 99 | cls_emb = self.cls_embed(cls_cond).squeeze(1) 100 | latent_emb = latent_emb + cls_emb if latent_emb is not None else cls_emb 101 | 102 | # Add input embedding if input conditioned 103 | if self.is_input_conditioned: 104 | input_emb = self.input_emb_layers(z_cond) 105 | if input_emb.ndim == 3: 106 | latent_emb = latent_emb.unsqueeze(-2).repeat([1, 3, 1]) 107 | else: 108 | raise NotImplementedError 109 | latent_emb = latent_emb + input_emb if latent_emb is not None else input_emb 110 | 111 | for block1, block2, attn, updownsample in self.blocks: 112 | x = block1(x, latent_emb) 113 | 114 | x = block2(x, latent_emb) 115 | x = attn(x) 116 | 117 | x = updownsample(x) 118 | if self.dropout: 119 | x = self.dropout(x) 120 | 121 | x = self.final_res_block(x, latent_emb) 122 | return self.final_conv(x) 123 | -------------------------------------------------------------------------------- /grasp_ldm/trainers/experiment.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import warnings 5 | 6 | 7 | class Experiment: 8 | def __init__( 9 | self, 10 | config_path, 11 | resume_from="last", 12 | out_dir="output/", 13 | ckpt_format="ckpt", 14 | model_suffix="", 15 | configs_dir_name="configs", 16 | ) -> None: 17 | """ 18 | NOTE: configs_dir_name is important to split the category and name of the experiment 19 | 20 | Args: 21 | config_path (str): path to the config file 22 | resume_from (str, optional): Checkpoint to resume training from. Defaults to "last". 23 | out_dir (str, optional): Output directory. Defaults to "output/". 24 | ckpt_format (str, optional): Checkpoint format. Defaults to "ckpt". 25 | model_suffix (str, optional): Suffix for the model directory. Defaults to "". 26 | configs_dir_name (str, optional): Name of the directory containing the configs. Defaults to "configs". 
27 | """ 28 | # Checkpoint format 29 | self._ckpt_format = ckpt_format 30 | 31 | # Experiment naming 32 | # Split from configs directory 33 | relative_config_path = config_path.split(configs_dir_name)[-1].strip("/") 34 | self.name = os.path.basename(relative_config_path).split(".")[0] 35 | self.category = os.path.dirname(relative_config_path) 36 | 37 | # Experiment directories 38 | self.out_dir = out_dir 39 | self.exp_dir = os.path.join(os.path.abspath(out_dir), self.category, self.name) 40 | self.model_dir = self.exp_dir + ( 41 | f"/{model_suffix}" if model_suffix is not None else "" 42 | ) 43 | 44 | self.ckpt_dir = os.path.join(self.model_dir, "checkpoints") 45 | self.log_dir = os.path.join(self.model_dir, "logs") 46 | self._make_dirs() 47 | 48 | # Make a copy of the config file when training 49 | self.src_config_path = config_path 50 | self.dst_config_path = os.path.join(self.model_dir, f"{self.name}.py") 51 | 52 | # Maintain a single config in exp dir. Warn if exists and over-write 53 | if os.path.isfile(self.dst_config_path): 54 | warnings.warn( 55 | f"Existing config file will be over-written: {self.dst_config_path}" 56 | ) 57 | shutil.copy(self.src_config_path, self.dst_config_path) 58 | 59 | # Resume from checkpoint 60 | self.resume_from = resume_from 61 | 62 | @property 63 | def all_checkpoints(self): 64 | return glob.glob(os.path.join(self.ckpt_dir, f"*.{self._ckpt_format}")) 65 | 66 | @property 67 | def exists(self): 68 | return os.path.isdir(self.exp_dir) 69 | 70 | @property 71 | def last_checkpoint(self): 72 | ckpt_path = os.path.join(self.ckpt_dir, f"last.{self._ckpt_format}") 73 | return ckpt_path if os.path.exists(ckpt_path) else None 74 | 75 | @property 76 | def best_checkpoint(self): 77 | ckpt_path = os.path.join(self.ckpt_dir, f"best.{self._ckpt_format}") 78 | return ckpt_path if os.path.exists(ckpt_path) else None 79 | 80 | @property 81 | def default_resume_checkpoint(self): 82 | _default_checkpoint = self.last_checkpoint 83 | 84 | if self.resume_from in ("best", "last"): 85 | ckpt_path = ( 86 | self.last_checkpoint 87 | if self.resume_from == "last" 88 | else self.best_checkpoint 89 | ) 90 | else: 91 | ckpt_path = self.resume_from 92 | 93 | if ckpt_path is not None and os.path.isfile(ckpt_path): 94 | _default_checkpoint = ckpt_path 95 | else: 96 | # Do nothing and start from scratch 97 | pass 98 | 99 | # warnings.warn(f"Could not find checkpoint: {ckpt_path}") 100 | # if _default_checkpoint is None: 101 | # warnings.warn( 102 | # f"Default checkpoint {_default_checkpoint} also not found." 103 | # ) 104 | return _default_checkpoint 105 | 106 | def _make_dirs(self): 107 | # Warn existing checkpoint directory 108 | if os.path.exists(self.ckpt_dir): 109 | warnings.warn( 110 | f"Experiment Checkpoint directory exists: {self.ckpt_dir} \nCheckpoints may be auto-overwritten by the trainer." 
111 | ) 112 | else: 113 | os.makedirs(self.ckpt_dir, exist_ok=True) 114 | 115 | if not os.path.exists(self.log_dir): 116 | os.makedirs(self.log_dir, exist_ok=True) 117 | 118 | return 119 | -------------------------------------------------------------------------------- /tools/generate_grasps.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | from typing import Optional, Tuple 5 | 6 | import numpy as np 7 | 8 | os.environ["LIBGL_ALWAYS_INDIRECT"] = "0" 9 | sys.path.append((os.getcwd())) 10 | 11 | from tools.inference import Conditioning, InferenceLDM, InferenceVAE, ModelType 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description="Grasp Generation Script") 16 | parser.add_argument( 17 | "--exp_path", type=str, required=True, help="Path to experiment checkpoint" 18 | ) 19 | parser.add_argument( 20 | "--data_root", type=str, default="data/ACRONYM", help="Root directory for data" 21 | ) 22 | parser.add_argument( 23 | "--mode", 24 | type=str, 25 | choices=["VAE", "LDM"], 26 | default="VAE", 27 | help="Model type to use", 28 | ) 29 | parser.add_argument("--split", type=str, default="test", help="Data split to use") 30 | parser.add_argument( 31 | "--num_grasps", type=int, default=20, help="Number of grasps to generate" 32 | ) 33 | parser.add_argument("--visualize", action="store_true", help="Enable visualization") 34 | parser.add_argument( 35 | "--no_ema", 36 | action="store_false", 37 | dest="use_ema_model", 38 | help="Disable EMA model usage", 39 | ) 40 | parser.add_argument( 41 | "--num_samples", type=int, default=11, help="Number of samples to generate" 42 | ) 43 | parser.add_argument( 44 | "--conditioning", 45 | type=str, 46 | choices=["unconditional", "class", "region"], 47 | default="unconditional", 48 | help="Type of conditioning to use", 49 | ) 50 | parser.add_argument( 51 | "--condition_value", 52 | type=int, 53 | help="Value for conditioning (class label or region ID)", 54 | ) 55 | parser.add_argument( 56 | "--inference_steps", 57 | type=int, 58 | default=100, 59 | help="Number of inference steps for LDM", 60 | ) 61 | return parser.parse_args() 62 | 63 | 64 | def setup_model(args): 65 | exp_name = os.path.basename(args.exp_path) 66 | exp_out_root = os.path.dirname(args.exp_path) 67 | 68 | if args.mode == "LDM": 69 | model = InferenceLDM( 70 | exp_name=exp_name, 71 | exp_out_root=exp_out_root, 72 | use_elucidated=False, 73 | data_root=args.data_root, 74 | load_dataset=True, 75 | num_inference_steps=args.inference_steps, 76 | use_fast_sampler=False, 77 | data_split=args.split, 78 | use_ema_model=args.use_ema_model, 79 | ) 80 | print( 81 | f"Trained using noise schedule: beta0 = {model.model.diffusion_model.beta_start} ; betaT = {model.model.diffusion_model.beta_end}" 82 | ) 83 | elif args.mode == "VAE": 84 | model = InferenceVAE( 85 | exp_name=exp_name, 86 | exp_out_root=exp_out_root, 87 | data_root=args.data_root, 88 | load_dataset=True, 89 | data_split=args.split, 90 | use_ema_model=args.use_ema_model, 91 | ) 92 | return model 93 | 94 | 95 | def get_conditioning(args) -> Tuple[Optional[Conditioning], Optional[int]]: 96 | if args.conditioning == "unconditional": 97 | return Conditioning.UNCONDITIONAL, None 98 | elif args.conditioning == "class": 99 | if args.condition_value is None: 100 | raise ValueError("Must provide --condition_value for class conditioning") 101 | return Conditioning.CLASS_CONDITIONED, args.condition_value 102 | elif args.conditioning == "region": 103 | 
if args.condition_value is None: 104 | raise ValueError("Must provide --condition_value for region conditioning") 105 | return Conditioning.REGION_CONDITIONED, args.condition_value 106 | return None, None 107 | 108 | 109 | def main(): 110 | args = parse_args() 111 | model = setup_model(args) 112 | condition_type, conditioning = get_conditioning(args) 113 | 114 | for _ in range(args.num_samples): 115 | data_idx = np.random.randint(0, len(model.dataset)) 116 | 117 | # Skip conditioning for VAE mode 118 | if args.mode == "VAE": 119 | condition_type = Conditioning.UNCONDITIONAL 120 | conditioning = None 121 | 122 | results = model.infer( 123 | data_idx=data_idx, 124 | num_grasps=args.num_grasps, 125 | visualize=args.visualize, 126 | condition_type=condition_type, 127 | conditioning=conditioning, 128 | ) 129 | 130 | if args.visualize: 131 | results.show(line_settings={"point_size": 10}) 132 | 133 | 134 | if __name__ == "__main__": 135 | main() 136 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/voxelization/vox.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../cuda_utils.cuh" 5 | 6 | /* 7 | Function: get how many points in each voxel grid 8 | Args: 9 | b : batch size 10 | n : number of points 11 | r : voxel resolution 12 | r2 : = r * r 13 | r3 : s, voxel cube size = r ** 3 14 | coords : coords of each point, IntTensor[b, 3, n] 15 | ind : voxel index of each point, IntTensor[b, n] 16 | cnt : #points in each voxel index, IntTensor[b, s] 17 | */ 18 | __global__ void grid_stats_kernel(int b, int n, int r, int r2, int r3, 19 | const int *__restrict__ coords, 20 | int *__restrict__ ind, int *cnt) { 21 | int batch_index = blockIdx.x; 22 | int stride = blockDim.x; 23 | int index = threadIdx.x; 24 | coords += batch_index * n * 3; 25 | ind += batch_index * n; 26 | cnt += batch_index * r3; 27 | 28 | for (int i = index; i < n; i += stride) { 29 | // if (ind[i] == -1) 30 | // continue; 31 | ind[i] = coords[i] * r2 + coords[i + n] * r + coords[i + n + n]; 32 | atomicAdd(cnt + ind[i], 1); 33 | } 34 | } 35 | 36 | /* 37 | Function: average pool voxelization (forward) 38 | Args: 39 | b : batch size 40 | c : #channels 41 | n : number of points 42 | s : voxel cube size = voxel resolution ** 3 43 | ind : voxel index of each point, IntTensor[b, n] 44 | cnt : #points in each voxel index, IntTensor[b, s] 45 | feat: features, FloatTensor[b, c, n] 46 | out : outputs, FloatTensor[b, c, s] 47 | */ 48 | __global__ void avg_voxelize_kernel(int b, int c, int n, int s, 49 | const int *__restrict__ ind, 50 | const int *__restrict__ cnt, 51 | const float *__restrict__ feat, 52 | float *__restrict__ out) { 53 | int batch_index = blockIdx.x; 54 | int stride = blockDim.x; 55 | int index = threadIdx.x; 56 | ind += batch_index * n; 57 | feat += batch_index * c * n; 58 | out += batch_index * c * s; 59 | cnt += batch_index * s; 60 | for (int i = index; i < n; i += stride) { 61 | int pos = ind[i]; 62 | // if (pos == -1) 63 | // continue; 64 | int cur_cnt = cnt[pos]; 65 | if (cur_cnt > 0) { 66 | float div_cur_cnt = 1.0 / static_cast(cur_cnt); 67 | for (int j = 0; j < c; j++) { 68 | atomicAdd(out + j * s + pos, feat[j * n + i] * div_cur_cnt); 69 | } 70 | } 71 | } 72 | } 73 | 74 | /* 75 | Function: average pool voxelization (backward) 76 | Args: 77 | b : batch size 78 | c : #channels 79 | n : number of points 80 | r3 : voxel cube size = voxel resolution ** 3 81 | 
ind : voxel index of each point, IntTensor[b, n] 82 | cnt : #points in each voxel index, IntTensor[b, s] 83 | grad_y : grad outputs, FloatTensor[b, c, s] 84 | grad_x : grad inputs, FloatTensor[b, c, n] 85 | */ 86 | __global__ void avg_voxelize_grad_kernel(int b, int c, int n, int r3, 87 | const int *__restrict__ ind, 88 | const int *__restrict__ cnt, 89 | const float *__restrict__ grad_y, 90 | float *__restrict__ grad_x) { 91 | int batch_index = blockIdx.x; 92 | int stride = blockDim.x; 93 | int index = threadIdx.x; 94 | ind += batch_index * n; 95 | grad_x += batch_index * c * n; 96 | grad_y += batch_index * c * r3; 97 | cnt += batch_index * r3; 98 | for (int i = index; i < n; i += stride) { 99 | int pos = ind[i]; 100 | // if (pos == -1) 101 | // continue; 102 | int cur_cnt = cnt[pos]; 103 | if (cur_cnt > 0) { 104 | float div_cur_cnt = 1.0 / static_cast(cur_cnt); 105 | for (int j = 0; j < c; j++) { 106 | atomicAdd(grad_x + j * n + i, grad_y[j * r3 + pos] * div_cur_cnt); 107 | } 108 | } 109 | } 110 | } 111 | 112 | void avg_voxelize(int b, int c, int n, int r, int r2, int r3, const int *coords, 113 | const float *feat, int *ind, int *cnt, float *out) { 114 | grid_stats_kernel<<>>(b, n, r, r2, r3, coords, ind, 115 | cnt); 116 | avg_voxelize_kernel<<>>(b, c, n, r3, ind, cnt, 117 | feat, out); 118 | CUDA_CHECK_ERRORS(); 119 | } 120 | 121 | void avg_voxelize_grad(int b, int c, int n, int s, const int *ind, 122 | const int *cnt, const float *grad_y, float *grad_x) { 123 | avg_voxelize_grad_kernel<<>>(b, c, n, s, ind, cnt, 124 | grad_y, grad_x); 125 | CUDA_CHECK_ERRORS(); 126 | } 127 | -------------------------------------------------------------------------------- /.docker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Based on Gist: AndrejOrsula/_docker_helper_scripts.md 3 | 4 | set -e 5 | 6 | ## Configuration 7 | # Directory and image 8 | IMAGE_NAME="kuldeepbrd/grasp_ldm:latest" 9 | 10 | # Flags for running the container 11 | DOCKER_RUN_OPTS="${DOCKER_RUN_OPTS:- 12 | --interactive 13 | --tty 14 | --rm 15 | --network host 16 | --ipc host 17 | }" 18 | 19 | # Flags for enabling GPU and GUI (X11) inside the container 20 | ENABLE_GPU="${ENABLE_GPU:-true}" 21 | ENABLE_GUI="${ENABLE_GUI:-true}" 22 | 23 | 24 | # List of volumes to mount (can be updated by passing -v HOST_DIR:DOCKER_DIR:OPTIONS) 25 | CUSTOM_VOLUMES=( 26 | "/etc/localtime:/etc/localtime:ro" 27 | ) 28 | 29 | 30 | 31 | ## Select the container name based on the image name 32 | CONTAINER_NAME="${IMAGE_NAME##*/}" 33 | # If the container name is already in use, append a unique (incremental) numerical suffix 34 | if docker container list --all --format "{{.Names}}" | grep -qi "${CONTAINER_NAME}"; then 35 | CONTAINER_NAME="${CONTAINER_NAME}1" 36 | while docker container list --all --format "{{.Names}}" | grep -qi "${CONTAINER_NAME}"; do 37 | CONTAINER_NAME="${CONTAINER_NAME%?}$((${CONTAINER_NAME: -1} + 1))" 38 | done 39 | fi 40 | DOCKER_RUN_OPTS="--name ${CONTAINER_NAME} ${DOCKER_RUN_OPTS}" 41 | 42 | ## Parse volumes and environment variables 43 | while getopts ":v:e:" opt; do 44 | case "${opt}" in 45 | v) CUSTOM_VOLUMES+=("${OPTARG}") ;; 46 | e) CUSTOM_ENVS+=("${OPTARG}") ;; 47 | *) 48 | echo >&2 "Usage: ${0} [-v HOST_DIR:DOCKER_DIR:OPTIONS] [-e ENV=VALUE] [TAG] [CMD]" 49 | exit 2 50 | ;; 51 | esac 52 | done 53 | shift "$((OPTIND - 1))" 54 | 55 | ## GPU 56 | if [[ "${ENABLE_GPU,,}" = true ]]; then 57 | check_nvidia_gpu() { 58 | if [[ -n "${ENABLE_GPU_FORCE_NVIDIA}" ]]; then 59 | if 
[[ "${ENABLE_GPU_FORCE_NVIDIA,,}" = true ]]; then 60 | echo "INFO: NVIDIA GPU is force-enabled via \`ENABLE_GPU_FORCE_NVIDIA=true\`." 61 | return 0 # NVIDIA GPU is force-enabled 62 | else 63 | echo "INFO: NVIDIA GPU is force-disabled via \`ENABLE_GPU_FORCE_NVIDIA=false\`." 64 | return 1 # NVIDIA GPU is force-disabled 65 | fi 66 | elif ! lshw -C display 2>/dev/null | grep -qi "vendor.*nvidia"; then 67 | return 1 # NVIDIA GPU is not present 68 | elif [[ ! -x "$(command -v nvidia-smi)" ]]; then 69 | echo >&2 -e "\e[33mWARNING: NVIDIA GPU is detected, but its functionality cannot be verified. This container will not be able to use the GPU. Please install nvidia-utils on the host system or force-enable NVIDIA GPU via \`ENABLE_GPU_FORCE_NVIDIA=true\` environment variable.\e[0m" 70 | return 1 # NVIDIA GPU is present but nvidia-utils not installed 71 | elif ! nvidia-smi -L &>/dev/null; then 72 | echo >&2 -e "\e[33mWARNING: NVIDIA GPU is detected, but it does not seem to be working properly. This container will not be able to use the GPU. Please ensure the NVIDIA drivers are properly installed on the host system.\e[0m" 73 | return 1 # NVIDIA GPU is present but is not working properly 74 | else 75 | return 0 # NVIDIA GPU is present and appears to be working 76 | fi 77 | } 78 | if check_nvidia_gpu; then 79 | # Enable GPU either via NVIDIA Container Toolkit or NVIDIA Docker (depending on Docker version) 80 | if dpkg --compare-versions "$(docker version --format '{{.Server.Version}}')" gt "19.3"; then 81 | GPU_OPT="--gpus all" 82 | else 83 | GPU_OPT="--runtime nvidia" 84 | fi 85 | GPU_ENVS=( 86 | NVIDIA_VISIBLE_DEVICES="all" 87 | NVIDIA_DRIVER_CAPABILITIES="all" 88 | ) 89 | elif [[ $(getent group video) ]]; then 90 | GPU_OPT="--device=/dev/dri:/dev/dri --group-add video" 91 | else 92 | GPU_OPT="--device=/dev/dri:/dev/dri" 93 | fi 94 | fi 95 | 96 | ## GUI 97 | if [[ "${ENABLE_GUI,,}" = true ]]; then 98 | # To enable GUI, make sure processes in the container can connect to the x server 99 | XAUTH=/tmp/.docker.xauth 100 | if [ ! -f ${XAUTH} ]; then 101 | touch ${XAUTH} 102 | chmod a+r ${XAUTH} 103 | 104 | XAUTH_LIST=$(xauth nlist "${DISPLAY}") 105 | if [ -n "${XAUTH_LIST}" ]; then 106 | # shellcheck disable=SC2001 107 | XAUTH_LIST=$(sed -e 's/^..../ffff/' <<<"${XAUTH_LIST}") 108 | echo "${XAUTH_LIST}" | xauth -f ${XAUTH} nmerge - 109 | fi 110 | fi 111 | # GUI-enabling volumes 112 | GUI_VOLUMES=( 113 | "${XAUTH}:${XAUTH}" 114 | "/tmp/.X11-unix:/tmp/.X11-unix" 115 | "/dev/input:/dev/input" 116 | ) 117 | # GUI-enabling environment variables 118 | GUI_ENVS=( 119 | DISPLAY="${DISPLAY}" 120 | XAUTHORITY="${XAUTH}" 121 | ) 122 | fi 123 | 124 | ## Run the container 125 | DOCKER_RUN_CMD=( 126 | docker run 127 | "${DOCKER_RUN_OPTS}" 128 | "${GPU_OPT}" 129 | "${GPU_ENVS[@]/#/"--env "}" 130 | "${GUI_VOLUMES[@]/#/"--volume "}" 131 | "${GUI_ENVS[@]/#/"--env "}" 132 | "${CUSTOM_VOLUMES[@]/#/"--volume "}" 133 | "${CUSTOM_ENVS[@]/#/"--env "}" 134 | "${IMAGE_NAME}" 135 | "${CMD}" 136 | ) 137 | echo -e "\033[1;30m${DOCKER_RUN_CMD[*]}\033[0m" | xargs 138 | # shellcheck disable=SC2048 139 | exec ${DOCKER_RUN_CMD[*]} 140 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/pointnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from . 
import functional as F 5 | from .ball_query import BallQuery 6 | from .shared_mlp import SharedMLP 7 | 8 | __all__ = ["PointNetAModule", "PointNetSAModule", "PointNetFPModule"] 9 | 10 | 11 | class PointNetAModule(nn.Module): 12 | def __init__(self, in_channels, out_channels, include_coordinates=True): 13 | super().__init__() 14 | if not isinstance(out_channels, (list, tuple)): 15 | out_channels = [[out_channels]] 16 | elif not isinstance(out_channels[0], (list, tuple)): 17 | out_channels = [out_channels] 18 | 19 | mlps = [] 20 | total_out_channels = 0 21 | for _out_channels in out_channels: 22 | mlps.append( 23 | SharedMLP( 24 | in_channels=in_channels + (3 if include_coordinates else 0), 25 | out_channels=_out_channels, 26 | dim=1, 27 | ) 28 | ) 29 | total_out_channels += _out_channels[-1] 30 | 31 | self.include_coordinates = include_coordinates 32 | self.out_channels = total_out_channels 33 | self.mlps = nn.ModuleList(mlps) 34 | 35 | def forward(self, inputs): 36 | features, coords = inputs 37 | if self.include_coordinates: 38 | features = torch.cat([features, coords], dim=1) 39 | coords = torch.zeros((coords.size(0), 3, 1), device=coords.device) 40 | if len(self.mlps) > 1: 41 | features_list = [] 42 | for mlp in self.mlps: 43 | features_list.append(mlp(features).max(dim=-1, keepdim=True).values) 44 | return torch.cat(features_list, dim=1), coords 45 | else: 46 | return self.mlps[0](features).max(dim=-1, keepdim=True).values, coords 47 | 48 | def extra_repr(self): 49 | return f"out_channels={self.out_channels}, include_coordinates={self.include_coordinates}" 50 | 51 | 52 | class PointNetSAModule(nn.Module): 53 | def __init__( 54 | self, 55 | num_centers, 56 | radius, 57 | num_neighbors, 58 | in_channels, 59 | out_channels, 60 | include_coordinates=True, 61 | ): 62 | super().__init__() 63 | if not isinstance(radius, (list, tuple)): 64 | radius = [radius] 65 | if not isinstance(num_neighbors, (list, tuple)): 66 | num_neighbors = [num_neighbors] * len(radius) 67 | assert len(radius) == len(num_neighbors) 68 | if not isinstance(out_channels, (list, tuple)): 69 | out_channels = [[out_channels]] * len(radius) 70 | elif not isinstance(out_channels[0], (list, tuple)): 71 | out_channels = [out_channels] * len(radius) 72 | assert len(radius) == len(out_channels) 73 | 74 | groupers, mlps = [], [] 75 | total_out_channels = 0 76 | for _radius, _out_channels, _num_neighbors in zip( 77 | radius, out_channels, num_neighbors 78 | ): 79 | groupers.append( 80 | BallQuery( 81 | radius=_radius, 82 | num_neighbors=_num_neighbors, 83 | include_coordinates=include_coordinates, 84 | ) 85 | ) 86 | mlps.append( 87 | SharedMLP( 88 | in_channels=in_channels + (3 if include_coordinates else 0), 89 | out_channels=_out_channels, 90 | dim=2, 91 | ) 92 | ) 93 | total_out_channels += _out_channels[-1] 94 | 95 | self.num_centers = num_centers 96 | self.out_channels = total_out_channels 97 | self.groupers = nn.ModuleList(groupers) 98 | self.mlps = nn.ModuleList(mlps) 99 | 100 | def forward(self, inputs): 101 | features, coords = inputs 102 | centers_coords = F.furthest_point_sample(coords, self.num_centers) 103 | features_list = [] 104 | for grouper, mlp in zip(self.groupers, self.mlps): 105 | features_list.append( 106 | mlp(grouper(coords, centers_coords, features)).max(dim=-1).values 107 | ) 108 | if len(features_list) > 1: 109 | return torch.cat(features_list, dim=1), centers_coords 110 | else: 111 | return features_list[0], centers_coords 112 | 113 | def extra_repr(self): 114 | return f"num_centers={self.num_centers}, 
out_channels={self.out_channels}" 115 | 116 | 117 | class PointNetFPModule(nn.Module): 118 | def __init__(self, in_channels, out_channels): 119 | super().__init__() 120 | self.mlp = SharedMLP(in_channels=in_channels, out_channels=out_channels, dim=1) 121 | 122 | def forward(self, inputs): 123 | if len(inputs) == 3: 124 | points_coords, centers_coords, centers_features = inputs 125 | points_features = None 126 | else: 127 | points_coords, centers_coords, centers_features, points_features = inputs 128 | interpolated_features = F.nearest_neighbor_interpolate( 129 | points_coords, centers_coords, centers_features 130 | ) 131 | if points_features is not None: 132 | interpolated_features = torch.cat( 133 | [interpolated_features, points_features], dim=1 134 | ) 135 | return self.mlp(interpolated_features), points_coords 136 | -------------------------------------------------------------------------------- /grasp_ldm/models/grasp_classifier.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import torch 4 | from torch import Tensor, nn 5 | 6 | from ..losses import ClassificationLoss as BCELogitLoss 7 | from ..utils.gripper import SimplePandaGripper 8 | from .modules.base_network import BaseGraspClassifier 9 | from .modules.ext.pvcnn.utils import create_mlp_components 10 | from .modules.pc_encoders import PVCNN, PVCNN2 11 | 12 | 13 | class PointsBasedGraspClassifier(BaseGraspClassifier): 14 | SUPPORTED_BASE_NETWORKS = {"PVCNN": PVCNN, "PVCNN2": PVCNN2} 15 | 16 | SUPPORTED_LOSSES = {"BCEClassificationLoss": BCELogitLoss} 17 | 18 | def __init__(self, num_pc_points, points_backbone_config: dict, loss_config: dict): 19 | super().__init__() 20 | 21 | # Loss 22 | self._loss_config = loss_config 23 | _classification_loss_cfg = loss_config.classification_loss 24 | self.loss = self.SUPPORTED_LOSSES[_classification_loss_cfg["type"]]( 25 | **_classification_loss_cfg["args"] 26 | ) 27 | 28 | # Object point cloud 29 | self.num_pc_points = num_pc_points 30 | 31 | # Base Point cloud network 32 | self.base_network = self.SUPPORTED_BASE_NETWORKS[ 33 | points_backbone_config["type"] 34 | ](**points_backbone_config["args"]) 35 | 36 | # Cls sub-network 37 | self._cls_out_dim = 1 38 | self._width_multiplier = 1 39 | 40 | cls_mlp_layers, _ = create_mlp_components( 41 | in_channels=self.base_network.out_channels, 42 | out_channels=[128, 0.5, 1], 43 | classifier=True, 44 | dim=2, 45 | width_multiplier=self._width_multiplier, 46 | ) 47 | logit_layer = nn.Linear(self.num_pc_points, 1) 48 | 49 | self.classifier = nn.Sequential(*cls_mlp_layers, logit_layer) 50 | 51 | # Classifier outputs binary logits. We use sigmoid to get psuedo-probability 52 | self.sigmoid = nn.Sigmoid() 53 | 54 | def forward( 55 | self, 56 | pc: Tensor, 57 | grasp_points: Tensor, 58 | *, 59 | cls_target: Tensor = None, 60 | compute_loss: bool = True 61 | ) -> Tensor: 62 | """ 63 | Args: 64 | pc (Tensor): [B, NP, 3] Point cloud 65 | grasp_points (Tensor): [B, NG, 3] Grasp pose (t(3), mrp(3)) 66 | Returns: 67 | Tensor: [B, 1] Grasp success pred logit or loss 68 | """ 69 | 70 | # Add feature label. 
0 for pc points and 1 for gripper points 71 | obj_pc = torch.cat((pc, torch.zeros_like(pc[..., :1])), dim=-1) 72 | grasp_points = torch.cat( 73 | (grasp_points, torch.ones_like(grasp_points[..., :1])), dim=-1 74 | ) 75 | 76 | # Concat object and gripper point cloud : [B, Np, 3] -> [B, Np+Ng, 3] 77 | pc_in = torch.cat((obj_pc, grasp_points), dim=-2) 78 | 79 | # [B, N, 3] -> [B, 3, N] 80 | pc_in = torch.transpose(pc_in, 1, 2).contiguous() 81 | 82 | # Pass through PVCNN modules 83 | x = self.base_network(pc_in) 84 | 85 | # [B, 1] 86 | cls_logit = self.classifier(x).squeeze() 87 | 88 | # # Sanity check 89 | # assert ( 90 | # cls_logit.ndim == 1 and cls_logit.shape[0] == pc.shape[0] 91 | # ), "Something went wrong in classifier shape broadcasting" 92 | preds = self.sigmoid(cls_logit) 93 | if compute_loss: 94 | if cls_target is None: 95 | raise ValueError("cls_target must be provided if compute_loss is True") 96 | 97 | if cls_target.shape[0] != cls_logit.shape[0]: 98 | raise ValueError("cls_target and cls_logit size mismatch") 99 | 100 | # Note: Loss is BCE with logits, so we don't apply sigmoid here 101 | loss = self.loss(cls_logit, cls_target) 102 | return loss, preds 103 | else: 104 | return None, preds 105 | 106 | # def merge_pc_gripper_points(self, pc: Tensor, grasp_pose: Tensor) -> Tensor: 107 | # """Merge point cloud and gripper points for PVCNN input 108 | 109 | # B: Batch size 110 | # Np: Number of points in point cloud 111 | # Ng: Number of gripper points 112 | 113 | # Args: 114 | # pc (Tensor): [B, Np, 3] Point cloud 115 | # grasp_pose (Tensor): [B, 6] Grasp pose (t(3), mrp(3)) 116 | 117 | # Returns: 118 | # Tensor: [B, 3, Np+Ng] Point cloud with gripper points 119 | # """ 120 | 121 | # # Get projected gripper points per grasp pose: [Ng, 3] -> [Bp, Ng, 3] 122 | # grasp_points = self.gripper_points @ grasp_pose 123 | # grasp_points = grasp_points[..., :3] 124 | 125 | # # Transpose for valid input to PVCNN: [Bp, Np, 3] -> [Bp, 3, Np] 126 | # pc = pc.transpose(-1, -2).contiguous() 127 | # grasp_points = grasp_points.transpose(-1, -2).contiguous() 128 | 129 | # # Concat point cloud and features: [B, 3, Np+Ng] 130 | # pc = torch.cat((pc, grasp_points), dim=-1) 131 | 132 | # # Construct feature label tensor that is 0 for pc points and 1 for gripper points 133 | # feats = torch.zeros_like(pc[..., :1, :]) 134 | # feats[..., : -self.num_gripper_points, :] = 1 135 | 136 | # # point-features 137 | # pc_with_features = torch.cat((pc, feats), dim=-2) 138 | 139 | # return pc_with_features 140 | 141 | def classify_grasps(self, pc: Tensor, grasp_pose: Tensor) -> Tensor: 142 | _, preds = self.forward(pc, grasp_pose, compute_loss=False) 143 | return preds 144 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/pvconv.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/alexzhou907/PVD/blob/9747265a5f141e5546fd4f862bfa66aa59f1bd33/modules/pvconv.py 2 | import torch.nn as nn 3 | 4 | from ....modules import Attention, Swish 5 | from . 
import functional as F 6 | from .se import SE3d 7 | from .shared_mlp import SharedMLP 8 | from .voxelization import Voxelization 9 | 10 | __all__ = ["PVConv"] 11 | 12 | 13 | class PVConv(nn.Module): 14 | def __init__( 15 | self, 16 | in_channels, 17 | out_channels, 18 | kernel_size, 19 | resolution, 20 | use_attention=False, 21 | dropout=0.1, 22 | with_se=False, 23 | with_se_relu=False, 24 | normalize=True, 25 | eps=0, 26 | ): 27 | """PVConv 28 | 29 | Args: 30 | in_channels (int): Number of input channels. 31 | out_channels (int): Number of output channels. 32 | kernel_size (int): Kernel size of the convolution. 33 | resolution (int): Voxel resolution. 34 | attention (bool, optional): Whether to use attention. Defaults to False. 35 | dropout (float, optional): Dropout rate. Defaults to 0.1. 36 | with_se (bool, optional): Whether to use SE. Defaults to False. 37 | with_se_relu (bool, optional): Whether to use ReLU in SE. Defaults to False. 38 | eps (float, optional): Epsilon for normalization. Defaults to 0. 39 | 40 | """ 41 | super().__init__() 42 | self.in_channels = in_channels 43 | self.out_channels = out_channels 44 | self.kernel_size = kernel_size 45 | self.resolution = resolution 46 | 47 | self.voxelization = Voxelization(resolution, normalize=normalize, eps=eps) 48 | voxel_layers = [ 49 | nn.Conv3d( 50 | in_channels, 51 | out_channels, 52 | kernel_size, 53 | stride=1, 54 | padding=kernel_size // 2, 55 | ), 56 | nn.GroupNorm(num_groups=8, num_channels=out_channels), 57 | Swish(), 58 | ] 59 | voxel_layers += [nn.Dropout(dropout)] if dropout is not None else [] 60 | voxel_layers += [ 61 | nn.Conv3d( 62 | out_channels, 63 | out_channels, 64 | kernel_size, 65 | stride=1, 66 | padding=kernel_size // 2, 67 | ), 68 | nn.GroupNorm(num_groups=8, num_channels=out_channels), 69 | Attention(out_channels, 8) if use_attention else Swish(), 70 | ] 71 | if with_se: 72 | voxel_layers.append(SE3d(out_channels, use_relu=with_se_relu)) 73 | self.voxel_layers = nn.Sequential(*voxel_layers) 74 | self.point_features = SharedMLP(in_channels, out_channels) 75 | 76 | def forward(self, inputs): 77 | features, coords = inputs 78 | voxel_features, voxel_coords = self.voxelization(features, coords) 79 | voxel_features = self.voxel_layers(voxel_features) 80 | voxel_features = F.trilinear_devoxelize( 81 | voxel_features, voxel_coords, self.resolution, self.training 82 | ) 83 | fused_features = voxel_features + self.point_features(features) 84 | return fused_features, coords 85 | 86 | 87 | class PVConvReLU(nn.Module): 88 | def __init__( 89 | self, 90 | in_channels, 91 | out_channels, 92 | kernel_size, 93 | resolution, 94 | attention=False, 95 | leak=0.2, 96 | dropout=0.1, 97 | with_se=False, 98 | with_se_relu=False, 99 | normalize=True, 100 | eps=0, 101 | ): 102 | super().__init__() 103 | self.in_channels = in_channels 104 | self.out_channels = out_channels 105 | self.kernel_size = kernel_size 106 | self.resolution = resolution 107 | 108 | self.voxelization = Voxelization(resolution, normalize=normalize, eps=eps) 109 | voxel_layers = [ 110 | nn.Conv3d( 111 | in_channels, 112 | out_channels, 113 | kernel_size, 114 | stride=1, 115 | padding=kernel_size // 2, 116 | ), 117 | nn.BatchNorm3d(out_channels), 118 | nn.LeakyReLU(leak, True), 119 | ] 120 | voxel_layers += [nn.Dropout(dropout)] if dropout is not None else [] 121 | voxel_layers += [ 122 | nn.Conv3d( 123 | out_channels, 124 | out_channels, 125 | kernel_size, 126 | stride=1, 127 | padding=kernel_size // 2, 128 | ), 129 | nn.BatchNorm3d(out_channels), 130 | 
Attention(out_channels, 8) if attention else nn.LeakyReLU(leak, True), 131 | ] 132 | if with_se: 133 | voxel_layers.append(SE3d(out_channels, use_relu=with_se_relu)) 134 | self.voxel_layers = nn.Sequential(*voxel_layers) 135 | self.point_features = SharedMLP(in_channels, out_channels) 136 | 137 | def forward(self, inputs): 138 | features, coords, temb = inputs 139 | voxel_features, voxel_coords = self.voxelization(features, coords) 140 | voxel_features = self.voxel_layers(voxel_features) 141 | voxel_features = F.trilinear_devoxelize( 142 | voxel_features, voxel_coords, self.resolution, self.training 143 | ) 144 | fused_features = voxel_features + self.point_features(features) 145 | return fused_features, coords, temb 146 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/pointnet2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import ( 5 | create_mlp_components, 6 | create_pointnet2_fp_modules, 7 | create_pointnet2_sa_components, 8 | ) 9 | 10 | __all__ = ["PointNet2SSG", "PointNet2MSG"] 11 | 12 | 13 | class PointNet2(nn.Module): 14 | def __init__( 15 | self, 16 | # num_classes, 17 | sa_blocks, 18 | fp_blocks, 19 | with_one_hot_shape_id=False, 20 | num_shapes=0, 21 | extra_feature_channels=3, 22 | width_multiplier=1, 23 | voxel_resolution_multiplier=1, 24 | ): 25 | super().__init__() 26 | assert extra_feature_channels >= 0 27 | 28 | self.in_channels = extra_feature_channels + 3 29 | self.num_shapes = num_shapes 30 | self.with_one_hot_shape_id = with_one_hot_shape_id 31 | 32 | ( 33 | sa_layers, 34 | sa_in_channels, 35 | channels_sa_features, 36 | _, 37 | ) = create_pointnet2_sa_components( 38 | sa_blocks=sa_blocks, 39 | extra_feature_channels=extra_feature_channels, 40 | width_multiplier=width_multiplier, 41 | ) 42 | self.sa_layers = nn.ModuleList(sa_layers) 43 | 44 | # use one hot vector in the last fp module 45 | sa_in_channels[0] += num_shapes if with_one_hot_shape_id else 0 46 | fp_layers, channels_fp_features = create_pointnet2_fp_modules( 47 | fp_blocks=fp_blocks, 48 | in_channels=channels_sa_features, 49 | sa_in_channels=sa_in_channels, 50 | width_multiplier=width_multiplier, 51 | voxel_resolution_multiplier=voxel_resolution_multiplier, 52 | ) 53 | self.fp_layers = nn.ModuleList(fp_layers) 54 | 55 | # layers, _ = create_mlp_components( 56 | # in_channels=channels_fp_features, 57 | # out_channels=[128, 0.5, num_classes], 58 | # classifier=True, 59 | # dim=2, 60 | # width_multiplier=width_multiplier, 61 | # ) 62 | # self.classifier = nn.Sequential(*layers) 63 | 64 | def forward(self, inputs): 65 | # inputs : [B, in_channels + S, N] 66 | features = inputs[:, : self.in_channels, :] 67 | if self.with_one_hot_shape_id: 68 | assert inputs.size(1) == self.in_channels + self.num_shapes 69 | features_with_one_hot_vectors = inputs 70 | else: 71 | features_with_one_hot_vectors = features 72 | 73 | coords, features = ( 74 | features[:, :3, :].contiguous(), 75 | features[:, 3:, :].contiguous(), 76 | ) 77 | coords_list, in_features_list = [], [] 78 | for sa_module in self.sa_layers: 79 | in_features_list.append(features) 80 | coords_list.append(coords) 81 | features, coords = sa_module((features, coords)) 82 | in_features_list[0] = features_with_one_hot_vectors.contiguous() 83 | 84 | for fp_idx, fp_module in enumerate(self.fp_layers): 85 | features, coords = fp_module( 86 | ( 87 | coords_list[-1 - fp_idx], 88 | coords, 89 | features, 
90 | in_features_list[-1 - fp_idx], 91 | ) 92 | ) 93 | 94 | # return self.classifier(features) 95 | return features 96 | 97 | 98 | class PointNet2SSG(PointNet2): 99 | sa_blocks = [ 100 | (None, (512, 0.2, 64, (64, 64, 128))), 101 | (None, (128, 0.4, 64, (128, 128, 256))), 102 | (None, (None, None, None, (256, 512, 1024))), 103 | ] 104 | fp_blocks = [((256, 256), None), ((256, 128), None), ((128, 128, 128), None)] 105 | 106 | def __init__( 107 | self, 108 | # num_classes, 109 | num_shapes=0, 110 | extra_feature_channels=3, 111 | width_multiplier=1, 112 | voxel_resolution_multiplier=1, 113 | ): 114 | super().__init__( 115 | # num_classes=num_classes, 116 | num_shapes=num_shapes, 117 | sa_blocks=self.sa_blocks, 118 | fp_blocks=self.fp_blocks, 119 | with_one_hot_shape_id=False, 120 | extra_feature_channels=extra_feature_channels, 121 | width_multiplier=width_multiplier, 122 | voxel_resolution_multiplier=voxel_resolution_multiplier, 123 | ) 124 | 125 | 126 | class PointNet2MSG(PointNet2): 127 | sa_blocks = [ 128 | ( 129 | None, 130 | ( 131 | 512, 132 | [0.1, 0.2, 0.4], 133 | [32, 64, 128], 134 | [(32, 32, 64), (64, 64, 128), (64, 96, 128)], 135 | ), 136 | ), 137 | (None, (128, [0.4, 0.8], [64, 128], [(128, 128, 256), (128, 196, 256)])), 138 | (None, (None, None, None, (256, 512, 1024))), 139 | ] 140 | fp_blocks = [((256, 256), None), ((256, 128), None), ((128, 128, 128), None)] 141 | 142 | def __init__( 143 | self, 144 | num_classes, 145 | num_shapes, 146 | extra_feature_channels=3, 147 | width_multiplier=1, 148 | voxel_resolution_multiplier=1, 149 | ): 150 | super().__init__( 151 | num_classes=num_classes, 152 | num_shapes=num_shapes, 153 | sa_blocks=self.sa_blocks, 154 | fp_blocks=self.fp_blocks, 155 | with_one_hot_shape_id=True, 156 | extra_feature_channels=extra_feature_channels, 157 | width_multiplier=width_multiplier, 158 | voxel_resolution_multiplier=voxel_resolution_multiplier, 159 | ) 160 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/sampling/sampling.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../cuda_utils.cuh" 5 | 6 | /* 7 | Function: gather centers' features (forward) 8 | Args: 9 | b : batch size 10 | c : #channles of features 11 | n : number of points in point clouds 12 | m : number of query/sampled centers 13 | features: points' features, FloatTensor[b, c, n] 14 | indices : centers' indices in points, IntTensor[b, m] 15 | out : gathered features, FloatTensor[b, c, m] 16 | */ 17 | __global__ void gather_features_kernel(int b, int c, int n, int m, 18 | const float *__restrict__ features, 19 | const int *__restrict__ indices, 20 | float *__restrict__ out) { 21 | int batch_index = blockIdx.x; 22 | int channel_index = blockIdx.y; 23 | int temp_index = batch_index * c + channel_index; 24 | features += temp_index * n; 25 | indices += batch_index * m; 26 | out += temp_index * m; 27 | 28 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 29 | out[j] = features[indices[j]]; 30 | } 31 | } 32 | 33 | void gather_features(int b, int c, int n, int m, const float *features, 34 | const int *indices, float *out) { 35 | gather_features_kernel<<>>( 37 | b, c, n, m, features, indices, out); 38 | CUDA_CHECK_ERRORS(); 39 | } 40 | 41 | /* 42 | Function: gather centers' features (backward) 43 | Args: 44 | b : batch size 45 | c : #channles of features 46 | n : number of points in point clouds 47 | m : number of query/sampled 
centers 48 | grad_y : grad of gathered features, FloatTensor[b, c, m] 49 | indices : centers' indices in points, IntTensor[b, m] 50 | grad_x : grad of points' features, FloatTensor[b, c, n] 51 | */ 52 | __global__ void gather_features_grad_kernel(int b, int c, int n, int m, 53 | const float *__restrict__ grad_y, 54 | const int *__restrict__ indices, 55 | float *__restrict__ grad_x) { 56 | int batch_index = blockIdx.x; 57 | int channel_index = blockIdx.y; 58 | int temp_index = batch_index * c + channel_index; 59 | grad_y += temp_index * m; 60 | indices += batch_index * m; 61 | grad_x += temp_index * n; 62 | 63 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 64 | atomicAdd(grad_x + indices[j], grad_y[j]); 65 | } 66 | } 67 | 68 | void gather_features_grad(int b, int c, int n, int m, const float *grad_y, 69 | const int *indices, float *grad_x) { 70 | gather_features_grad_kernel<<>>( 72 | b, c, n, m, grad_y, indices, grad_x); 73 | CUDA_CHECK_ERRORS(); 74 | } 75 | 76 | /* 77 | Function: furthest point sampling 78 | Args: 79 | b : batch size 80 | n : number of points in point clouds 81 | m : number of query/sampled centers 82 | coords : points' coords, FloatTensor[b, 3, n] 83 | distances : minimum distance of a point to the set, IntTensor[b, n] 84 | indices : sampled centers' indices in points, IntTensor[b, m] 85 | */ 86 | __global__ void furthest_point_sampling_kernel(int b, int n, int m, 87 | const float *__restrict__ coords, 88 | float *__restrict__ distances, 89 | int *__restrict__ indices) { 90 | if (m <= 0) 91 | return; 92 | int batch_index = blockIdx.x; 93 | coords += batch_index * n * 3; 94 | distances += batch_index * n; 95 | indices += batch_index * m; 96 | 97 | const int BlockSize = 512; 98 | __shared__ float dists[BlockSize]; 99 | __shared__ int dists_i[BlockSize]; 100 | const int BufferSize = 3072; 101 | __shared__ float buf[BufferSize * 3]; 102 | 103 | int old = 0; 104 | if (threadIdx.x == 0) 105 | indices[0] = old; 106 | 107 | for (int j = threadIdx.x; j < min(BufferSize, n); j += blockDim.x) { 108 | buf[j] = coords[j]; 109 | buf[j + BufferSize] = coords[j + n]; 110 | buf[j + BufferSize + BufferSize] = coords[j + n + n]; 111 | } 112 | __syncthreads(); 113 | 114 | for (int j = 1; j < m; j++) { 115 | int besti = 0; // best index 116 | float best = -1; // farthest distance 117 | // calculating the distance with the latest sampled point 118 | float x1 = coords[old]; 119 | float y1 = coords[old + n]; 120 | float z1 = coords[old + n + n]; 121 | for (int k = threadIdx.x; k < n; k += blockDim.x) { 122 | // fetch distance at block n, thread k 123 | float td = distances[k]; 124 | float x2, y2, z2; 125 | if (k < BufferSize) { 126 | x2 = buf[k]; 127 | y2 = buf[k + BufferSize]; 128 | z2 = buf[k + BufferSize + BufferSize]; 129 | } else { 130 | x2 = coords[k]; 131 | y2 = coords[k + n]; 132 | z2 = coords[k + n + n]; 133 | } 134 | float d = 135 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 136 | float d2 = min(d, td); 137 | // update "point-to-set" distance 138 | if (d2 != td) 139 | distances[k] = d2; 140 | // update the farthest distance at sample step j 141 | if (d2 > best) { 142 | best = d2; 143 | besti = k; 144 | } 145 | } 146 | 147 | dists[threadIdx.x] = best; 148 | dists_i[threadIdx.x] = besti; 149 | for (int u = 0; (1 << u) < blockDim.x; u++) { 150 | __syncthreads(); 151 | if (threadIdx.x < (blockDim.x >> (u + 1))) { 152 | int i1 = (threadIdx.x * 2) << u; 153 | int i2 = (threadIdx.x * 2 + 1) << u; 154 | if (dists[i1] < dists[i2]) { 155 | dists[i1] = 
dists[i2]; 156 | dists_i[i1] = dists_i[i2]; 157 | } 158 | } 159 | } 160 | __syncthreads(); 161 | 162 | // finish sample step j; old is the sampled index 163 | old = dists_i[0]; 164 | if (threadIdx.x == 0) 165 | indices[j] = old; 166 | } 167 | } 168 | 169 | void furthest_point_sampling(int b, int n, int m, const float *coords, 170 | float *distances, int *indices) { 171 | furthest_point_sampling_kernel<<<b, 512, 0, at::cuda::getCurrentCUDAStream()>>>(b, n, m, coords, distances, 172 | indices); 173 | CUDA_CHECK_ERRORS(); 174 | } 175 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/trilinear_devox.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #include "../cuda_utils.cuh" 5 | 6 | /* 7 | Function: trilinear devoxlization (forward) 8 | Args: 9 | b : batch size 10 | c : #channels 11 | n : number of points 12 | r : voxel resolution 13 | r2 : r ** 2 14 | r3 : r ** 3 15 | coords : the coordinates of points, FloatTensor[b, 3, n] 16 | feat : features, FloatTensor[b, c, r3] 17 | inds : the voxel indices of point cube, IntTensor[b, 8, n] 18 | wgts : weight for trilinear interpolation, FloatTensor[b, 8, n] 19 | outs : outputs, FloatTensor[b, c, n] 20 | */ 21 | __global__ void trilinear_devoxelize_kernel(int b, int c, int n, int r, int r2, 22 | int r3, bool is_training, 23 | const float *__restrict__ coords, 24 | const float *__restrict__ feat, 25 | int *__restrict__ inds, 26 | float *__restrict__ wgts, 27 | float *__restrict__ outs) { 28 | int batch_index = blockIdx.x; 29 | int stride = blockDim.x; 30 | int index = threadIdx.x; 31 | coords += batch_index * n * 3; 32 | inds += batch_index * n * 8; 33 | wgts += batch_index * n * 8; 34 | feat += batch_index * c * r3; 35 | outs += batch_index * c * n; 36 | 37 | for (int i = index; i < n; i += stride) { 38 | float x = coords[i]; 39 | float y = coords[i + n]; 40 | float z = coords[i + n + n]; 41 | float x_lo_f = floorf(x); 42 | float y_lo_f = floorf(y); 43 | float z_lo_f = floorf(z); 44 | 45 | float x_d_1 = x - x_lo_f; // / (x_hi_f - x_lo_f + 1e-8f) 46 | float y_d_1 = y - y_lo_f; 47 | float z_d_1 = z - z_lo_f; 48 | float x_d_0 = 1.0f - x_d_1; 49 | float y_d_0 = 1.0f - y_d_1; 50 | float z_d_0 = 1.0f - z_d_1; 51 | 52 | float wgt000 = x_d_0 * y_d_0 * z_d_0; 53 | float wgt001 = x_d_0 * y_d_0 * z_d_1; 54 | float wgt010 = x_d_0 * y_d_1 * z_d_0; 55 | float wgt011 = x_d_0 * y_d_1 * z_d_1; 56 | float wgt100 = x_d_1 * y_d_0 * z_d_0; 57 | float wgt101 = x_d_1 * y_d_0 * z_d_1; 58 | float wgt110 = x_d_1 * y_d_1 * z_d_0; 59 | float wgt111 = x_d_1 * y_d_1 * z_d_1; 60 | 61 | int x_lo = static_cast<int>(x_lo_f); 62 | int y_lo = static_cast<int>(y_lo_f); 63 | int z_lo = static_cast<int>(z_lo_f); 64 | int x_hi = (x_d_1 > 0) ? -1 : 0; 65 | int y_hi = (y_d_1 > 0) ? -1 : 0; 66 | int z_hi = (z_d_1 > 0) ? -
1 : 0; 67 | 68 | int idx000 = x_lo * r2 + y_lo * r + z_lo; 69 | int idx001 = idx000 + z_hi; // x_lo * r2 + y_lo * r + z_hi; 70 | int idx010 = idx000 + (y_hi & r); // x_lo * r2 + y_hi * r + z_lo; 71 | int idx011 = idx010 + z_hi; // x_lo * r2 + y_hi * r + z_hi; 72 | int idx100 = idx000 + (x_hi & r2); // x_hi * r2 + y_lo * r + z_lo; 73 | int idx101 = idx100 + z_hi; // x_hi * r2 + y_lo * r + z_hi; 74 | int idx110 = idx100 + (y_hi & r); // x_hi * r2 + y_hi * r + z_lo; 75 | int idx111 = idx110 + z_hi; // x_hi * r2 + y_hi * r + z_hi; 76 | 77 | if (is_training) { 78 | wgts[i] = wgt000; 79 | wgts[i + n] = wgt001; 80 | wgts[i + n * 2] = wgt010; 81 | wgts[i + n * 3] = wgt011; 82 | wgts[i + n * 4] = wgt100; 83 | wgts[i + n * 5] = wgt101; 84 | wgts[i + n * 6] = wgt110; 85 | wgts[i + n * 7] = wgt111; 86 | inds[i] = idx000; 87 | inds[i + n] = idx001; 88 | inds[i + n * 2] = idx010; 89 | inds[i + n * 3] = idx011; 90 | inds[i + n * 4] = idx100; 91 | inds[i + n * 5] = idx101; 92 | inds[i + n * 6] = idx110; 93 | inds[i + n * 7] = idx111; 94 | } 95 | 96 | for (int j = 0; j < c; j++) { 97 | int jr3 = j * r3; 98 | outs[j * n + i] = 99 | wgt000 * feat[jr3 + idx000] + wgt001 * feat[jr3 + idx001] + 100 | wgt010 * feat[jr3 + idx010] + wgt011 * feat[jr3 + idx011] + 101 | wgt100 * feat[jr3 + idx100] + wgt101 * feat[jr3 + idx101] + 102 | wgt110 * feat[jr3 + idx110] + wgt111 * feat[jr3 + idx111]; 103 | } 104 | } 105 | } 106 | 107 | /* 108 | Function: trilinear devoxlization (backward) 109 | Args: 110 | b : batch size 111 | c : #channels 112 | n : number of points 113 | r3 : voxel cube size = voxel resolution ** 3 114 | inds : the voxel indices of point cube, IntTensor[b, 8, n] 115 | wgts : weight for trilinear interpolation, FloatTensor[b, 8, n] 116 | grad_y : grad outputs, FloatTensor[b, c, n] 117 | grad_x : grad inputs, FloatTensor[b, c, r3] 118 | */ 119 | __global__ void trilinear_devoxelize_grad_kernel( 120 | int b, int c, int n, int r3, const int *__restrict__ inds, 121 | const float *__restrict__ wgts, const float *__restrict__ grad_y, 122 | float *__restrict__ grad_x) { 123 | int batch_index = blockIdx.x; 124 | int stride = blockDim.x; 125 | int index = threadIdx.x; 126 | inds += batch_index * n * 8; 127 | wgts += batch_index * n * 8; 128 | grad_x += batch_index * c * r3; 129 | grad_y += batch_index * c * n; 130 | 131 | for (int i = index; i < n; i += stride) { 132 | int idx000 = inds[i]; 133 | int idx001 = inds[i + n]; 134 | int idx010 = inds[i + n * 2]; 135 | int idx011 = inds[i + n * 3]; 136 | int idx100 = inds[i + n * 4]; 137 | int idx101 = inds[i + n * 5]; 138 | int idx110 = inds[i + n * 6]; 139 | int idx111 = inds[i + n * 7]; 140 | float wgt000 = wgts[i]; 141 | float wgt001 = wgts[i + n]; 142 | float wgt010 = wgts[i + n * 2]; 143 | float wgt011 = wgts[i + n * 3]; 144 | float wgt100 = wgts[i + n * 4]; 145 | float wgt101 = wgts[i + n * 5]; 146 | float wgt110 = wgts[i + n * 6]; 147 | float wgt111 = wgts[i + n * 7]; 148 | 149 | for (int j = 0; j < c; j++) { 150 | int jr3 = j * r3; 151 | float g = grad_y[j * n + i]; 152 | atomicAdd(grad_x + jr3 + idx000, wgt000 * g); 153 | atomicAdd(grad_x + jr3 + idx001, wgt001 * g); 154 | atomicAdd(grad_x + jr3 + idx010, wgt010 * g); 155 | atomicAdd(grad_x + jr3 + idx011, wgt011 * g); 156 | atomicAdd(grad_x + jr3 + idx100, wgt100 * g); 157 | atomicAdd(grad_x + jr3 + idx101, wgt101 * g); 158 | atomicAdd(grad_x + jr3 + idx110, wgt110 * g); 159 | atomicAdd(grad_x + jr3 + idx111, wgt111 * g); 160 | } 161 | } 162 | } 163 | 164 | void trilinear_devoxelize(int b, int c, int n, int r, 
int r2, int r3, 165 | bool training, const float *coords, const float *feat, 166 | int *inds, float *wgts, float *outs) { 167 | trilinear_devoxelize_kernel<<<b, optimal_num_threads(n), 0, at::cuda::getCurrentCUDAStream()>>>( 168 | b, c, n, r, r2, r3, training, coords, feat, inds, wgts, outs); 169 | CUDA_CHECK_ERRORS(); 170 | } 171 | 172 | void trilinear_devoxelize_grad(int b, int c, int n, int r3, const int *inds, 173 | const float *wgts, const float *grad_y, 174 | float *grad_x) { 175 | trilinear_devoxelize_grad_kernel<<<b, optimal_num_threads(n), 0, at::cuda::getCurrentCUDAStream()>>>( 176 | b, c, n, r3, inds, wgts, grad_y, grad_x); 177 | CUDA_CHECK_ERRORS(); 178 | } 179 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/modules/functional/src/interpolate/neighbor_interpolate.cu: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | 5 | #include "../cuda_utils.cuh" 6 | 7 | /* 8 | Function: three nearest neighbors 9 | Args: 10 | b : batch size 11 | n : number of points in point clouds 12 | m : number of query centers 13 | points_coords : coordinates of points, FloatTensor[b, 3, n] 14 | centers_coords: coordinates of centers, FloatTensor[b, 3, m] 15 | weights : weights of nearest 3 centers to the point, 16 | FloatTensor[b, 3, n] 17 | indices : indices of nearest 3 centers to the point, 18 | IntTensor[b, 3, n] 19 | */ 20 | __global__ void three_nearest_neighbors_kernel( 21 | int b, int n, int m, const float *__restrict__ points_coords, 22 | const float *__restrict__ centers_coords, float *__restrict__ weights, 23 | int *__restrict__ indices) { 24 | int batch_index = blockIdx.x; 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | points_coords += batch_index * 3 * n; 28 | weights += batch_index * 3 * n; 29 | indices += batch_index * 3 * n; 30 | centers_coords += batch_index * 3 * m; 31 | 32 | for (int j = index; j < n; j += stride) { 33 | float ux = points_coords[j]; 34 | float uy = points_coords[j + n]; 35 | float uz = points_coords[j + n + n]; 36 | 37 | double best0 = 1e40, best1 = 1e40, best2 = 1e40; 38 | int besti0 = 0, besti1 = 0, besti2 = 0; 39 | for (int k = 0; k < m; ++k) { 40 | float x = centers_coords[k]; 41 | float y = centers_coords[k + m]; 42 | float z = centers_coords[k + m + m]; 43 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 44 | if (d < best2) { 45 | best2 = d; 46 | besti2 = k; 47 | if (d < best1) { 48 | best2 = best1; 49 | besti2 = besti1; 50 | best1 = d; 51 | besti1 = k; 52 | if (d < best0) { 53 | best1 = best0; 54 | besti1 = besti0; 55 | best0 = d; 56 | besti0 = k; 57 | } 58 | } 59 | } 60 | } 61 | best0 = max(min(1e10f, best0), 1e-10f); 62 | best1 = max(min(1e10f, best1), 1e-10f); 63 | best2 = max(min(1e10f, best2), 1e-10f); 64 | float d0d1 = best0 * best1; 65 | float d0d2 = best0 * best2; 66 | float d1d2 = best1 * best2; 67 | float d0d1d2 = 1.0f / (d0d1 + d0d2 + d1d2); 68 | weights[j] = d1d2 * d0d1d2; 69 | indices[j] = besti0; 70 | weights[j + n] = d0d2 * d0d1d2; 71 | indices[j + n] = besti1; 72 | weights[j + n + n] = d0d1 * d0d1d2; 73 | indices[j + n + n] = besti2; 74 | } 75 | } 76 | 77 | /* 78 | Function: interpolate three nearest neighbors (forward) 79 | Args: 80 | b : batch size 81 | c : #channels of features 82 | m : number of query centers 83 | n : number of points in point clouds 84 | centers_features: features of centers, FloatTensor[b, c, m] 85 | indices : indices of nearest 3 centers to the point, 86 | IntTensor[b, 3, n] 87 | weights : weights for interpolation, FloatTensor[b, 3, n] 88 | out : features of 
points, FloatTensor[b, c, n] 89 | */ 90 | __global__ void three_nearest_neighbors_interpolate_kernel( 91 | int b, int c, int m, int n, const float *__restrict__ centers_features, 92 | const int *__restrict__ indices, const float *__restrict__ weights, 93 | float *__restrict__ out) { 94 | int batch_index = blockIdx.x; 95 | centers_features += batch_index * m * c; 96 | indices += batch_index * n * 3; 97 | weights += batch_index * n * 3; 98 | out += batch_index * n * c; 99 | 100 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 101 | const int stride = blockDim.y * blockDim.x; 102 | for (int i = index; i < c * n; i += stride) { 103 | const int l = i / n; 104 | const int j = i % n; 105 | float w1 = weights[j]; 106 | float w2 = weights[j + n]; 107 | float w3 = weights[j + n + n]; 108 | int i1 = indices[j]; 109 | int i2 = indices[j + n]; 110 | int i3 = indices[j + n + n]; 111 | 112 | out[i] = centers_features[l * m + i1] * w1 + 113 | centers_features[l * m + i2] * w2 + 114 | centers_features[l * m + i3] * w3; 115 | } 116 | } 117 | 118 | void three_nearest_neighbors_interpolate(int b, int c, int m, int n, 119 | const float *points_coords, 120 | const float *centers_coords, 121 | const float *centers_features, 122 | int *indices, float *weights, 123 | float *out) { 124 | three_nearest_neighbors_kernel<<>>( 126 | b, n, m, points_coords, centers_coords, weights, indices); 127 | three_nearest_neighbors_interpolate_kernel<<< 128 | b, optimal_block_config(n, c), 0, at::cuda::getCurrentCUDAStream()>>>( 129 | b, c, m, n, centers_features, indices, weights, out); 130 | CUDA_CHECK_ERRORS(); 131 | } 132 | 133 | /* 134 | Function: interpolate three nearest neighbors (backward) 135 | Args: 136 | b : batch size 137 | c : #channels of features 138 | m : number of query centers 139 | n : number of points in point clouds 140 | grad_y : grad of features of points, FloatTensor[b, c, n] 141 | indices : indices of nearest 3 centers to the point, IntTensor[b, 3, n] 142 | weights : weights for interpolation, FloatTensor[b, 3, n] 143 | grad_x : grad of features of centers, FloatTensor[b, c, m] 144 | */ 145 | __global__ void three_nearest_neighbors_interpolate_grad_kernel( 146 | int b, int c, int n, int m, const float *__restrict__ grad_y, 147 | const int *__restrict__ indices, const float *__restrict__ weights, 148 | float *__restrict__ grad_x) { 149 | int batch_index = blockIdx.x; 150 | grad_y += batch_index * n * c; 151 | indices += batch_index * n * 3; 152 | weights += batch_index * n * 3; 153 | grad_x += batch_index * m * c; 154 | 155 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 156 | const int stride = blockDim.y * blockDim.x; 157 | for (int i = index; i < c * n; i += stride) { 158 | const int l = i / n; 159 | const int j = i % n; 160 | float w1 = weights[j]; 161 | float w2 = weights[j + n]; 162 | float w3 = weights[j + n + n]; 163 | int i1 = indices[j]; 164 | int i2 = indices[j + n]; 165 | int i3 = indices[j + n + n]; 166 | atomicAdd(grad_x + l * m + i1, grad_y[i] * w1); 167 | atomicAdd(grad_x + l * m + i2, grad_y[i] * w2); 168 | atomicAdd(grad_x + l * m + i3, grad_y[i] * w3); 169 | } 170 | } 171 | 172 | void three_nearest_neighbors_interpolate_grad(int b, int c, int n, int m, 173 | const float *grad_y, 174 | const int *indices, 175 | const float *weights, 176 | float *grad_x) { 177 | three_nearest_neighbors_interpolate_grad_kernel<<< 178 | b, optimal_block_config(n, c), 0, at::cuda::getCurrentCUDAStream()>>>( 179 | b, c, n, m, grad_y, indices, weights, grad_x); 180 | CUDA_CHECK_ERRORS(); 
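// Note: the weights produced by three_nearest_neighbors_kernel above are
// normalized inverse-(squared-)distance weights. With squared distances
// d0, d1, d2 to the three nearest centers,
//   w0 = d1*d2 / (d0*d1 + d0*d2 + d1*d2) = (1/d0) / (1/d0 + 1/d1 + 1/d2),
// and analogously for w1 and w2, so w0 + w1 + w2 = 1. For example,
// d0 = 1, d1 = 2, d2 = 4 gives w0 = 8/14 = 4/7, w1 = 4/14 = 2/7, w2 = 2/14 = 1/7.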
181 | } 182 | -------------------------------------------------------------------------------- /grasp_ldm/losses/loss.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from typing import Any 4 | 5 | import numpy as np 6 | import torch 7 | from torch import nn 8 | 9 | from grasp_ldm.utils.rotations import tmrp_to_H 10 | 11 | __all__ = [ 12 | "VAEReconstructionLoss", 13 | "VAELatentLoss", 14 | "ClassificationLoss", 15 | "QualityLoss", 16 | "GraspReconstructionLoss", 17 | "GraspControlPointsReconstructionLoss", 18 | ] 19 | 20 | 21 | # From: https://github.com/haofuml/cyclical_annealing 22 | def linear_cyclical_anneling(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5): 23 | L = np.ones(n_iter) * stop 24 | period = n_iter / n_cycle 25 | step = (stop - start) / (period * ratio) # linear schedule 26 | 27 | for c in range(n_cycle): 28 | v, i = start, 0 29 | while v <= stop and (int(i + c * period) < n_iter): 30 | L[int(i + c * period)] = v 31 | v += step 32 | i += 1 33 | return L 34 | 35 | 36 | class VAEReconstructionLoss(nn.Module): 37 | def __init__(self, weight=1, name="reconstruction_loss") -> None: 38 | super().__init__() 39 | self.name = name 40 | self.criterion = nn.MSELoss() 41 | self.weight = weight 42 | 43 | def forward(self, input, output): 44 | return self.weight * self.criterion(input, output) 45 | 46 | 47 | class GraspReconstructionLoss(VAEReconstructionLoss): 48 | def __init__( 49 | self, translation_weight=10, rotation_weight=1, name="reconstruction_loss" 50 | ) -> None: 51 | super().__init__(weight=1, name=name) 52 | 53 | self.translation_weight = translation_weight 54 | self.rotation_weight = rotation_weight 55 | 56 | def forward(self, x_out, x_in, **kwargs): 57 | """Forward 58 | 59 | Args: 60 | x_out (Tensor): [B, 6] Predicted pose- (t(3), mrp(3)) 61 | x_in (Tensor): [B, 6] Ground truth pose- (t(3), mrp(3)) 62 | 63 | Returns: 64 | _type_: _description_ 65 | """ 66 | x_pred = x_out.clone() 67 | x_pred[..., :3] = x_pred[..., :3] * self.translation_weight 68 | x_pred[..., 3:] = x_pred[..., 3:] * self.rotation_weight 69 | 70 | x_gt = x_in.clone() 71 | x_gt[..., :3] = x_gt[..., :3] * self.translation_weight 72 | x_gt[..., 3:] = x_gt[..., 3:] * self.rotation_weight 73 | 74 | return super().forward(x_gt, x_pred) 75 | 76 | 77 | class GraspControlPointsReconstructionLoss(VAEReconstructionLoss): 78 | def __init__( 79 | self, 80 | weight=1, 81 | name="reconstruction_loss", 82 | control_pts_file="grasp_ldm/dataset/acronym/gripper_ctrl_pts.json", 83 | ) -> None: 84 | super().__init__(weight=1, name=name) 85 | 86 | with open(control_pts_file) as f: 87 | _control_pts = np.array(json.load(f)) 88 | 89 | # append 1 to the end of each control point 90 | self.control_pts = torch.from_numpy( 91 | np.concatenate( 92 | [_control_pts, np.ones((_control_pts.shape[0], 1))], 93 | axis=1, 94 | ) 95 | ) 96 | self.criterion = nn.MSELoss() 97 | self.weight = weight 98 | 99 | def forward(self, x_target, x_pred, **kwargs): 100 | """Forward 101 | 102 | Args: 103 | x_out (Tensor): [B, 6] Predicted pose- (t(3), mrp(3)) 104 | x_in (Tensor): [B, 6] Ground truth pose- (t(3), mrp(3)) 105 | 106 | Returns: 107 | _type_: _description_ 108 | """ 109 | metas = kwargs["metas"] 110 | pc_batch_size = metas["grasp_std"].shape[0] 111 | h_target = x_target.view((pc_batch_size, -1, 6)) * metas["grasp_std"].unsqueeze( 112 | 1 113 | ) + metas["grasp_mean"].unsqueeze(1) 114 | h_pred = x_pred.view((pc_batch_size, -1, 6)) * metas["grasp_std"].unsqueeze( 115 | 1 
116 | ) + metas["grasp_mean"].unsqueeze(1) 117 | 118 | ctrl_pts = self.control_pts.clone().to(h_target.device, h_target.dtype) 119 | 120 | H_target = tmrp_to_H(h_target.view((-1, 6))) 121 | H_pred = tmrp_to_H(h_pred.view((-1, 6))) 122 | 123 | # Get the control points 124 | control_pts_target = (H_target @ ctrl_pts.T).transpose(1, 2) 125 | control_pts_pred = (H_pred @ ctrl_pts.T).transpose(1, 2) 126 | 127 | return self.weight * self.criterion(control_pts_target, control_pts_pred) 128 | 129 | 130 | class VAELatentLoss(nn.Module): 131 | def __init__( 132 | self, 133 | weight=1, 134 | name="kl_loss", 135 | cyclical_annealing=False, 136 | num_steps=None, 137 | num_cycles=None, 138 | start=1e-7, 139 | stop=0.2, 140 | ratio=0.25, 141 | ) -> None: 142 | super().__init__() 143 | self.name = name 144 | 145 | if not cyclical_annealing: 146 | self.weight = weight 147 | self.schedule = None 148 | else: 149 | assert num_cycles is not None and num_steps is not None 150 | self.weight = None 151 | self.schedule = linear_cyclical_anneling( 152 | num_steps, 153 | start=start, 154 | stop=stop, 155 | n_cycle=num_cycles, 156 | ratio=ratio, 157 | ) 158 | self.is_annealed = cyclical_annealing 159 | 160 | def forward( 161 | self, 162 | mu: torch.Tensor, 163 | logvar: torch.Tensor, 164 | return_unweighted: bool = False, 165 | **kwargs, 166 | ): 167 | """Forward 168 | B: Batch size 169 | D: Dimensions of the latent 170 | 171 | Args: 172 | mu (torch.Tensor): latent means [B, D] 173 | logvar (torch.Tensor): latent logvars [B, D] 174 | step (int, optional): step number for weight schedule. 175 | None, if no schedule. i.e. Constant weight 176 | return_unweighted (bool, optional): Whether to also return unweighted loss 177 | Defaults to False 178 | Returns: 179 | torch.Tensor: weighted kl loss [1,] (if return_unweighted is False) 180 | tuple(torch.Tensor, torch.Tensor): weighted_loss[1,], unweighted_kld[1,] 181 | """ 182 | kl_d = -0.5 * torch.sum(1 + logvar - mu**2 - logvar.exp(), dim=1) 183 | kl_d = torch.mean(kl_d, dim=0) 184 | 185 | if return_unweighted: 186 | return self.weight * kl_d, kl_d 187 | else: 188 | return self.weight * kl_d 189 | 190 | def set_weight_from_schedule(self, step): 191 | assert ( 192 | hasattr(self, "schedule") and self.schedule is not None 193 | ), "No member schedule found in self, to set the loss weight from schedule." 
194 | f"Weight annealing was set to {self.is_annealed}" 195 | 196 | self.weight = ( 197 | self.schedule[step] if step < len(self.schedule) else self.schedule[-1] 198 | ) 199 | return 200 | 201 | 202 | class ClassificationLoss(nn.Module): 203 | def __init__(self, weight=1, name="classfication_loss") -> None: 204 | super().__init__() 205 | self.name = name 206 | self.weight = weight 207 | self.class_criterion = nn.BCEWithLogitsLoss(reduction="mean") 208 | self.class_weight = weight 209 | 210 | def forward(self, output, targets, **kwargs): 211 | classification_loss = self.class_criterion(output, targets) 212 | return self.weight * classification_loss 213 | 214 | 215 | class QualityLoss(nn.Module): 216 | def __init__(self, weight=1, name="quality_loss") -> None: 217 | super().__init__() 218 | self.name = name 219 | self.weight = weight 220 | self.criterion = nn.SmoothL1Loss() 221 | 222 | def forward(self, quals_in, quals_target, **kwargs): 223 | confidence_loss = self.criterion(quals_in, quals_target) 224 | 225 | return self.weight * confidence_loss 226 | -------------------------------------------------------------------------------- /grasp_ldm/utils/camera.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import os 4 | import warnings 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from .utils import load_json 10 | 11 | try: 12 | import pyrender 13 | except: 14 | warnings.warn("pyrender was not found. Rendering modules will not work.") 15 | 16 | 17 | def read_csv_realsense(csv_file_path): 18 | with open(csv_file_path, "r") as csv_file: 19 | csv_reader = csv.reader(csv_file, delimiter=",") 20 | data = {row[0]: row[1] for row in csv_reader if len(row) > 1} 21 | 22 | frame_info = { 23 | key: data[key] 24 | for key in [ 25 | "Type", 26 | "Depth", 27 | "Format", 28 | "Frame Number", 29 | "Timestamp (ms)", 30 | "Resolution x", 31 | "Resolution y", 32 | "Bytes per pixel", 33 | ] 34 | } 35 | intrinsic_info = { 36 | key: data[key] for key in ["Fx", "Fy", "PPx", "PPy", "Distorsion"] 37 | } 38 | 39 | cam_json = { 40 | "hfov": 2 * np.arctan2(data["Resolution_x"] / (2 * data["Fx"])) * 180 / np.pi, 41 | "vfov": 2 * np.arctan2(data["Resolution_y"] / (2 * data["Fy"])) * 180 / np.pi, 42 | "width": int(data["Resolution_x"]), 43 | "height": int(data["Resolution_y"]), 44 | "cameraMatrix": [ 45 | [float(data["Fx"]), 0, float(data["PPx"])], 46 | [0, float(data["Fy"]), float(data["PPy"])], 47 | [0, 0, 1], 48 | ], 49 | "distCoeffs": [], 50 | } 51 | return cam_json 52 | 53 | 54 | def calculate_view_frustum(start_point, end_point, fov): 55 | """ 56 | Calculate the coordinates of the view frustum given the boresight line and FOV. 57 | 58 | Args: 59 | start_point (tuple): The starting point of the boresight line. 60 | end_point (tuple): The ending point of the boresight line. 61 | fov (float): The field of view of the camera in degrees. 62 | 63 | Returns: 64 | view_frustum (list): A list of tuples containing the coordinates of the view frustum. 
65 | """ 66 | 67 | # Convert the FOV from degrees to radians 68 | fov_rad = np.radians(fov) 69 | 70 | # Calculate the distance between the two points 71 | distance = np.sqrt( 72 | sum([(end - start) ** 2 for start, end in zip(start_point, end_point)]) 73 | ) 74 | 75 | # Calculate the half-angle of the FOV 76 | half_angle = np.tan(fov_rad / 2) 77 | 78 | # Calculate the coordinates of the view frustum 79 | view_frustum = [] 80 | for i in range(-1, 2, 2): # Iterate twice: -1 for near plane, +1 for far plane 81 | x = start_point[0] + i * distance * half_angle 82 | y = start_point[1] + i * distance * half_angle 83 | z = start_point[2] + i * distance 84 | view_frustum.append((x, y, z)) 85 | 86 | return view_frustum 87 | 88 | 89 | class Camera: 90 | """Camera model using a user json file""" 91 | 92 | def __init__( 93 | self, 94 | camera_json_path: str, 95 | z_near: float = 0.05, 96 | z_far: float = 20, 97 | ) -> None: 98 | """ 99 | Args: 100 | camera_json_path (str): camera json file path 101 | camera_name (str): camera name from the json. 102 | """ 103 | self.name = os.path.basename(camera_json_path) 104 | self.data = load_json(camera_json_path) 105 | 106 | # Intrinsics and distortion matrix 107 | self.K = np.array(self.data["cameraMatrix"]) 108 | self.dists = np.array(self.data["distCoeffs"]) 109 | 110 | # Focal Length in px 111 | self._fx = self.K[0, 0] 112 | self._fy = self.K[1, 1] 113 | 114 | # Principal centers 115 | self._cx = self.K[0, 2] 116 | self._cy = self.K[1, 2] 117 | 118 | # Near/Far limits in boresight 119 | self.z_near = z_near 120 | self.z_far = z_far 121 | 122 | # Image size in px 123 | self.width = self.data["width"] 124 | self.height = self.data["height"] 125 | 126 | # FOV 127 | self.xfov = self.data["hfov"] # HFOV 128 | self.yfov = self.data["vfov"] # VFOV 129 | 130 | def to_pyrender_camera(self): 131 | return pyrender.IntrinsicsCamera( 132 | self._fx, self._fy, self._cx, self._cy, self.z_near, self.z_far 133 | ) 134 | 135 | def depth_to_pointcloud( 136 | self, depth: np.ndarray, rgb: np.ndarray = None 137 | ) -> np.ndarray: 138 | """Convert depth image to pointcloud given camera intrinsics. 139 | Args: 140 | depth (np.ndarray): Depth image. 141 | Returns: 142 | np.ndarray: [nx4] (x, y, z, 1) Point cloud. 143 | """ 144 | 145 | height = depth.shape[0] 146 | width = depth.shape[1] 147 | 148 | assert ( 149 | height == self.height 150 | ), "Something went wrong. height of the depth image does not match the camera model." 151 | assert ( 152 | width == self.width 153 | ), "Something went wrong. width of the depth image does not match the camera model." 154 | 155 | mask = np.where(depth > 0) 156 | x, y = mask[1], mask[0] 157 | 158 | normalized_x = x.astype(np.float32) - self._cx 159 | normalized_y = y.astype(np.float32) - self._cy 160 | 161 | world_x = normalized_x * depth[y, x] / self._fx 162 | world_y = normalized_y * depth[y, x] / self._fy 163 | world_z = depth[y, x] 164 | 165 | if rgb is not None: 166 | rgb = rgb[y, x, :] 167 | 168 | pc = np.vstack((world_x, world_y, world_z)).T 169 | 170 | if rgb is not None: 171 | rgb = rgb[y, x, :] 172 | return pc, rgb 173 | else: 174 | return pc 175 | 176 | def depth_to_pointcloud_torch( 177 | self, depth: torch.Tensor, rgb: torch.Tensor = None 178 | ) -> torch.Tensor: 179 | """Convert depth image to pointcloud given camera intrinsics. 180 | Args: 181 | depth (torch.Tensor): Depth image. 182 | Returns: 183 | torch.Tensor: [nx4] (x, y, z, 1) Point cloud. 
184 | """ 185 | 186 | height = depth.shape[0] 187 | width = depth.shape[1] 188 | 189 | assert ( 190 | height == self.height 191 | ), "Something went wrong. height of the depth image does not match the camera model." 192 | assert ( 193 | width == self.width 194 | ), "Something went wrong. width of the depth image does not match the camera model." 195 | 196 | mask = torch.where(depth > 0) 197 | x, y = mask[1], mask[0] 198 | 199 | normalized_x = x.to(torch.float32) - self._cx 200 | normalized_y = y.to(torch.float32) - self._cy 201 | 202 | world_x = normalized_x * depth[y, x] / self._fx 203 | world_y = normalized_y * depth[y, x] / self._fy 204 | world_z = depth[y, x] 205 | 206 | if rgb is not None: 207 | rgb = rgb[y, x, :] 208 | 209 | pc = torch.vstack((world_x, world_y, world_z)).T 210 | 211 | if rgb is not None: 212 | rgb = rgb[y, x, :] 213 | return pc, rgb 214 | else: 215 | return pc 216 | 217 | def write_to_dir(self, out_dir): 218 | json_fp = os.path.join(out_dir, f"camera_{self.name}.json") 219 | 220 | print(f"Writing camera model {self.name} to {json_fp}.") 221 | with json_fp as fileobj: 222 | json.dump(self.data, fileobj) 223 | return 224 | 225 | # def get_trimesh_camera(self): 226 | # """Get a trimesh object representing the camera intrinsics. 227 | # Returns: 228 | # trimesh.scene.cameras.Camera: Intrinsic parameters of the camera model 229 | # """ 230 | # return trimesh.scene.cameras.Camera( 231 | # fov=(np.rad2deg(self._fov), np.rad2deg(self._fov)), 232 | # resolution=(self._height, self._width), 233 | # z_near=self._z_near, 234 | # ) 235 | -------------------------------------------------------------------------------- /grasp_ldm/trainers/grasp_classification_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | import einops 5 | import torch 6 | import torch.nn as nn 7 | import torcheval.metrics.functional as Metrics 8 | from pytorch_lightning.callbacks import ( 9 | DeviceStatsMonitor, 10 | LearningRateMonitor, 11 | ModelCheckpoint, 12 | ModelSummary, 13 | StochasticWeightAveraging, 14 | ) 15 | from pytorch_lightning.loggers import CSVLogger, Logger, TensorBoardLogger, WandbLogger 16 | from torch.utils.data import Dataset 17 | from utils.rotations import tmrp_to_H 18 | 19 | from grasp_ldm.dataset.builder import build_dataset_from_cfg 20 | from grasp_ldm.models.builder import build_model_from_cfg 21 | from grasp_ldm.utils.config import Config, ConfigDict 22 | 23 | from .experiment import Experiment 24 | from .trainer import LightningTrainer 25 | 26 | 27 | class GraspClassificationTrainer(LightningTrainer): 28 | CLS_PRED_THRESHOLD = 0.5 29 | 30 | def __init__(self, config: Config = None): 31 | """Grasp Classification Trainer""" 32 | 33 | # Split main sub-configs 34 | model_config = config.model 35 | data_config = config.data 36 | trainer_config = config.trainer 37 | 38 | # Experiment and config 39 | self._config = config 40 | self._experiment = Experiment(config.filename) 41 | 42 | # Checkpointing 43 | self._checkpointing_freq = ( 44 | trainer_config.checkpointing_freq 45 | if hasattr(trainer_config, "checkpointing_freq") 46 | else 1000 47 | ) 48 | trainer_config.default_root_dir = self._experiment.ckpt_dir 49 | 50 | # Initialize parent trainer class 51 | super().__init__(model_config, data_config, trainer_config) 52 | 53 | self.resume_from_checkpoint = self._experiment.default_resume_checkpoint 54 | 55 | def _build_dataset(self, data_config, split): 56 | """Custom routine for building dataset""" 57 | 
dataset = build_dataset_from_cfg(data_config, split) 58 | 59 | # dataset.pre_load() should define any pre-loading operations before workers are spawned 60 | dataset.pre_load() 61 | 62 | return dataset 63 | 64 | def _build_model(self, model_config): 65 | """Custom routine for building model""" 66 | model = build_model_from_cfg(ConfigDict(model=model_config)) 67 | 68 | ## TODO: custom model initialization, if any 69 | # model.initialize() 70 | 71 | return model 72 | 73 | def training_step(self, batch_data, batch_idx): 74 | """Training step""" 75 | 76 | # Inputs 77 | pc = batch_data["pc"] 78 | grasps = batch_data["grasps"] 79 | 80 | # TODO: verify this reshape consistency 81 | success_labels = batch_data["success"].view(-1) 82 | num_grasps = grasps.shape[1] 83 | 84 | # Repeat pc and grasp so there is a 1-1 pairing 85 | pc = pc.repeat_interleave(num_grasps, dim=0) 86 | grasps = einops.rearrange(grasps, "b n c d -> (b n) c d") 87 | 88 | # Metas 89 | metas = batch_data["metas"] 90 | 91 | # Forward 92 | loss, _ = self.model(pc, grasps, cls_target=success_labels, compute_loss=True) 93 | 94 | # Log Loss 95 | self.log("loss", loss, sync_dist=True) 96 | return loss 97 | 98 | def validation_step(self, batch_data, batch_idx): 99 | """Validation step""" 100 | 101 | # Inputs 102 | pc = batch_data["pc"] 103 | grasps = batch_data["grasps"] 104 | 105 | # TODO: verify this reshape consistency 106 | success_labels = batch_data["success"].view(-1) 107 | num_grasps = grasps.shape[1] 108 | 109 | # Repeat pc and grasp so there is a 1-1 pairing 110 | pc = pc.repeat_interleave(num_grasps, dim=0) 111 | grasps = einops.rearrange(grasps, "b n c d -> (b n) c d") 112 | 113 | # Metas 114 | metas = batch_data["metas"] 115 | 116 | # Forward 117 | loss, preds = self.model( 118 | pc, grasps, cls_target=success_labels, compute_loss=True 119 | ) 120 | 121 | # Convert probs to binary preds 122 | preds = preds.detach() 123 | preds[preds > self.CLS_PRED_THRESHOLD] = 1 124 | preds[preds <= self.CLS_PRED_THRESHOLD] = 0 125 | 126 | # Accumulate preds in cache 127 | self._update_cache("validation", "epoch", "cls_preds", preds.long()) 128 | self._update_cache("validation", "epoch", "cls_targets", success_labels.long()) 129 | 130 | # Log Loss 131 | self.log("val_loss", loss, sync_dist=True, prog_bar=True) 132 | return 133 | 134 | def on_validation_epoch_end(self) -> None: 135 | eval_metrics = self._compute_metrics() 136 | self.log_dict({"validation_metrics": eval_metrics}, sync_dist=True) 137 | self.log( 138 | "val_accuracy", eval_metrics["accuracy"], prog_bar=True, sync_dist=True 139 | ) 140 | return 141 | 142 | def _get_callbacks(self) -> list: 143 | """Custom callbacks to be used by the trainer.""" 144 | 145 | checkpoint_callback1 = ModelCheckpoint( 146 | save_top_k=3, 147 | monitor="loss", 148 | mode="min", 149 | dirpath=self._experiment.ckpt_dir, 150 | filename="epoch_{epoch:02d}-step_{step}-loss_{loss:.2f}", 151 | save_last=True, 152 | every_n_train_steps=self._checkpointing_freq, 153 | ) 154 | 155 | checkpoint_callback2 = ModelCheckpoint( 156 | save_top_k=1, 157 | monitor="loss", 158 | mode="min", 159 | dirpath=self._experiment.ckpt_dir, 160 | filename="best", 161 | save_last=True, 162 | every_n_train_steps=1000, 163 | ) 164 | 165 | lr_monitor_callback = LearningRateMonitor(logging_interval="step") 166 | 167 | callbacks = [checkpoint_callback1, checkpoint_callback2, lr_monitor_callback] 168 | 169 | return callbacks 170 | 171 | def _get_logger(self) -> Logger: 172 | """Custom logger to be used by the trainer.""" 173 | if 
hasattr(self.trainer_config, "logger"): 174 | logger_config = self.trainer_config.logger 175 | 176 | if logger_config.type == "WandbLogger": 177 | assert hasattr( 178 | logger_config, "project" 179 | ), "WandbLogger requires a project name to be specified in the config." 180 | 181 | logger = WandbLogger( 182 | name=self._experiment.name, 183 | project=logger_config.project, 184 | save_dir=self._experiment.log_dir, 185 | config=self._config, 186 | ) 187 | elif logger_config.type == "TensorBoardLogger": 188 | logger = TensorBoardLogger( 189 | save_dir=self._experiment.log_dir, 190 | name=self._experiment.name, 191 | ) 192 | else: 193 | logger = CSVLogger( 194 | save_dir=self._experiment.log_dir, 195 | name=self._experiment.name, 196 | ) 197 | return logger 198 | 199 | def _compute_metrics(self): 200 | """Compute metrics on validation set""" 201 | 202 | # Collect preds and targets from cache 203 | cls_preds = torch.cat(self._validation_cache["epoch"]["cls_preds"]) 204 | cls_targets = torch.cat(self._validation_cache["epoch"]["cls_targets"]) 205 | 206 | # Compute binary classification metrics 207 | metrics = dict( 208 | accuracy=Metrics.binary_accuracy(cls_preds, cls_targets), 209 | precision=Metrics.binary_precision(cls_preds, cls_targets), 210 | recall=Metrics.binary_recall(cls_preds, cls_targets), 211 | f1=Metrics.binary_f1_score(cls_preds, cls_targets), 212 | aP=Metrics.binary_auprc(cls_preds, cls_targets), 213 | # confusion_matrix=Metrics.binary_confusion_matrix(cls_preds, cls_targets), 214 | ) 215 | 216 | return metrics 217 | -------------------------------------------------------------------------------- /configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | ## -------------------- Most frequently changed params here -------------------- 4 | 5 | resume_training_from_last = True 6 | 7 | max_steps = 180000 8 | batch_size = 10 9 | 10 | num_gpus = 1 11 | num_workers_per_gpu = 7 12 | 13 | # During training, if a ckpt is provided here, it overrides resume_training_from_last and instead resumes training from this ckpt 14 | vae_ckpt_path = None # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt" 15 | ddm_ckpt_path = None 16 | 17 | max_scenes = None 18 | 19 | 20 | root_data_dir = "data/ACRONYM" 21 | 22 | ## -------------------- Inputs/Shapes ------------------------ 23 | # Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)] 24 | 25 | pc_num_points = 1024 26 | pc_latent_dims = 64 27 | pc_latent_channels = 3 28 | 29 | grasp_pose_dims = 6 30 | num_output_qualities = 0 31 | grasp_latent_dims = 4 32 | 33 | grasp_representation_dims = ( 34 | grasp_pose_dims + num_output_qualities + 1 35 | if num_output_qualities is not None 36 | else grasp_pose_dims + 1 37 | ) 38 | 39 | ## ----------------------- Model ----------------------- 40 | 41 | dropout = 0.1 # or None 42 | 43 | pc_encoder_config = dict( 44 | type="PVCNNEncoder", 45 | args=dict( 46 | in_features=3, 47 | n_points=pc_num_points, 48 | scale_channels=0.75, 49 | scale_voxel_resolution=0.75, 50 | num_blocks=(1, 1, 1, 1), 51 | out_channels=pc_latent_channels, 52 | use_global_attention=False, 53 | ), 54 | ) 55 | 56 | grasp_encoder_config = dict( 57 | type="ResNet1D", 58 | args=dict( 59 | in_features=grasp_representation_dims, 60 | block_channels=(32, 64, 128, 256), 61 | input_conditioning_dims=pc_latent_dims, 62 | resnet_block_groups=4, 63 | dropout=dropout, 64 | ), 65 | ) 66 | 67 | decoder_config = dict( 68 | 
type="ResNet1D", 69 | args=dict( 70 | block_channels=(32, 64, 128, 256), 71 | # out_dim=grasp_pose_dims, 72 | input_conditioning_dims=pc_latent_dims, 73 | resnet_block_groups=4, 74 | dropout=dropout, 75 | ), 76 | ) 77 | 78 | loss_config = dict( 79 | reconstruction_loss=dict( 80 | type="GraspReconstructionLoss", 81 | name="reconstruction_loss", 82 | args=dict(translation_weight=1, rotation_weight=1), 83 | ), 84 | latent_loss=dict( 85 | type="VAELatentLoss", 86 | args=dict( 87 | name="grasp_latent", 88 | cyclical_annealing=True, 89 | num_steps=max_steps, 90 | num_cycles=1, 91 | ratio=0.5, 92 | start=1e-7, 93 | stop=0.1, 94 | ), 95 | ), 96 | classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)), 97 | # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)), 98 | ) 99 | 100 | denoiser_model = dict( 101 | type="TimeConditionedResNet1D", 102 | args=dict( 103 | dim=grasp_latent_dims, 104 | channels=1, 105 | block_channels=(32, 64, 128, 256), 106 | input_conditioning_dims=pc_latent_dims, 107 | resnet_block_groups=4, 108 | dropout=dropout, 109 | is_time_conditioned=True, 110 | learned_variance=False, 111 | learned_sinusoidal_cond=False, 112 | random_fourier_features=True, 113 | # learned_sinusoidal_dim=16, 114 | ), 115 | ) 116 | # Use `model` for single module to be built. If a list of modules are required to be built, use `models` to make sure the outer 117 | # See models/builder.py for more info. 118 | model = dict( 119 | vae=dict( 120 | model=dict( 121 | type="GraspCVAE", 122 | args=dict( 123 | grasp_latent_size=grasp_latent_dims, 124 | pc_latent_size=pc_latent_dims, 125 | pc_encoder_config=pc_encoder_config, 126 | grasp_encoder_config=grasp_encoder_config, 127 | decoder_config=decoder_config, 128 | loss_config=loss_config, 129 | num_output_qualities=num_output_qualities, 130 | intermediate_feature_resolution=16, 131 | ), 132 | ), 133 | ckpt_path=vae_ckpt_path, 134 | ), 135 | ddm=dict( 136 | model=dict( 137 | type="GraspLatentDDM", 138 | args=dict( 139 | model=denoiser_model, 140 | latent_in_features=grasp_latent_dims, 141 | diffusion_timesteps=1000, 142 | noise_scheduler_type="ddpm", 143 | diffusion_loss="l2", 144 | beta_schedule="linear", 145 | is_conditioned=True, 146 | joint_training=False, 147 | denoising_loss_weight=1, 148 | variance_type="fixed_large", 149 | elucidated_diffusion=False, 150 | beta_start=0.00005, 151 | beta_end=0.001, 152 | ), 153 | ), 154 | ckpt_path=ddm_ckpt_path, 155 | use_vae_ema_model=True, 156 | ), 157 | ) 158 | ## -- Data -- 159 | augs_config = [ 160 | dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)), 161 | dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)), 162 | dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)), 163 | ] 164 | 165 | object_categories = [ 166 | "Cup", 167 | "Mug", 168 | "Fork", 169 | "Hat", 170 | "Bottle", 171 | "Bowl", 172 | "Car", 173 | "Donut", 174 | "Laptop", 175 | "MousePad", 176 | "Pencil", 177 | "Plate", 178 | "ScrewDriver", 179 | "WineBottle", 180 | "Backpack", 181 | "Bag", 182 | "Banana", 183 | "Battery", 184 | "BeanBag", 185 | "Bear", 186 | "Book", 187 | "Books", 188 | "Camera", 189 | "CerealBox", 190 | "Cookie", 191 | "Hammer", 192 | "Hanger", 193 | "Knife", 194 | "MilkCarton", 195 | "Painting", 196 | "PillBottle", 197 | "Plant", 198 | "PowerSocket", 199 | "PowerStrip", 200 | "PS3", 201 | "PSP", 202 | "Ring", 203 | "Scissors", 204 | "Shampoo", 205 | "Shoes", 206 | "Sheep", 207 | "Shower", 208 | "Sink", 209 | "SoapBottle", 210 | "SodaCan", 
211 | "Spoon", 212 | "Statue", 213 | "Teacup", 214 | "Teapot", 215 | "ToiletPaper", 216 | "ToyFigure", 217 | "Wallet", 218 | "WineGlass", 219 | "Cow", 220 | "Sheep", 221 | "Cat", 222 | "Dog", 223 | "Pizza", 224 | "Elephant", 225 | "Donkey", 226 | "RubiksCube", 227 | "Tank", 228 | "Truck", 229 | "USBStick", 230 | ] 231 | 232 | train_data = dict( 233 | type="AcronymShapenetPointclouds", 234 | args=dict( 235 | data_root_dir=root_data_dir, 236 | batch_num_points_per_pc=pc_num_points, 237 | batch_num_grasps_per_pc=100, 238 | rotation_repr="mrp", 239 | augs_config=augs_config, 240 | split="train", 241 | batch_failed_grasps_ratio=0, 242 | use_dataset_statistics_for_norm=False, 243 | filter_categories=object_categories, 244 | load_fixed_subset_grasps_per_obj=None, 245 | num_repeat_dataset=10, 246 | ), 247 | ) 248 | 249 | data = dict( 250 | train=train_data, 251 | ) 252 | 253 | # Patch: Mesh Categories. Used for simulation 254 | mesh_root = root_data_dir 255 | mesh_categories = object_categories 256 | 257 | ## -------------------- Trainer -------------------- 258 | ## Logger 259 | logger = dict(type="WandbLogger", project="full-pc-ema-63c") 260 | 261 | optimizer = dict( 262 | initial_lr=0.001, 263 | scheduler=dict( 264 | type="MultiStepLR", 265 | args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1), 266 | ), 267 | ) 268 | 269 | trainer = dict( 270 | max_steps=max_steps, 271 | batch_size=batch_size, 272 | num_workers=num_workers_per_gpu * num_gpus, 273 | accelerator="gpu", 274 | devices=num_gpus, 275 | strategy="ddp", 276 | logger=logger, 277 | log_every_n_steps=100, 278 | optimizer=optimizer, 279 | resume_training_from_last=resume_training_from_last, 280 | check_val_every_n_epoch=1, 281 | ema=dict( 282 | beta=0.990, 283 | update_after_step=1000, 284 | ), 285 | deterministic=True, 286 | ) 287 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ##

GraspLDM: Generative 6-DoF Grasp Synthesis using Latent Diffusion Models
2 | 3 | 4 | Kuldeep Barad · 5 | Andrej Orsula · 6 | Antoine Richard · 7 | Jan Dentler · 8 | Miguel Olivares-Mendez · 9 | Carol Martinez 10 | 11 | 12 | ArXiv   |   Video 14 | 15 | 16 | 17 | 18 |
19 | 20 | Vision-based grasping of unknown objects in unstructured environments is a key challenge for autonomous robotic manipulation. A practical grasp synthesis system is required to generate a diverse set of 6-DoF grasps from which a task-relevant grasp can be executed. Although generative models are suitable for learning such complex data distributions, existing models have limitations in grasp quality, long training times, and a lack of flexibility for task-specific generation. In this work, we present GraspLDM, a modular generative framework for 6-DoF grasp synthesis that uses diffusion models as priors in the latent space of a VAE. GraspLDM learns a generative model of object-centric SE(3) grasp poses conditioned on point clouds. GraspLDM's architecture enables us to train task-specific models efficiently by only re-training a small denoising network in the low-dimensional latent space, as opposed to existing models that need expensive re-training. Our framework provides robust and scalable models on both full and partial point clouds. GraspLDM models trained with simulation data transfer well to the real world without any further fine-tuning. Our models provide an 80% success rate for 80 grasp attempts of diverse test objects across two real-world robotic setups. 21 | 22 | ## Pre-requisites 23 | 24 | 1. Python >= 3.8 25 | 1. CUDA > 11.1 and compatible Nvidia driver 26 | 1. (Only for Docker) Nvidia container toolkit 27 | 28 | ## Setup 29 | 30 | You can set up a Python environment using **Conda** or **virtualenv**. 31 | Alternatively, to avoid issues with system libraries, you can use a **Docker** container or a **VSCode** Devcontainer. 32 | 33 | 1. **Conda** 34 | 35 | ``` 36 | conda env create -f environment.yml 37 | conda activate grasp_ldm 38 | ``` 39 | 40 | 1. **virtualenv** 41 | 42 | ``` 43 | python -m venv grasp_ldm 44 | source grasp_ldm/bin/activate 45 | pip install -r requirements.txt 46 | ``` 47 | 48 | 1. **Docker** 49 | 50 | - Use the helper scripts to build a Docker image and run the container. 51 | 52 | NOTE: Executing bash scripts may not always be safe. Double check before executing. 53 | 54 | ``` 55 | cd .docker 56 | chmod +x build.sh run.sh 57 | 58 | # Build the image 59 | ./build.sh 60 | 61 | # Run a container 62 | ./run.sh 63 | ``` 64 | 65 | 1. **Devcontainer** 66 | 67 | - Use the editor command palette (`Ctrl+Shift+P`), start typing `Dev Containers: Reopen in Container` and select it. 68 | 69 | - Generally, use `Dev Containers: Reopen in Container` to start the devcontainer. When you wish to rebuild after changes, use `Dev Containers: Rebuild and Reopen in Container`. 70 | 71 | - For more info on Devcontainers, refer to: ... 72 | 73 | ## Prepare Data 74 | 75 | 1. Download the ACRONYM dataset using the instructions given in [`nvlabs/acronym`](https://github.com/NVlabs/acronym?tab=readme-ov-file#using-the-full-acronym-dataset). 76 | 77 | 1. Download the train/test splits data from the 🤗 HuggingFace repository [`kuldeepbarad/GraspLDM/splits`](https://huggingface.co/kuldeepbarad/GraspLDM/tree/main/splits). 78 | 79 | ## Run Generation Demo on ShapeNet Point Clouds 80 | 81 | 1. Download the pretrained models from the 🤗 HuggingFace repository [`kuldeepbarad/GraspLDM`](https://huggingface.co/kuldeepbarad/GraspLDM). 82 | 83 | 1.
Run the demo script using a pretrained model: 84 | 85 | ```bash 86 | python tools/generate_grasps.py --exp_path <path_to_experiment> --mode VAE --visualize 87 | 88 | # Example 89 | python tools/generate_grasps.py --exp_path checkpoints/generation/fpc_1a_latentc3_z4_pc64_simple_140k_noatt --mode VAE --visualize 90 | ``` 91 | 92 |
93 | All options 94 | 95 | - `--exp_path`: Path to the experiment checkpoint 96 | ```bash 97 | python tools/generate_grasps.py --exp_path checkpoints/generation/fpc_1a_latentc3_z4_pc64_simple_140k_noatt 98 | ``` 99 | - `--data_root`: Root directory for data (default: "data/ACRONYM") 100 | - `--mode`: Model type to use, either 'VAE' or 'LDM' (default: 'VAE'); see the sampling sketch after this list 101 | - `--split`: Data split to use (default: "test") 102 | - `--num_grasps`: Number of grasps to generate (default: 20) 103 | - `--visualize`: Enable visualization 104 | - `--no_ema`: Disable EMA model usage 105 | - `--num_samples`: Number of samples to generate (default: 11) 106 | - `--inference_steps`: Number of inference steps for LDM (default: 100) 107 | 108 |
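In `VAE` mode, a grasp latent is drawn directly from the unit-Gaussian prior and decoded; in `LDM` mode, the latent is first produced by the reverse diffusion process before decoding, which is why `--inference_steps` only applies to `LDM`. The snippet below is a minimal, self-contained sketch of such a DDPM reverse pass over a 4-dimensional grasp latent, using the scheduler settings from the provided `fpc` config (`diffusion_timesteps=1000`, linear betas from `5e-5` to `1e-3`, `fixed_large` variance). The `denoiser` here is a stand-in MLP, not the repository's `TimeConditionedResNet1D`, and the conditioning on the point-cloud latent is omitted, so treat it as an illustration of the sampling loop rather than the project's actual inference code.

```python
import torch

# Scheduler constants mirroring the config: beta_schedule="linear", 1000 timesteps.
T = 1000
betas = torch.linspace(5e-5, 1e-3, T)
alphas = 1.0 - betas
alpha_bars = torch.cumprod(alphas, dim=0)

# Stand-in noise predictor over a 4-dim grasp latent (placeholder for the real,
# point-cloud-conditioned TimeConditionedResNet1D).
denoiser = torch.nn.Sequential(
    torch.nn.Linear(4 + 1, 64), torch.nn.SiLU(), torch.nn.Linear(64, 4)
)

@torch.no_grad()
def sample_latents(num_grasps: int = 20) -> torch.Tensor:
    z = torch.randn(num_grasps, 4)  # start from pure noise in the grasp latent space
    for t in reversed(range(T)):
        t_feat = torch.full((num_grasps, 1), t / T)
        eps = denoiser(torch.cat([z, t_feat], dim=-1))  # predicted noise at step t
        # DDPM posterior mean for the epsilon parameterization
        mean = (z - betas[t] / torch.sqrt(1.0 - alpha_bars[t]) * eps) / torch.sqrt(alphas[t])
        if t > 0:
            # "fixed_large" variance: sigma_t^2 = beta_t
            z = mean + torch.sqrt(betas[t]) * torch.randn_like(z)
        else:
            z = mean
    return z  # denoised latents, which the VAE decoder maps to grasp poses

print(sample_latents().shape)  # torch.Size([20, 4])
```

The full 1000-step loop is shown for clarity; `--inference_steps` presumably runs a shortened schedule at inference time.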
109 | 110 | ## Run Training on ACRONYM Dataset 111 | 112 | Train grasp sampling models (VAE, DDM) with multi-GPU support. 113 | 114 | NOTE: The training is done in two stages. First the VAE encoders are trained and then the latent space denoising diffusion model. 115 | 116 | ```bash 117 | # Basic usage 118 | ## 1. First train the VAE 119 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model vae 120 | ## 2. Then train the DDM once VAE checkpoints are available. 121 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model ddm 122 | ``` 123 | 124 | Optional usage examples: 125 | ```bash 126 | # Multi-GPU training 127 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model vae --num-gpus 4 --batch-size 32 128 | 129 | # DDM training - NOTE: DDM training can only be done once the VAE model for this experiment has been trained 130 | python tools/train_generator.py --config configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py --model ddm --seed 42 131 | ``` 132 | 133 |
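The DDM stage needs the weights of the VAE trained in stage one. In the provided configs this is wired through `resume_training_from_last = True` and the `vae_ckpt_path` field near the top of the config, whose value is assigned to `model['vae']['ckpt_path']`. A minimal sketch of pointing the DDM run at an explicit VAE checkpoint, assuming that wiring and using a purely illustrative path:

```python
# Near the top of configs/generation/fpc/fpc_1a_latentc3_z4_pc64_180k.py
# Providing a checkpoint here overrides resume_training_from_last for the VAE weights.
vae_ckpt_path = "output/my_experiment/vae/checkpoints/last.ckpt"  # illustrative path only
ddm_ckpt_path = None  # train the latent denoiser from scratch
```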
134 | All options 135 | 136 | - `--config`, `-c`: Path to config file 137 | - `--model`, `-m`: Model type (`classifier`, `vae`, `ddm`) 138 | - `--root-dir`, `-d`: Data root directory 139 | - `--num-gpus`, `-g`: Number of GPUs 140 | - `--batch-size`, `-b`: Batch size per device 141 | - `--deterministic`: Enable deterministic training 142 | - `--seed`: Random seed 143 | - `-debug`: Disable wandb logging 144 | 145 |
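The VAE stage in the generation configs under `configs/generation/` anneals the KL weight cyclically (`latent_loss` args: `num_cycles=1`, `ratio=0.5`, `start=1e-7`, `stop=0.1`, over `max_steps`). The exact schedule lives in the loss implementation; the sketch below assumes the common linear-ramp form of cyclical annealing with those values, so treat it as an illustration of how the weight evolves during stage-one training rather than a copy of the repository's code.

```python
def kl_weight(step: int, num_steps: int = 180_000, num_cycles: int = 1,
              ratio: float = 0.5, start: float = 1e-7, stop: float = 0.1) -> float:
    """Assumed cyclical annealing: ramp linearly from `start` to `stop` over the first
    `ratio` fraction of each cycle, then hold at `stop` for the rest of the cycle."""
    cycle_len = num_steps / num_cycles
    pos = (step % cycle_len) / cycle_len  # position within the current cycle, in [0, 1)
    if pos < ratio:
        return start + (stop - start) * (pos / ratio)
    return stop

# With one cycle over 180k steps, the KL weight reaches 0.1 at step 90k and stays there.
print(kl_weight(0), kl_weight(90_000), kl_weight(179_999))
```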
146 | 147 | ## Attribution 148 | 149 | If you find this code useful, please cite our work: 150 | 151 | ``` 152 | @article{barad2023graspldm, 153 | title={GraspLDM: Generative 6-DoF Grasp Synthesis using Latent Diffusion Models}, 154 | author={Barad, Kuldeep R and Orsula, Andrej and Richard, Antoine and Dentler, Jan and Olivares-Mendez, Miguel and Martinez, Carol}, 155 | journal={arXiv preprint arXiv:2312.11243}, 156 | year={2023} 157 | } 158 | ``` 159 | 160 | ## License 161 | 162 | Apache 2.0 License. See [LICENSE](LICENSE) for more details. 163 | 164 | ## Acknowledgements/External Resources 165 | 166 | - Acronym tools and helpers are adapted from [https://github.com/NVlabs/acronym](https://github.com/NVlabs/acronym) 167 | 168 | - PVCNN implementation and CUDA kernel are taken from [https://github.com/mit-han-lab/pvcnn](https://github.com/mit-han-lab/pvcnn) 169 | 170 | - [`grasp_vdm/utils/config.py`](grasp_vdm/utils/config.py) is adapted from [https://github.com/open-mmlab/mmcv](https://github.com/open-mmlab/mmcv) 171 | 172 | - Resnet models for DDM implementation is adapted from [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch) and [https://github.com/openai/improved-diffusion](https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/unet.py). Elucidated Diffusion Model is adapted from [https://github.com/NVlabs/edm](https://github.com/NVlabs/edm). 173 | -------------------------------------------------------------------------------- /configs/generation/partial_pc/ppc_1a_partial_63cat8k_filtered_latentc3_z16_pc256_180k.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | ## -------------------- Most frequently changed params here -------------------- 4 | 5 | resume_training_from_last = True 6 | 7 | max_steps = 180000 8 | batch_size = 60 9 | 10 | num_gpus = 1 11 | num_workers_per_gpu = 7 12 | 13 | # During training, if a ckpt is provided here, it overrides resume_training_from_last and instead resumes training from this ckpt 14 | vae_ckpt_path = None # "output/boilerplate_kldanneal_c0.1/vae/checkpoints/last.ckpt" 15 | ddm_ckpt_path = None 16 | 17 | max_scenes = None 18 | 19 | 20 | root_data_dir = "data/acronym/renders/objects_filtered_grasps_63cat_8k/" 21 | camera_json = "grasp_ldm/dataset/cameras/camera_d435i_dummy.json" 22 | 23 | ## -------------------- Inputs/Shapes ------------------------ 24 | # Input/Output: grasp representation [mrp(3), t(3), cls_success(1), qualities(4)] 25 | 26 | pc_num_points = 1024 27 | pc_latent_dims = 256 28 | pc_latent_channels = 3 29 | 30 | grasp_pose_dims = 6 31 | num_output_qualities = 0 32 | grasp_latent_dims = 16 33 | 34 | grasp_representation_dims = ( 35 | grasp_pose_dims + num_output_qualities + 1 36 | if num_output_qualities is not None 37 | else grasp_pose_dims + 1 38 | ) 39 | 40 | ## ----------------------- Model ----------------------- 41 | 42 | dropout = 0.1 # or None 43 | 44 | pc_encoder_config = dict( 45 | type="PVCNNEncoder", 46 | args=dict( 47 | in_features=3, 48 | n_points=pc_num_points, 49 | scale_channels=0.75, 50 | scale_voxel_resolution=0.75, 51 | num_blocks=(1, 1, 1, 1), 52 | out_channels=pc_latent_channels, 53 | use_global_attention=False, 54 | ), 55 | ) 56 | 57 | grasp_encoder_config = dict( 58 | type="ResNet1D", 59 | args=dict( 60 | in_features=grasp_representation_dims, 61 | block_channels=(32, 64, 128, 256), 62 | input_conditioning_dims=pc_latent_dims, 63 | resnet_block_groups=4, 64 | 
dropout=dropout, 65 | ), 66 | ) 67 | 68 | decoder_config = dict( 69 | type="ResNet1D", 70 | args=dict( 71 | block_channels=(32, 64, 128, 256), 72 | # out_dim=grasp_pose_dims, 73 | input_conditioning_dims=pc_latent_dims, 74 | resnet_block_groups=4, 75 | dropout=dropout, 76 | ), 77 | ) 78 | 79 | loss_config = dict( 80 | reconstruction_loss=dict( 81 | type="GraspReconstructionLoss", 82 | name="reconstruction_loss", 83 | args=dict(translation_weight=1, rotation_weight=1), 84 | ), 85 | latent_loss=dict( 86 | type="VAELatentLoss", 87 | args=dict( 88 | name="grasp_latent", 89 | cyclical_annealing=True, 90 | num_steps=max_steps, 91 | num_cycles=1, 92 | ratio=0.5, 93 | start=1e-7, 94 | stop=0.1, 95 | ), 96 | ), 97 | classification_loss=dict(type="ClassificationLoss", args=dict(weight=0.1)), 98 | # quality_loss=dict(type="QualityLoss", args=dict(weight=0.1)), 99 | ) 100 | 101 | denoiser_model = dict( 102 | type="TimeConditionedResNet1D", 103 | args=dict( 104 | dim=grasp_latent_dims, 105 | channels=1, 106 | block_channels=(32, 64, 128, 256), 107 | input_conditioning_dims=pc_latent_dims, 108 | resnet_block_groups=4, 109 | dropout=dropout, 110 | is_time_conditioned=True, 111 | learned_variance=False, 112 | learned_sinusoidal_cond=False, 113 | random_fourier_features=True, 114 | # learned_sinusoidal_dim=16, 115 | ), 116 | ) 117 | # Use `model` for single module to be built. If a list of modules are required to be built, use `models` to make sure the outer 118 | # See models/builder.py for more info. 119 | model = dict( 120 | vae=dict( 121 | model=dict( 122 | type="GraspCVAE", 123 | args=dict( 124 | grasp_latent_size=grasp_latent_dims, 125 | pc_latent_size=pc_latent_dims, 126 | pc_encoder_config=pc_encoder_config, 127 | grasp_encoder_config=grasp_encoder_config, 128 | decoder_config=decoder_config, 129 | loss_config=loss_config, 130 | num_output_qualities=num_output_qualities, 131 | intermediate_feature_resolution=16, 132 | ), 133 | ), 134 | ckpt_path=vae_ckpt_path, 135 | ), 136 | ddm=dict( 137 | model=dict( 138 | type="GraspLatentDDM", 139 | args=dict( 140 | model=denoiser_model, 141 | latent_in_features=grasp_latent_dims, 142 | diffusion_timesteps=1000, 143 | noise_scheduler_type="ddpm", 144 | diffusion_loss="l2", 145 | beta_schedule="linear", 146 | is_conditioned=True, 147 | joint_training=False, 148 | denoising_loss_weight=1, 149 | variance_type="fixed_large", 150 | elucidated_diffusion=False, 151 | beta_start=0.00005, 152 | beta_end=0.001, 153 | ), 154 | ), 155 | ckpt_path=ddm_ckpt_path, 156 | use_vae_ema_model=True, 157 | ), 158 | ) 159 | ## -- Data -- 160 | augs_config = [ 161 | dict(type="RandomRotation", args=dict(p=0.5, max_angle=180, is_degree=True)), 162 | dict(type="PointcloudJitter", args=dict(p=1, sigma=0.005, clip=0.005)), 163 | dict(type="RandomPointcloudDropout", args=dict(p=0.5, max_dropout_ratio=0.4)), 164 | ] 165 | 166 | 167 | train_data = dict( 168 | type="AcronymPartialPointclouds", 169 | args=dict( 170 | data_root_dir=root_data_dir, 171 | max_scenes=max_scenes, 172 | camera_json=camera_json, 173 | num_points_per_pc=pc_num_points, 174 | num_grasps_per_obj=100, 175 | rotation_repr="mrp", 176 | augs_config=augs_config, 177 | split="train", 178 | depth_px_scale=10000, 179 | scene_prefix="scene_", 180 | min_usable_pc_points=1024, 181 | preempt_load_data=True, 182 | use_failed_grasps=False, 183 | failed_grasp_ratio=0.3, 184 | load_fixed_grasp_transforms=None, 185 | is_input_dataset_normalized=False, 186 | num_repeat_dataset=10, 187 | ), 188 | batch_size=batch_size, 189 | ) 190 | 191 | 
data = dict( 192 | train=train_data, 193 | ) 194 | 195 | # Patch: Mesh Categories. Used for simulation 196 | mesh_root = root_data_dir 197 | mesh_categories = [ 198 | "Cup", 199 | "Mug", 200 | "Fork", 201 | "Hat", 202 | "Bottle", 203 | "Bowl", 204 | "Car", 205 | "Donut", 206 | "Laptop", 207 | "MousePad", 208 | "Pencil", 209 | "Plate", 210 | "ScrewDriver", 211 | "WineBottle", 212 | "Backpack", 213 | "Bag", 214 | "Banana", 215 | "Battery", 216 | "BeanBag", 217 | "Bear", 218 | "Book", 219 | "Books", 220 | "Camera", 221 | "CerealBox", 222 | "Cookie", 223 | "Hammer", 224 | "Hanger", 225 | "Knife", 226 | "MilkCarton", 227 | "Painting", 228 | "PillBottle", 229 | "Plant", 230 | "PowerSocket", 231 | "PowerStrip", 232 | "PS3", 233 | "PSP", 234 | "Ring", 235 | "Scissors", 236 | "Shampoo", 237 | "Shoes", 238 | "Sheep", 239 | "Shower", 240 | "Sink", 241 | "SoapBottle", 242 | "SodaCan", 243 | "Spoon", 244 | "Statue", 245 | "Teacup", 246 | "Teapot", 247 | "ToiletPaper", 248 | "ToyFigure", 249 | "Wallet", 250 | "WineGlass", 251 | "Cow", 252 | "Sheep", 253 | "Cat", 254 | "Dog", 255 | "Pizza", 256 | "Elephant", 257 | "Donkey", 258 | "RubiksCube", 259 | "Tank", 260 | "Truck", 261 | "USBStick", 262 | ] 263 | 264 | ## -------------------- Trainer -------------------- 265 | ## Logger 266 | logger = dict(type="WandbLogger", project="partial-pc-63c-ema") 267 | 268 | optimizer = dict( 269 | initial_lr=0.001, 270 | scheduler=dict( 271 | type="MultiStepLR", 272 | args=dict(milestones=[int(max_steps / 3), int(2 * max_steps / 3)], gamma=0.1), 273 | ), 274 | ) 275 | 276 | trainer = dict( 277 | max_steps=max_steps, 278 | batch_size=batch_size, 279 | num_workers=num_workers_per_gpu * num_gpus, 280 | accelerator="gpu", 281 | devices=num_gpus, 282 | strategy="ddp", 283 | logger=logger, 284 | log_every_n_steps=100, 285 | optimizer=optimizer, 286 | resume_training_from_last=resume_training_from_last, 287 | check_val_every_n_epoch=1, 288 | ema=dict( 289 | beta=0.990, 290 | update_after_step=1000, 291 | ), 292 | deterministic=True, 293 | ) 294 | -------------------------------------------------------------------------------- /grasp_ldm/models/modules/ext/pvcnn/utils.py: -------------------------------------------------------------------------------- 1 | # Adapted from PVCNN and PVD 2 | import functools 3 | 4 | import torch.nn as nn 5 | 6 | from .modules import ( 7 | PointNetAModule, 8 | PointNetFPModule, 9 | PointNetSAModule, 10 | PVConv, 11 | SharedMLP, 12 | ) 13 | 14 | __all__ = [ 15 | "create_mlp_components", 16 | "create_pointnet_components", 17 | "create_pointnet2_sa_components", 18 | "create_pointnet2_fp_modules", 19 | ] 20 | 21 | 22 | def _linear_bn_relu(in_channels, out_channels): 23 | return nn.Sequential( 24 | nn.Linear(in_channels, out_channels), 25 | nn.BatchNorm1d(out_channels), 26 | nn.ReLU(True), 27 | ) 28 | 29 | 30 | def create_mlp_components( 31 | in_channels, out_channels, classifier=False, dim=2, width_multiplier=1 32 | ): 33 | r = width_multiplier 34 | 35 | if dim == 1: 36 | block = _linear_bn_relu 37 | else: 38 | block = SharedMLP 39 | if not isinstance(out_channels, (list, tuple)): 40 | out_channels = [out_channels] 41 | if len(out_channels) == 0 or (len(out_channels) == 1 and out_channels[0] is None): 42 | return nn.Sequential(), in_channels, in_channels 43 | 44 | layers = [] 45 | for oc in out_channels[:-1]: 46 | if oc < 1: 47 | layers.append(nn.Dropout(oc)) 48 | else: 49 | oc = int(r * oc) 50 | layers.append(block(in_channels, oc)) 51 | in_channels = oc 52 | if dim == 1: 53 | if classifier: 54 | 
layers.append(nn.Linear(in_channels, out_channels[-1])) 55 | else: 56 | layers.append(_linear_bn_relu(in_channels, int(r * out_channels[-1]))) 57 | else: 58 | if classifier: 59 | layers.append(nn.Conv1d(in_channels, out_channels[-1], 1)) 60 | else: 61 | layers.append(SharedMLP(in_channels, int(r * out_channels[-1]))) 62 | return layers, out_channels[-1] if classifier else int(r * out_channels[-1]) 63 | 64 | 65 | def create_pointnet_components( 66 | blocks, 67 | in_channels, 68 | with_se=False, 69 | normalize=True, 70 | eps=0, 71 | width_multiplier=1, 72 | voxel_resolution_multiplier=1, 73 | ): 74 | r, vr = width_multiplier, voxel_resolution_multiplier 75 | 76 | layers, concat_channels = [], 0 77 | for out_channels, num_blocks, voxel_resolution in blocks: 78 | out_channels = int(r * out_channels) 79 | if voxel_resolution is None: 80 | block = SharedMLP 81 | else: 82 | block = functools.partial( 83 | PVConv, 84 | kernel_size=3, 85 | resolution=int(vr * voxel_resolution), 86 | with_se=with_se, 87 | normalize=normalize, 88 | eps=eps, 89 | ) 90 | for _ in range(num_blocks): 91 | layers.append(block(in_channels, out_channels)) 92 | in_channels = out_channels 93 | concat_channels += out_channels 94 | return layers, in_channels, concat_channels 95 | 96 | 97 | def create_pointnet2_sa_components( 98 | sa_blocks, 99 | extra_feature_channels, 100 | embed_dim=0, 101 | use_attention=False, 102 | dropout=0.1, 103 | with_se=False, 104 | voxelization_normalize=True, 105 | eps=0, 106 | width_multiplier=1, 107 | voxel_resolution_multiplier=1, 108 | ): 109 | r, vr = width_multiplier, voxel_resolution_multiplier 110 | in_channels = extra_feature_channels + 3 111 | 112 | sa_layers, sa_in_channels = [], [] 113 | c = 0 114 | for conv_configs, sa_configs in sa_blocks: 115 | k = 0 116 | sa_in_channels.append(in_channels) 117 | sa_blocks = [] 118 | 119 | if conv_configs is not None: 120 | out_channels, num_blocks, voxel_resolution = conv_configs 121 | out_channels = int(r * out_channels) 122 | for p in range(num_blocks): 123 | attention = (c + 1) % 2 == 0 and use_attention and p == 0 124 | if voxel_resolution is None: 125 | block = SharedMLP 126 | else: 127 | block = functools.partial( 128 | PVConv, 129 | kernel_size=3, 130 | resolution=int(vr * voxel_resolution), 131 | use_attention=attention, 132 | dropout=dropout, 133 | with_se=with_se, 134 | with_se_relu=True, 135 | normalize=voxelization_normalize, 136 | eps=eps, 137 | ) 138 | 139 | if c == 0: 140 | sa_blocks.append(block(in_channels, out_channels)) 141 | elif k == 0: 142 | sa_blocks.append(block(in_channels + embed_dim, out_channels)) 143 | in_channels = out_channels 144 | k += 1 145 | extra_feature_channels = in_channels 146 | num_centers, radius, num_neighbors, out_channels = sa_configs 147 | _out_channels = [] 148 | for oc in out_channels: 149 | if isinstance(oc, (list, tuple)): 150 | _out_channels.append([int(r * _oc) for _oc in oc]) 151 | else: 152 | _out_channels.append(int(r * oc)) 153 | out_channels = _out_channels 154 | if num_centers is None: 155 | block = PointNetAModule 156 | else: 157 | block = functools.partial( 158 | PointNetSAModule, 159 | num_centers=num_centers, 160 | radius=radius, 161 | num_neighbors=num_neighbors, 162 | ) 163 | sa_blocks.append( 164 | block( 165 | in_channels=extra_feature_channels + (embed_dim if k == 0 else 0), 166 | out_channels=out_channels, 167 | include_coordinates=True, 168 | ) 169 | ) 170 | c += 1 171 | in_channels = extra_feature_channels = sa_blocks[-1].out_channels 172 | if len(sa_blocks) == 1: 173 | 
sa_layers.append(sa_blocks[0]) 174 | else: 175 | sa_layers.append(nn.Sequential(*sa_blocks)) 176 | 177 | return ( 178 | sa_layers, 179 | sa_in_channels, 180 | in_channels, 181 | 1 if num_centers is None else num_centers, 182 | ) 183 | 184 | 185 | def create_pointnet2_fp_modules( 186 | fp_blocks, 187 | in_channels, 188 | sa_in_channels, 189 | embed_dim=0, 190 | use_attention=False, 191 | dropout=0.1, 192 | with_se=False, 193 | normalize=True, 194 | eps=0, 195 | width_multiplier=1, 196 | voxel_resolution_multiplier=1, 197 | ): 198 | r, vr = width_multiplier, voxel_resolution_multiplier 199 | 200 | fp_layers = [] 201 | c = 0 202 | for fp_idx, (fp_configs, conv_configs) in enumerate(fp_blocks): 203 | fp_blocks = [] 204 | out_channels = tuple(int(r * oc) for oc in fp_configs) 205 | fp_blocks.append( 206 | PointNetFPModule( 207 | in_channels=in_channels + sa_in_channels[-1 - fp_idx] + embed_dim, 208 | out_channels=out_channels, 209 | ) 210 | ) 211 | in_channels = out_channels[-1] 212 | 213 | if conv_configs is not None: 214 | out_channels, num_blocks, voxel_resolution = conv_configs 215 | out_channels = int(r * out_channels) 216 | for p in range(num_blocks): 217 | attention = ( 218 | (c + 1) % 2 == 0 219 | and c < len(fp_blocks) - 1 220 | and use_attention 221 | and p == 0 222 | ) 223 | if voxel_resolution is None: 224 | block = SharedMLP 225 | else: 226 | block = functools.partial( 227 | PVConv, 228 | kernel_size=3, 229 | resolution=int(vr * voxel_resolution), 230 | use_attention=attention, 231 | dropout=dropout, 232 | with_se=with_se, 233 | with_se_relu=True, 234 | normalize=normalize, 235 | eps=eps, 236 | ) 237 | 238 | fp_blocks.append(block(in_channels, out_channels)) 239 | in_channels = out_channels 240 | if len(fp_blocks) == 1: 241 | fp_layers.append(fp_blocks[0]) 242 | else: 243 | fp_layers.append(nn.Sequential(*fp_blocks)) 244 | 245 | c += 1 246 | 247 | return fp_layers, in_channels 248 | --------------------------------------------------------------------------------