├── .github
    ├── fair.png
    ├── preview.gif
    ├── urdf_visualizer.png
    └── tactile_transformer.png
├── neuralfeels
    ├── __init__.py
    ├── contrib
    │   ├── __init__.py
    │   ├── sam
    │   │   ├── __init__.py
    │   │   └── test_sam.py
    │   ├── urdf
    │   │   ├── SceneGraph
    │   │   │   ├── __init__.py
    │   │   │   ├── MeshNode.py
    │   │   │   ├── Transform.py
    │   │   │   ├── SceneNode.py
    │   │   │   └── SceneGraph.py
    │   │   ├── URDF
    │   │   │   ├── URDFTree
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── ExLink.py
    │   │   │   │   └── URDFTree.py
    │   │   │   ├── Parser
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── Joint.py
    │   │   │   │   ├── Link.py
    │   │   │   │   └── URDFParser.py
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── viz.py
    │   └── tactile_transformer
    │   │   ├── __init__.py
    │   │   ├── custom_augmentation.py
    │   │   ├── fusion.py
    │   │   ├── head.py
    │   │   ├── README.md
    │   │   ├── dpt_model.py
    │   │   ├── reassemble.py
    │   │   ├── touch_vit.py
    │   │   ├── loss.py
    │   │   ├── tactile_depth.py
    │   │   └── utils.py
    ├── eval
    │   ├── __init__.py
    │   ├── occlusion_plot.py
    │   ├── group_plot.py
    │   ├── feelsight_init.py
    │   └── metrics.py
    ├── geometry
    │   ├── __init__.py
    │   ├── align_utils.py
    │   └── frustum.py
    ├── datasets
    │   ├── __init__.py
    │   ├── mesh_to_sdf_test.py
    │   ├── image_transforms.py
    │   ├── redwood_depth_noise_model.py
    │   ├── dataset.py
    │   ├── data_util.py
    │   └── sdf_util.py
    ├── viz
    │   ├── __init__.py
    │   ├── plot_utils.py
    │   ├── show_object_dataset.py
    │   ├── debug.py
    │   ├── rotate_object_video.py
    │   └── draw.py
    └── modules
    │   ├── __init__.py
    │   ├── misc.py
    │   ├── render.py
    │   ├── object.py
    │   ├── allegro.py
    │   └── loss.py
├── scripts
    ├── config
    │   ├── main
    │   │   ├── touch_depth
    │   │   │   ├── gt.yaml
    │   │   │   └── vit.yaml
    │   │   ├── data
    │   │   │   └── default.yaml
    │   │   ├── scene
    │   │   │   └── default.yaml
    │   │   ├── vi.yaml
    │   │   ├── baseline.yaml
    │   │   ├── tac.yaml
    │   │   ├── vitac.yaml
    │   │   ├── eval
    │   │   │   └── default.yaml
    │   │   ├── viz
    │   │   │   └── default.yaml
    │   │   ├── sensor
    │   │   │   ├── realsense.yaml
    │   │   │   └── digit.yaml
    │   │   ├── train
    │   │   │   └── default.yaml
    │   │   └── pose
    │   │   │   └── default.yaml
    │   ├── launcher
    │   │   └── basic.yaml
    │   └── config.yaml
    ├── run
    └── run.py
├── .pre-commit-config.yaml
├── LICENSE
├── setup.py
├── CONTRIBUTING.md
├── environment.yml
├── .gitignore
├── install.sh
├── data
    └── README.md
└── CODE_OF_CONDUCT.md
--------------------------------------------------------------------------------
/.github/fair.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/fair.png
--------------------------------------------------------------------------------
/.github/preview.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/preview.gif
--------------------------------------------------------------------------------
/.github/urdf_visualizer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/urdf_visualizer.png
--------------------------------------------------------------------------------
/.github/tactile_transformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/tactile_transformer.png
--------------------------------------------------------------------------------
/neuralfeels/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | -------------------------------------------------------------------------------- /neuralfeels/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | -------------------------------------------------------------------------------- /neuralfeels/contrib/sam/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | -------------------------------------------------------------------------------- /scripts/config/main/touch_depth/gt.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | mode : gt 7 | -------------------------------------------------------------------------------- /scripts/config/main/data/default.yaml: -------------------------------------------------------------------------------- 1 | dataset: feelsight_real 2 | object: bell_pepper 3 | log: '00' 4 | 5 | dataset_path: "data/${main.data.dataset}/${main.data.object}/${main.data.log}" 6 | 7 | gt_sdf_dir: data/assets/gt_models 8 | 9 | train_fps: 1 10 | -------------------------------------------------------------------------------- /neuralfeels/eval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . import metrics 9 | -------------------------------------------------------------------------------- /scripts/config/launcher/basic.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - override /hydra/launcher: joblib 4 | - _self_ 5 | 6 | hydra: 7 | sweep: 8 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 9 | launcher: 10 | n_jobs: 1 11 | pre_dispatch: 0 12 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .SceneGraph import SceneGraph 7 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/URDFTree/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | from .URDFTree import URDFTree 7 | -------------------------------------------------------------------------------- /neuralfeels/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . import frustum, transform 9 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .tactile_depth import TactileDepth 7 | -------------------------------------------------------------------------------- /neuralfeels/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import data_util, dataset, image_transforms, sdf_util 7 | -------------------------------------------------------------------------------- /neuralfeels/viz/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . import draw, neuralfeels_gui, sdf_viewer 9 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/Parser/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .Link import Link 7 | from .URDFParser import URDFParser 8 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .Parser import URDFParser 7 | from .URDFTree import URDFTree 8 | -------------------------------------------------------------------------------- /neuralfeels/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . 
import loss, model, render, sample, trainer
9 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | from .SceneGraph import SceneGraph
7 | from .URDF import URDFParser, URDFTree
8 |
--------------------------------------------------------------------------------
/scripts/config/main/scene/default.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 |
7 | grid_dim : 200
8 | crop_dist: 5e-3
9 | mesh_interval: 10
10 |
11 | object_limits : 0
12 | rotate_z: 0
13 | extents: [0.15, 0.15, 0.15]
14 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/psf/black
3 |     rev: 24.4.2
4 |     hooks:
5 |       - id: black
6 |         files: |
7 |           (?x)^(
8 |             neuralfeels |
9 |             scripts
10 |           )
11 |
12 |   - repo: https://github.com/pycqa/isort
13 |     rev: 5.13.2
14 |     hooks:
15 |       - id: isort
16 |         args: ["--profile", "black"]
17 |         files: |
18 |           (?x)^(
19 |             neuralfeels |
20 |             scripts
21 |           )
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/README.md:
--------------------------------------------------------------------------------
1 | ## Open3D URDF visualization
2 |
3 | This code is based on [Helper3D](https://github.com/Jianghanxiao/Helper3D), a super useful tool to visualize URDFs dynamically in Open3D. We thank the authors for their work, and we have made some modifications to the code to make it work with our project.
4 |
5 |
6 |
7 | ### Test URDF visualization
8 |
9 | ```bash
10 | python neuralfeels/contrib/urdf/viz.py
11 | ```
12 |
--------------------------------------------------------------------------------
/scripts/config/main/vi.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Vision-only config
7 |
8 | defaults:
9 |   - data: default
10 |   - eval: default
11 |   - train: default
12 |   - pose: default
13 |   - scene: default
14 |   - viz: default
15 |   - sensor@sensor0: realsense
16 |   - _self_
17 |
18 | mode: vision
19 | occlusion: False
20 |
21 | sensor0:
22 |   name: realsense_front_left
23 |
--------------------------------------------------------------------------------
/scripts/config/main/baseline.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 6 | # Baseline three-camera config 7 | 8 | defaults: 9 | - data: default 10 | - eval: default 11 | - train: default 12 | - pose: default 13 | - scene: default 14 | - viz: default 15 | - sensor@sensor0: realsense 16 | - sensor@sensor1: realsense 17 | - sensor@sensor2: realsense 18 | - _self_ 19 | 20 | mode: baseline 21 | occlusion: False 22 | 23 | sensor0: 24 | name: realsense_front_left 25 | sensor1: 26 | name: realsense_back_right 27 | sensor2: 28 | name: realsense_top_down 29 | -------------------------------------------------------------------------------- /scripts/config/main/tac.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Tactile-only config 7 | 8 | defaults: 9 | - data: default 10 | - eval: default 11 | - train: default 12 | - pose: default 13 | - scene: default 14 | - viz: default 15 | - sensor@sensor0: digit 16 | - sensor@sensor1: digit 17 | - sensor@sensor2: digit 18 | - sensor@sensor3: digit 19 | - _self_ 20 | 21 | mode: tactile 22 | occlusion: False 23 | 24 | sensor0: 25 | name: digit_thumb 26 | sensor1: 27 | name: digit_index 28 | sensor2: 29 | name: digit_middle 30 | sensor3: 31 | name: digit_ring -------------------------------------------------------------------------------- /scripts/config/main/vitac.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Visuo-tactile config 7 | 8 | defaults: 9 | - data: default 10 | - eval: default 11 | - train: default 12 | - pose: default 13 | - scene: default 14 | - viz: default 15 | - sensor@sensor0: realsense 16 | - sensor@sensor1: digit 17 | - sensor@sensor2: digit 18 | - sensor@sensor3: digit 19 | - sensor@sensor4: digit 20 | - _self_ 21 | 22 | mode: vitac 23 | occlusion: False 24 | 25 | sensor0: 26 | name: realsense_front_left 27 | sensor1: 28 | name: digit_thumb 29 | sensor2: 30 | name: digit_index 31 | sensor3: 32 | name: digit_middle 33 | sensor4: 34 | name: digit_ring 35 | -------------------------------------------------------------------------------- /scripts/config/config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | defaults: 7 | - main: vitac 8 | - launcher: basic 9 | - _self_ 10 | 11 | user: suddhu 12 | profile: False 13 | seed: 1 14 | gpu_id: 0 15 | create_display: False 16 | expt_name: "${main.train.train_mode}" 17 | vox_size: "${main.train.gt_voxel_size}" 18 | 19 | sweep_dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 20 | 21 | hydra: 22 | job_logging: 23 | root: 24 | handlers: [] 25 | job: 26 | chdir: true 27 | run: 28 | dir: ${sweep_dir}/${main.data.object}/${main.data.log}/${main.mode}/${expt_name}_${seed} 29 | sweep: 30 | dir: ${sweep_dir} 31 | subdir: ${main.data.object}/${main.data.log}/${main.mode}/${expt_name}_${hydra.job.num} # standard mode -------------------------------------------------------------------------------- /scripts/config/main/eval/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Eval parameters 7 | 8 | do_eval: 0 9 | eval_freq_s: 1 10 | sdf_eval: 1 11 | mesh_eval: 0 12 | 13 | save_period: 10 14 | save_slices: 0 15 | save_meshes: 0 16 | 17 | # if true saves intermediate reconstruction (mesh/point cloud) instead of just final one 18 | save_intermediate_recons: True 19 | # if true saves point clouds from SDF (at the same time mesh is saved) 20 | save_rendered_pcs: False 21 | # this is a maximum. The actual number might be less due to filtering rays not hitting a surface 22 | num_points_pcs: 2000 23 | num_points_f_score: 10000 24 | f_score_T: [2e-2, 1.8e-2, 1.6e-2, 1.4e-2, 1.2e-2, 1e-2, 9e-3, 8e-3, 7e-3, 6e-3, 5e-3, 4e-3, 3e-3, 2e-3, 1e-3] # range from 2cm to 1mm 25 | which_f_score: 10 # which one to display live, choose index of 5e-3 -------------------------------------------------------------------------------- /scripts/config/main/viz/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Visualizer settings 7 | 8 | meshes: 9 | mesh_rec: True 10 | mesh_rec_crop: False 11 | write_all_meshes: False 12 | save_rotate: True 13 | save_neural_field: True 14 | allegro: True 15 | has_gt_object: True # if dataset does not have gt object, set to False 16 | show_gt_object: False 17 | sensors_est: False 18 | transparent: False 19 | 20 | debug: 21 | rays: False 22 | frontend: False 23 | bbox: False 24 | origin: False 25 | 26 | layers: 27 | colormap: Color # Sensor, Normals, FScore, Color, n/a 28 | keyframes: None # None, Latest, All 29 | pointcloud: None # None, Both, Vision, Touch 30 | 31 | misc: 32 | record: False 33 | downsample_threshold: 50000 34 | rotate: False 35 | follow : False 36 | render_stream: False 37 | render_open3d: True 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/URDFTree/ExLink.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | 9 | class ExLink: 10 | def __init__(self, link): 11 | self.link = link 12 | self.parent = None 13 | self.children = [] 14 | self.joint = None 15 | 16 | def setParent(self, parent): 17 | self.parent = parent 18 | 19 | def addChild(self, child): 20 | self.children.append(child) 21 | 22 | def setJoint(self, joint): 23 | self.joint = joint 24 | 25 | def __repr__(self): 26 | output = {} 27 | output["link"] = self.link 28 | if self.parent != None: 29 | output["parent"] = self.parent.link.link_name 30 | else: 31 | output["parent"] = None 32 | output["children"] = [child.link.link_name for child in self.children] 33 | output["joint"] = self.joint 34 | return str(output) 35 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/MeshNode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | 9 | import copy 10 | import re 11 | 12 | import open3d as o3d 13 | 14 | 15 | class MeshNode: 16 | def __init__(self): 17 | self.mesh = None 18 | 19 | def addMesh(self, mesh): 20 | if self.mesh == None: 21 | self.mesh = mesh 22 | else: 23 | self.mesh += mesh 24 | 25 | def addMeshFile(self, mesh_file, color): 26 | # Read the mesh from obj file 27 | mesh_file = re.sub("allegro/allegro", "allegro", mesh_file) 28 | mesh = o3d.io.read_triangle_mesh(mesh_file) 29 | mesh.paint_uniform_color(color) 30 | self.addMesh(mesh) 31 | 32 | def getMesh(self, worldMatrix): 33 | if self.mesh == None: 34 | return None 35 | new_mesh = copy.deepcopy(self.mesh) 36 | new_mesh.transform(worldMatrix) 37 | return new_mesh 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) Meta Platforms, Inc. and affiliates. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /neuralfeels/datasets/mesh_to_sdf_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. 
and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | # Test script to visualize the SDF of a mesh, SDFViewer is taken from iSDF 8 | 9 | import os 10 | 11 | import git 12 | import numpy as np 13 | 14 | from neuralfeels.datasets import sdf_util 15 | from neuralfeels.datasets.sdf_util import load_gt_mesh 16 | from neuralfeels.viz import sdf_viewer 17 | 18 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 19 | 20 | 21 | def main(): 22 | mesh_path = os.path.join( 23 | root, "data/assets/gt_models/ycb/contactdb_rubber_duck.urdf" 24 | ) 25 | mesh, _ = load_gt_mesh(mesh_path) 26 | sdf, transform = sdf_util.sdf_from_mesh( 27 | mesh=mesh, voxel_size=5e-4, extend_factor=0.1, origin_voxel=np.zeros(3) 28 | ) 29 | sdf_viewer.SDFViewer( 30 | mesh=mesh, 31 | sdf_grid=sdf, 32 | sdf_range=None, 33 | grid2world=transform, 34 | surface_cutoff=0.001, 35 | colormap=True, 36 | ) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import setuptools 7 | 8 | with open("README.md", "r") as fh: 9 | long_description = fh.read() 10 | 11 | 12 | setuptools.setup( 13 | name="neuralfeels", 14 | version="0.0.1", 15 | author="Meta Research", 16 | description="Neural Feels.", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | url="https://github.com/facebookresearch/neuralfeels", 20 | packages=["neuralfeels"], 21 | classifiers=[ 22 | "Programming Language :: Python :: 3", 23 | "License :: OSI Approved :: MIT License", 24 | "Intended Audience :: Science/Research", 25 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 26 | ], 27 | python_requires=">=3.8", 28 | install_requires=[ 29 | "pyserial==3.5", 30 | "betterproto==2.0.0b5", 31 | "cobs==1.2.0", 32 | "google-api-python-client==2.97.0", 33 | "google-auth-httplib2==0.1.0", 34 | "google-auth-oauthlib==0.5.0", 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /neuralfeels/datasets/image_transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | class BGRtoRGB(object): 11 | """bgr format to rgb""" 12 | 13 | def __call__(self, image): 14 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 15 | return image 16 | 17 | 18 | class DepthTransform(object): 19 | """ 20 | Transforms tactile depth from the gel coordinate system to the camera coordinate system 21 | The camera is placed 0.022 m behind the gel surface 22 | """ 23 | 24 | def __init__(self, cam_dist): 25 | self.cam_dist = cam_dist 26 | 27 | def __call__(self, depth): 28 | depth = depth.astype(np.float32) 29 | depth += self.cam_dist 30 | depth[depth == self.cam_dist] = np.nan 31 | return depth.astype(np.float32) 32 | 33 | 34 | class DepthScale(object): 35 | """scale depth to meters""" 36 | 37 | def __init__(self, scale): 38 | self.scale = scale 39 | 40 | def __call__(self, depth): 41 | depth = depth.astype(np.float32) 42 | return depth * self.scale 43 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/custom_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | import numpy as np 9 | import torch 10 | 11 | 12 | class ToMask(object): 13 | """ 14 | Convert a 3 channel RGB image into a 1 channel segmentation mask 15 | """ 16 | 17 | def __init__(self, palette_dictionnary): 18 | self.nb_classes = len(palette_dictionnary) 19 | self.palette_dictionnary = palette_dictionnary 20 | 21 | def __call__(self, pil_image): 22 | # avoid taking the alpha channel 23 | image_array = np.array(pil_image) 24 | # get only one channel for the output 25 | output_array = np.zeros(image_array.shape, dtype="int") 26 | 27 | for label in self.palette_dictionnary.keys(): 28 | rgb_color = self.palette_dictionnary[label]["color"] 29 | mask = image_array == rgb_color 30 | output_array[mask] = int(label) 31 | 32 | output_array = torch.from_numpy(output_array).long() 33 | return output_array 34 | -------------------------------------------------------------------------------- /scripts/config/main/sensor/realsense.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Config for the realsense 7 | 8 | name: 9 | 10 | sampling: 11 | n_rays: 400 12 | n_strat_samples: 30 13 | n_surf_samples: 10 14 | depth_range: [0.3, 1.0] # [0.6, 6.0] for D455, [0.3, 3.0] for D435 15 | surface_samples_offset: 1e-3 # needs to be small to capture surface detail 16 | dist_behind_surf: 2e-2 # needs to be larger to carve out object extents 17 | loss_ratio : 1.0 18 | free_space_ratio: 0.7 # used for mapping, but not tracking 19 | 20 | kf_min_loss: 1e-2 21 | 22 | masks: sam_vit_l # read (gt from file), sam_vit_h, sam_vit_l, sam_vit_b 23 | sim_noise_iters: 5 24 | 25 | # (empirical) prefers this area of mask from multi-mask SAM output 26 | optimal_mask_size: 27 | realsense_front_left: 15000.0 28 | realsense_back_right: 5000.0 29 | realsense_top_down: 4000.0 30 | 31 | # (empirical) z-offset for pixel prompt wrt grasp center 32 | sam_offset: 33 | realsense_front_left: 0.0 34 | realsense_back_right: 0.01 35 | realsense_top_down: 0.0 36 | 37 | 38 | viz: 39 | reduce_factor: 1 40 | reduce_factor_up: 1 -------------------------------------------------------------------------------- /scripts/config/main/train/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Optimizer class 7 | 8 | optimizer: 9 | device: cuda 10 | checkpoint: 11 | incremental : True 12 | lr: 2e-4 13 | weight_decay: 1e-6 14 | num_iters: 1 15 | map_init_iters: 500 16 | 17 | model: 18 | do_active: 0 19 | scale_output: 1.0 20 | noise_std: 21 | feelsight : [2e-3, 2e-3] # [vision, touch] 22 | feelsight_real : [1e-3, 5e-3] # [vision, touch] 23 | window_size: 10 24 | num_layers: 3 # num_layers - 1 hidden layers 25 | hidden_feature_size: 64 26 | kf_time : 0.2 27 | milestones: [1, 2, 3] 28 | gamma: 0.5 29 | 30 | pos_encoding: 31 | n_levels: 19 # previous: 19 32 | n_features_per_level: 2 33 | log2_hashmap_size: 23 # previous: 23 34 | base_resolution: 4 35 | per_level_scale: 1.3 36 | 37 | loss: 38 | bounds_method: pc 39 | loss_type: L1 40 | trunc_weight: 10.0 41 | eik_weight: 0.0 42 | trunc_distance: 5e-3 43 | 44 | train_mode: slam # pose, map, slam 45 | gt_voxel_size: 5e-4 46 | load_checkpoint_model: False 47 | grasp: False 48 | 49 | batch: 50 | train_time_min: 0.5 51 | max_frames: 10 -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to neuralfeels 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. 
Please ensure your description is
23 | clear and has sufficient instructions to be able to reproduce the issue.
24 |
25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
26 | disclosure of security bugs. In those cases, please go through the process
27 | outlined on that page and do not file a public issue.
28 |
29 | ## License
30 | By contributing to neuralfeels, you agree that your contributions will be licensed
31 | under the LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/neuralfeels/eval/occlusion_plot.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | """
7 | Plot graphs of pose error vs. occlusion for a sweep of neuralfeels experiments
8 | Usage: python neuralfeels/eval/occlusion_plot.py log_path= # e.g. multirun/2023-07-31/14-27-43
9 | """
10 |
11 | import os
12 |
13 | import git
14 | import hydra
15 | from omegaconf import DictConfig
16 |
17 | from neuralfeels.viz.plot_metrics import (
18 |     pose_error_vs_occlusion,
19 |     pose_errors_vs_camera_frustums,
20 | )
21 |
22 | root = git.Repo(".", search_parent_directories=True).working_tree_dir
23 |
24 | from pathlib import Path
25 |
26 |
27 | @hydra.main(version_base=None, config_path="config/", config_name="group_error")
28 | def main(cfg: DictConfig) -> None:
29 |     log_path = os.path.join(root, cfg.log_path)
30 |     if log_path[-1] == "/":
31 |         log_path = log_path[:-1]
32 |     all_expts = []
33 |     for path in Path(log_path).rglob("stats.pkl"):
34 |         expt_path = str(path.parent).replace(log_path + "/", "")
35 |         all_expts.append(expt_path)
36 |
37 |     pose_errors_vs_camera_frustums(all_expts, log_path)
38 |     pose_error_vs_occlusion(all_expts, log_path)
39 |     print(f"All outputs saved at {log_path}")
40 |
41 |
42 | if __name__ == "__main__":
43 |     main()
44 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/SceneGraph/Transform.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | import numpy as np
9 |
10 |
11 | # Borrowed idea from pySG repo: https://github.com/jmanek/pySG
12 | class Transform:
13 |     # Only support matrix currently
14 |     def __init__(self):
15 |         self._matrix = np.eye(4)
16 |         # Matrix for calculating the latest matrix, the order will always be scale -> rotate -> translate
17 |         self._transMat = np.eye(4)
18 |         self._rotMat = np.eye(4)
19 |         self._scaleMat = np.eye(4)
20 |
21 |     def getMatrix(self):
22 |         self.updateMatrix()
23 |         return self._matrix
24 |
25 |     def updateMatrix(self):
26 |         self._matrix = np.dot(self._transMat, np.dot(self._rotMat, self._scaleMat))
27 |
28 |     def translateMat(self, transMat):
29 |         # Apply the translation after previous _transMat
30 |         self._transMat = np.dot(transMat, self._transMat)
31 |
32 |     def rotateMat(self, rotMat):
33 |         # Apply the rotation after previous _rotMat
34 |         self._rotMat = np.dot(rotMat, self._rotMat)
35 |
36 |     def scaleMat(self, scaleMat):
37 |         # Apply the scale after previous _scaleMat
38 |         self._scaleMat = np.dot(scaleMat, self._scaleMat)
39 |
--------------------------------------------------------------------------------
/scripts/config/main/sensor/digit.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Config for DIGIT sensor
7 |
8 | name:
9 |
10 | tactile_depth:
11 |   mode: vit
12 |   use_real_data: True
13 |
14 | sampling:
15 |   n_rays: 5 # samples/area should be somewhat consistent with vision
16 |   n_strat_samples: 10
17 |   n_surf_samples: 10
18 |   depth_range: [-0.01, 0.05] # sampling from behind the digit camera to beyond the surface
19 |   surface_samples_offset: 1e-3 # needs to be small to capture surface detail
20 |   dist_behind_surf: 2e-2 # needs to be larger to carve out object extents
21 |   loss_ratio : 0.1 # slower lr for tactile because of less FoV
22 |   free_space_ratio: 0.0 # used for mapping, but not tracking
23 |
24 | kf_min_loss: 1e-2
25 |
26 | # taken from config_digit_shadow.yml from tacto
27 | gel:
28 |   origin: [0.022, 0, 0] # Center coordinate of the gel, in meters
29 |   width: 0.02 # Width of the gel, y-axis, in meters
30 |   height: 0.03 # Height of the gel, z-axis, in meters
31 |   curvature: True # Model the gel as a curve? True/False
32 |   curvatureMax: 0.004 # Deformation of the gel due to convexity
33 |   R: 0.1 # Radius of curved gel
34 |   countW: 100 # Number of samples in the horizontal direction; the higher, the finer the detail
35 |
36 | viz:
37 |   reduce_factor: 1
38 |   reduce_factor_up: 1
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/viz.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 6 | import os 7 | 8 | import git 9 | import numpy as np 10 | import open3d as o3d 11 | 12 | from neuralfeels.contrib.urdf import SceneGraph, URDFParser, URDFTree 13 | 14 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 15 | 16 | 17 | if __name__ == "__main__": 18 | URDF_file = os.path.join(root, "data/assets/allegro/allegro_digit_left_ball.urdf") 19 | # Parse the URDF file 20 | parser = URDFParser(URDF_file) 21 | parser.parse() 22 | # Construct the URDF tree 23 | links = parser.links 24 | joints = parser.joints 25 | tree = URDFTree(links, joints) 26 | # Construct the scene graph 27 | init_pose = np.array( 28 | [ 29 | 0.0627, 30 | 1.2923, 31 | 0.3383, 32 | 0.1088, 33 | 0.0724, 34 | 1.1983, 35 | 0.1551, 36 | 0.1499, 37 | 0.1343, 38 | 1.1736, 39 | 0.5355, 40 | 0.2164, 41 | 1.1202, 42 | 1.1374, 43 | 0.8535, 44 | -0.0852, 45 | ] 46 | ) 47 | 48 | init_pose = np.zeros(16) 49 | init_pose[12] += 1.4 50 | scene = SceneGraph(tree.root, init_pose) 51 | mesh = scene.getMesh() 52 | 53 | o3d.visualization.draw_geometries(mesh) 54 | -------------------------------------------------------------------------------- /scripts/config/main/pose/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Pose opt class 7 | 8 | timer: false 9 | grasp_threshold: 100 10 | show_samples: False 11 | method: second-order # first-order, second-order 12 | window_size: 3 # TODO: change this 13 | 14 | second_order: 15 | optimizer_cls: LevenbergMarquardt 16 | linear_solver_cls: CholeskyDenseSolver 17 | linearization_cls: DenseLinearization 18 | optimizer_kwargs: 19 | track_best_solution: True 20 | verbose: false 21 | __keep_final_step_size__: true 22 | adaptive_damping: true 23 | autograd_strategy: forward-mode # autograd_strategy: forward-mode for pose estimation 24 | tsdf_method: analytic # [analytic, numerical, autodiff] 25 | vectorize: true # true for pose estimation 26 | test_jacobians: false # debugging only 27 | empty_cuda_cache: false 28 | lm_iters: 20 29 | num_iters: 2 30 | step_size: 1.0 31 | tsdf_w: 1e-2 32 | regularize: true 33 | reg_w: 1e-2 34 | icp: true 35 | icp_w: 1e0 36 | icp_fitness: 0.5 37 | icp_inlier_rmse: 5e-3 38 | icp_thresh: [5, 0.01] # [rotation (deg), translation (m)] 39 | 40 | 41 | loss_type: L1 42 | n_rays_per_sensor_vision: 300 # total rays = n_rays_per_sensor * n_valid_sensors 43 | n_rays_per_sensor_tactile: 25 # total rays = n_rays_per_sensor * n_valid_sensors 44 | w_vision: 1.0 45 | w_tactile: 1.0 46 | -------------------------------------------------------------------------------- /neuralfeels/viz/plot_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | 9 | 10 | def smooth_data(y, N=5): 11 | # rolling avg. 
over N timesteps
12 |     df = pd.DataFrame()
13 |     df["y"] = y
14 |     df_smooth = df.rolling(N).mean()
15 |     df_smooth["y"][0 : N - 1] = y[0 : N - 1]  # first N readings are as-is
16 |     return df_smooth["y"]
17 |
18 |
19 | feelsight_sim_objects = [
20 |     "contactdb_rubber_duck",
21 |     "contactdb_elephant",
22 |     "077_rubiks_cube",
23 |     "large_dice",
24 |     "016_pear",
25 |     "015_peach",
26 |     "010_potted_meat_can",
27 |     "073-f_lego_duplo",
28 | ]
29 |
30 | feelsight_sim_mesh_diag = {
31 |     "contactdb_rubber_duck": 0.14833374114812853,
32 |     "contactdb_elephant": 0.1850651169858869,
33 |     "077_rubiks_cube": 0.12201651401757059,
34 |     "large_dice": 0.08720458052763055,
35 |     "016_pear": 0.13722709752814855,
36 |     "015_peach": 0.10593046598594759,
37 |     "010_potted_meat_can": 0.1449591345276316,
38 |     "073-f_lego_duplo": 0.06760945759285457,
39 | }
40 |
41 | feelsight_real_objects = [
42 |     "bell_pepper",
43 |     "large_dice",
44 |     "peach",
45 |     "pear",
46 |     "pepper_grinder",
47 |     "rubiks_cube_small",
48 | ]
49 |
50 | feelsight_real_mesh_diag = {
51 |     "bell_pepper": 0.14895704905777368,
52 |     "large_dice": 0.08720458052763055,
53 |     "peach": 0.10578790231401698,
54 |     "pear": 0.13838421462002087,
55 |     "pepper_grinder": 0.14848234731441984,
56 |     "rubiks_cube_small": 0.09042267417523107,
57 | }
58 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/URDF/Parser/Joint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | import numpy as np
9 |
10 |
11 | class Joint:
12 |     def __init__(self, joint_name, joint_type, child_name, parent_name):
13 |         self.joint_name = joint_name
14 |         self.joint_type = joint_type
15 |         self.child_name = child_name
16 |         self.parent_name = parent_name
17 |         # Naming rule: concatenate the tag name as the variable name, and the attribute name as the key
18 |         # If the tag has just one attribute, skip the dictionary
19 |         self.origin = {"xyz": np.array([0, 0, 0]), "rpy": np.array([0, 0, 0])}
20 |         self.axis = np.array([1, 0, 0])
21 |         self.limit = {"lower": 0, "upper": 0}
22 |
23 |     def setOriginXyz(self, xyz):
24 |         self.origin["xyz"] = np.array(xyz)
25 |
26 |     def setOriginRpy(self, rpy):
27 |         self.origin["rpy"] = np.array(rpy)
28 |
29 |     def setAxis(self, axis):
30 |         self.axis = np.array(axis)
31 |
32 |     def setLimitLower(self, lower):
33 |         self.limit["lower"] = lower
34 |
35 |     def setLimitUpper(self, upper):
36 |         self.limit["upper"] = upper
37 |
38 |     def __repr__(self):
39 |         output = {}
40 |         output["name"] = self.joint_name
41 |         output["type"] = self.joint_type
42 |         output["child_name"] = self.child_name
43 |         output["parent_name"] = self.parent_name
44 |         output["origin"] = self.origin
45 |         output["axis"] = self.axis
46 |         output["limit"] = self.limit
47 |
48 |         return str(output)
49 |
--------------------------------------------------------------------------------
/neuralfeels/modules/misc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 6 | # Miscellaneous utility functions 7 | 8 | import gc 9 | import os 10 | import shutil 11 | from typing import Dict 12 | 13 | import numpy as np 14 | import torch 15 | from scipy.spatial.transform import Rotation as R 16 | from termcolor import cprint 17 | 18 | 19 | def print_once(string, bucket=[]): 20 | """ 21 | Print statement only once: https://stackoverflow.com/a/75484543 22 | """ 23 | if string not in bucket: 24 | print(string) 25 | bucket.append(string) 26 | if len(bucket) > 50: 27 | del bucket[:-1] 28 | 29 | 30 | def gpu_usage_check(): 31 | available, total = torch.cuda.mem_get_info("cuda:0") 32 | availableGb = available / (1024**3) 33 | ratioGb = available / total 34 | if ratioGb < 0.1: 35 | cprint(f"WARNING: {availableGb}GB available on GPU", color="red") 36 | gc.collect() 37 | torch.cuda.empty_cache() 38 | 39 | 40 | def remove_and_mkdir(results_path: str) -> None: 41 | """ 42 | Remove directory (if exists) and create 43 | """ 44 | if os.path.exists(results_path): 45 | shutil.rmtree(results_path) 46 | os.makedirs(results_path) 47 | 48 | 49 | def pose_from_config(cfg: Dict): 50 | T = np.eye(4) 51 | T[:3, :3] = R.from_quat( 52 | [ 53 | cfg["rotation"]["x"], 54 | cfg["rotation"]["y"], 55 | cfg["rotation"]["z"], 56 | cfg["rotation"]["w"], 57 | ] 58 | ).as_matrix() 59 | T[:3, 3] = np.array( 60 | [cfg["translation"]["x"], cfg["translation"]["y"], cfg["translation"]["z"]] 61 | ) 62 | return T 63 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/fusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Fusion module for tactile transformer 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | class ResidualConvUnit(nn.Module): 15 | def __init__(self, features): 16 | super().__init__() 17 | 18 | self.conv1 = nn.Conv2d( 19 | features, features, kernel_size=3, stride=1, padding=1, bias=True 20 | ) 21 | self.conv2 = nn.Conv2d( 22 | features, features, kernel_size=3, stride=1, padding=1, bias=True 23 | ) 24 | self.relu = nn.ReLU(inplace=True) 25 | 26 | def forward(self, x): 27 | """Forward pass. 
28 |         Args:
29 |             x (tensor): input
30 |         Returns:
31 |             tensor: output
32 |         """
33 |         out = self.relu(x)
34 |         out = self.conv1(out)
35 |         out = self.relu(out)
36 |         out = self.conv2(out)
37 |         return out + x
38 |
39 |
40 | class Fusion(nn.Module):
41 |     def __init__(self, resample_dim):
42 |         super(Fusion, self).__init__()
43 |         self.res_conv1 = ResidualConvUnit(resample_dim)
44 |         self.res_conv2 = ResidualConvUnit(resample_dim)
45 |
46 |     def forward(self, x, previous_stage=None):
47 |         if previous_stage == None:
48 |             previous_stage = torch.zeros_like(x)
49 |         output_stage1 = self.res_conv1(x)
50 |         output_stage1 += previous_stage
51 |         output_stage2 = self.res_conv2(output_stage1)
52 |         output_stage2 = nn.functional.interpolate(
53 |             output_stage2, scale_factor=2, mode="bilinear", align_corners=True
54 |         )
55 |         return output_stage2
56 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: _neuralfeels
2 | channels:
3 |   - defaults
4 | dependencies:
5 |   - _libgcc_mutex
6 |   - _openmp_mutex
7 |   - brotlipy
8 |   - ca-certificates
9 |   - certifi
10 |   - cffi
11 |   - chardet
12 |   - conda-package-handling
13 |   - idna
14 |   - ld_impl_linux-64
15 |   - libffi
16 |   - libgcc-ng
17 |   - libgomp
18 |   - libspatialindex
19 |   - libstdcxx-ng
20 |   - openssl
21 |   - pip
22 |   - pycosat
23 |   - pycparser
24 |   - pyopenssl
25 |   - python=3.9.15
26 |   - readline
27 |   - requests
28 |   - ruamel_yaml
29 |   - six
30 |   - sqlite
31 |   - tk
32 |   - tqdm
33 |   - urllib3
34 |   - wheel
35 |   - xz
36 |   - yaml
37 |   - zlib
38 |   - pip:
39 |       - cython==3.0.0
40 |       - datasets==3.1.0
41 |       - dill==0.3.7
42 |       - einops==0.6.1
43 |       - ffmpeg-python==0.2.0
44 |       - gdown==5.2.0
45 |       - gitdb==4.0.10
46 |       - gitpython==3.1.32
47 |       - gputil==1.4.0
48 |       - h5py==3.9.0
49 |       - hydra-core==1.3.2
50 |       - hydra-joblib-launcher
51 |       - hydra-submitit-launcher==1.2.0
52 |       - imageio==2.31.1
53 |       - imageio-ffmpeg==0.4.8
54 |       - imgviz==1.7.4
55 |       - ipdb==0.13.13
56 |       - ipykernel==6.25.1
57 |       - ipython==8.14.0
58 |       - ipython-genutils==0.2.0
59 |       - json5==0.9.14
60 |       - matplotlib==3.7.2
61 |       - matplotlib-inline==0.1.6
62 |       - networkx==3.1
63 |       - ninja==1.11.1 # for quicker tcnn build
64 |       - numpy==1.22.4
65 |       - numba==0.60.0
66 |       - onnx==1.14.0
67 |       - onnxruntime==1.15.1
68 |       - open3d==0.16.0
69 |       - opencv-python
70 |       - pandas==2.0.3
71 |       - pre-commit==4.0.1
72 |       - pycocotools==2.0.7
73 |       - pyglet==1.5.27
74 |       - pyopengl==3.1.0
75 |       - pyvirtualdisplay==3.0
76 |       - pyvista==0.41.1
77 |       - pyyaml==6.0.1
78 |       - rtree==1.0.1
79 |       - scikit-image==0.21.0
80 |       - scikit-learn==1.3.0
81 |       - scipy==1.11.2
82 |       - seaborn==0.12.2
83 |       - shapely==2.0.1
84 |       - snakeviz==2.2.0
85 |       - termcolor==2.3.0
86 |       - timm==0.9.5
87 |       - trimesh==3.23.3
88 |       - urdf-parser-py==0.0.4
89 |       - wandb==0.15.8
90 |       - yappi==1.4.0
91 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/URDF/Parser/Link.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | import numpy as np
9 |
10 |
11 | class Link:
12 |     def __init__(self, link_name):
13 |         self.link_name = link_name
14 |         self.color = [0.0, 0.0, 0.0]
15 |         # Naming rule: concatenate the tag name as the variable name, and the attribute name as the key
16 |         self.visuals = []
17 |
18 |     def hasVisual(self):
19 |         if len(self.visuals) == 0:
20 |             return False
21 |         return True
22 |
23 |     def addVisual(self, visual_name=None):
24 |         self.visuals.append(Visual(visual_name))
25 |
26 |     def setVisualMeshScale(self, scale):
27 |         current_visual = len(self.visuals) - 1
28 |         self.visuals[current_visual].geometry_mesh["scale"] = np.array(scale)
29 |
30 |     def setVisualOriginXyz(self, xyz):
31 |         current_visual = len(self.visuals) - 1
32 |         self.visuals[current_visual].origin["xyz"] = np.array(xyz)
33 |
34 |     def setVisualOriginRpy(self, rpy):
35 |         current_visual = len(self.visuals) - 1
36 |         self.visuals[current_visual].origin["rpy"] = np.array(rpy)
37 |
38 |     def setVisualGeometryMeshFilename(self, filename):
39 |         current_visual = len(self.visuals) - 1
40 |         self.visuals[current_visual].geometry_mesh["filename"] = filename
41 |
42 |     def __repr__(self):
43 |         output = {}
44 |         output["name"] = self.link_name
45 |         output["visual"] = self.visuals
46 |         return str(output)
47 |
48 |
49 | class Visual:
50 |     def __init__(self, visual_name=None):
51 |         self.visual_name = visual_name
52 |         self.origin = {"xyz": np.array([0, 0, 0]), "rpy": np.array([0, 0, 0])}
53 |         self.geometry_mesh = {"filename": None, "scale": np.array([1.0, 1.0, 1.0])}
54 |
55 |     def __repr__(self):
56 |         output = {}
57 |         output["origin"] = self.origin
58 |         output["mesh"] = self.geometry_mesh["filename"]
59 |         return str(output)
60 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/URDF/URDFTree/URDFTree.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | from .ExLink import ExLink
9 |
10 |
11 | class URDFTree:
12 |     # Construct the URDF tree based on the parser
13 |     def __init__(self, links, joints):
14 |         self.links = links
15 |         self.joints = joints
16 |         # Init ExLinks (extended links: include joint info in the child part; parent and child info)
17 |         self.exLinks = {}
18 |         self.initExlinks()
19 |         # Build the tree and find the root (If not strictly a tree, construct a virtual root)
20 |         self.buildTree()
21 |         self.root = None
22 |         self.findRoot()
23 |
24 |     def initExlinks(self):
25 |         # Create extended links list
26 |         for link_name in self.links:
27 |             exLink = ExLink(self.links[link_name])
28 |             self.exLinks[link_name] = exLink
29 |
30 |     def buildTree(self):
31 |         for joint_name in self.joints:
32 |             joint = self.joints[joint_name]
33 |             # Connect child and parent through parent and children in exLink
34 |             child_name = joint.child_name
35 |             parent_name = joint.parent_name
36 |             child = self.exLinks[child_name]
37 |             parent = self.exLinks[parent_name]
38 |             child.setJoint(joint)
39 |             child.setParent(parent)
40 |             parent.addChild(child)
41 |
42 |     def findRoot(self):
43 |         roots = []
44 |         for link_name in self.exLinks:
45 |             link = self.exLinks[link_name]
46 |             if link.parent == None:
47 |                 roots.append(link)
48 |         if len(roots) == 0:
49 |             raise RuntimeError("Invalid: No root nodes for the URDF")
50 |         elif len(roots) == 1:
51 |             self.root = roots[0]
52 |         else:
53 |             # Construct a virtual root to connect all nodes without a parent
54 |             self.root = ExLink(None)
55 |             for child in roots:
56 |                 self.root.addChild(child)
57 |                 child.setParent(self.root)
58 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/tactile_transformer/head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT
7 |
8 | # Depth and segmentation head for tactile transformer
9 |
10 | import torch.nn as nn
11 |
12 |
13 | class Interpolate(nn.Module):
14 |     def __init__(self, scale_factor, mode, align_corners=False):
15 |         super(Interpolate, self).__init__()
16 |         self.interp = nn.functional.interpolate
17 |         self.scale_factor = scale_factor
18 |         self.mode = mode
19 |         self.align_corners = align_corners
20 |
21 |     def forward(self, x):
22 |         x = self.interp(
23 |             x,
24 |             scale_factor=self.scale_factor,
25 |             mode=self.mode,
26 |             align_corners=self.align_corners,
27 |         )
28 |         return x
29 |
30 |
31 | class HeadDepth(nn.Module):
32 |     def __init__(self, features):
33 |         super(HeadDepth, self).__init__()
34 |         self.head = nn.Sequential(
35 |             nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
36 |             Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
37 |             nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
38 |             nn.ReLU(),
39 |             nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
40 |             # nn.ReLU()
41 |             nn.Sigmoid(),
42 |         )
43 |
44 |     def forward(self, x):
45 |         x = self.head(x)
46 |         # x = (x - x.min())/(x.max()-x.min() + 1e-15)
47 |         return x
48 |
49 |
50 | class HeadSeg(nn.Module):
51 |     def __init__(self, features, nclasses=2):
52 |         super(HeadSeg, self).__init__()
53 |         self.head = nn.Sequential(
54 |             nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
55 |             Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
56 |             nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
57 |             nn.ReLU(),
58 |             nn.Conv2d(32, nclasses, kernel_size=1, stride=1, padding=0),
59 |         )
60 |
61 |     def forward(self, x):
62 |         x = self.head(x)
63 |         return x
64 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/tactile_transformer/README.md:
--------------------------------------------------------------------------------
1 | ## Tactile transformer: vision-based touch to depth
2 |
3 | https://github.com/user-attachments/assets/148ec457-d9d1-415f-887f-73ebed8a568b
4 |
5 | While vision-based touch sensors interpret contact geometry as images, they remain out-of-distribution from natural images. The embedded camera directly perceives the illuminated gelpad, and contact depth is obtained either via photometric stereo or via supervised learning. Existing touch-to-depth relies on convolution; however, recent work has shown the benefit of a ViT for dense depth prediction in natural images. We present a tactile transformer for predicting contact depth from vision-based touch, trained entirely in simulation to generalize across multiple real-world DIGIT sensors. For use, refer to the [NeuralFeels](https://github.com/facebookresearch/neuralfeels) repository.
6 |
7 | Our code is based on [FocusOnDepth](https://github.com/antocad/FocusOnDepth), a re-implementation of the popular [DPT](https://github.com/isl-org/DPT) vision transformer. We make the necessary modifications to work for tactile images, and share the weights online. The models `data/tactile_transformer/dpt_real.p` and `data/tactile_transformer/dpt_sim.p` are trained on TACTO data from simulated interaction with YCB objects. The models differ slightly in the augmentations used during data generation.
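For orientation, inference with these weights is a standard image-to-depth forward pass. The sketch below is illustrative only: the load path, call convention, and `(depth, segmentation)` output pair are assumptions rather than the actual TouchVIT API, and the example script in the next section remains the authoritative entry point.

```python
# Hedged sketch: names, load path, and output convention are assumptions,
# not the actual TouchVIT API. See touch_vit.py below for real usage.
import torch


def predict_depth(model: torch.nn.Module, tactile_rgb: torch.Tensor) -> torch.Tensor:
    """Run a DPT-style touch-to-depth model on a (B, 3, H, W) batch in [0, 1]."""
    model.eval()
    with torch.no_grad():
        out = model(tactile_rgb)
        # FocusOnDepth-style models return a (depth, segmentation) pair.
        depth = out[0] if isinstance(out, tuple) else out
    return depth.squeeze(1)  # (B, H, W) depth in the model's normalized units


# model = torch.load("data/tactile_transformer/dpt_real.p")  # assumed weight format
# depth = predict_depth(model, torch.rand(1, 3, 224, 224))
```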
8 | 9 | ## Example script 10 | 11 | 12 | 13 | *Tactile transformer depth outputs (colormapped) for simulated data from interaction with YCB objects.* 14 | 15 | First, download tactile data for `YCB` objects: 16 | ```bash 17 | cd data 18 | gdown https://drive.google.com/drive/folders/1a-8vfMCkW52BpWOPfqk5WM5zsSjBfhN1?usp=sharing --folder 19 | mv sim tacto_data 20 | cd tacto_data && unzip -q '*.zip' && rm *.zip 21 | cd ../.. 22 | ``` 23 | 24 | Run the test script: 25 | ```bash 26 | python neuralfeels/contrib/tactile_transformer/touch_vit.py 27 | ``` 28 | 29 | ## Citation 30 | 31 | If you find NeuralFeels useful in your research, please consider citing our paper: 32 | 33 | ```bibtex 34 | @article{suresh2024neuralfeels, 35 | title={{N}eural feels with neural fields: {V}isuo-tactile perception for in-hand manipulation}, 36 | author={Suresh, Sudharshan and Qi, Haozhi and Wu, Tingfan and Fan, Taosha and Pineda, Luis and Lambeta, Mike and Malik, Jitendra and Kalakrishnan, Mrinal and Calandra, Roberto and Kaess, Michael and Ortiz, Joseph and Mukadam, Mustafa}, 37 | journal={Science Robotics}, 38 | pages={adl0628}, 39 | year={2024}, 40 | publisher={American Association for the Advancement of Science} 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | results/* 3 | 4 | # mesh files 5 | *.ply 6 | *.obj 7 | *core 8 | 9 | # save files 10 | *.pkl 11 | 12 | data/* 13 | !data/README.md 14 | .vscode 15 | .hydra 16 | /tools/* 17 | outputs/* 18 | multirun/* 19 | *.code-workspace 20 | 21 | # Byte-compiled / optimized / DLL files 22 | __pycache__/ 23 | *.py[cod] 24 | *$py.class 25 | 26 | # C extensions 27 | *.so 28 | *.p 29 | *.tar 30 | 31 | # Distribution / packaging 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib/ 40 | lib64/ 41 | parts/ 42 | sdist/ 43 | var/ 44 | wheels/ 45 | pip-wheel-metadata/ 46 | share/python-wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | MANIFEST 51 | 52 | # PyInstaller 53 | # Usually these files are written by a python script from a template 54 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 55 | *.manifest 56 | *.spec 57 | 58 | # Installer logs 59 | pip-log.txt 60 | pip-delete-this-directory.txt 61 | 62 | # Unit test / coverage reports 63 | htmlcov/ 64 | .tox/ 65 | .nox/ 66 | .coverage 67 | .coverage.* 68 | .cache 69 | nosetests.xml 70 | coverage.xml 71 | *.cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies.
112 | #Pipfile.lock 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | __MACOSX 148 | 149 | # Misc 150 | .nfs* 151 | -------------------------------------------------------------------------------- /neuralfeels/modules/render.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # SDF depth rendering, based on iSDF: https://github.com/facebookresearch/iSDF 7 | 8 | import torch 9 | 10 | from neuralfeels.geometry import transform 11 | from neuralfeels.modules.model import gradient 12 | 13 | 14 | def sdf_render_depth(z_vals, sdf, t): 15 | """ 16 | Basic method for rendering depth from SDF using samples along a ray. 17 | Assumes z_vals are ordered small -> large. 18 | Assumes sdf are ordered from expected small -> expected large 19 | """ 20 | # assert (z_vals[0].sort()[1].cpu() == torch.arange(len(z_vals[0]))).all() 21 | 22 | # z_vals are sorted from gel to camera 23 | # sdfs sorted negative to positive (inside to outside) 24 | n = sdf.size(1) # n_sample per ray 25 | 26 | inside = sdf < 0 # sdf indices inside object 27 | ixs = torch.arange(0, n, 1, device=sdf.device) # ascending order [0, n) 28 | mul = inside * ixs # keep only inside points 29 | max_ix = mul.argmax(dim=1) # smallest -ve value before intersection 30 | 31 | arange = torch.arange(z_vals.size(0), device=sdf.device) # [0 - n_pixels] 32 | depths = ( 33 | z_vals[arange, max_ix] + sdf[arange, max_ix] * t 34 | ) # sdf will always be +ve, z_vals always -ve 35 | 36 | # if no zero crossing found 37 | depths[max_ix == 0] = torch.nan 38 | # print(torch.sum(~torch.isnan(depths)) / len(depths.view(-1))) 39 | return depths 40 | 41 | 42 | # Compute surface normals in the camera frame 43 | def render_normals(T_WC, render_depth, sdf_map, dirs_C): 44 | origins, dirs_W = transform.origin_dirs_W(T_WC, dirs_C) 45 | origins = origins.view(-1, 3) 46 | dirs_W = dirs_W.view(-1, 3) 47 | 48 | pc = origins + (dirs_W * (render_depth.flatten()[:, None])) 49 | pc.requires_grad_() 50 | sdf = sdf_map(pc.unsqueeze(0)) 51 | sdf_grad = gradient(pc, sdf) 52 | 53 | surface_normals_W = -sdf_grad / (sdf_grad.norm(dim=1, keepdim=True) + 1e-6) 54 | R_CW = T_WC[:, :3, :3].inverse() 55 | surface_normals_C = (R_CW * surface_normals_W[..., None, :]).sum(dim=-1) 56 | 57 | surface_normals_C = surface_normals_C.view( 58 | render_depth.shape[0], render_depth.shape[1], 3 59 | ) 60 | return surface_normals_C 61 | 62 | 63 | def render_weighted(weights, vals, dim=-1, normalise=False): 64 | """ 65 | General rendering function using weighted sum.
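Args:
    weights: per-sample weights along each ray.
    vals: per-sample values to composite (e.g., depths or colors).
    dim: dimension along which the weighted sum is taken.
    normalise: if True, divide the weighted sum by the number of samples along `dim`.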
66 | """ 67 | weighted_vals = weights * vals 68 | render = weighted_vals.sum(dim=dim) 69 | if normalise: 70 | n_samples = weights.size(dim) 71 | render = render / n_samples 72 | 73 | return render 74 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Copyright (c) Meta Platforms, Inc. and affiliates. 4 | 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | usage="$(basename "$0") [-h] [-e ENV_NAME] [-f INSTALL_FAIROTAG] -- 9 | Install the neuralfeels environment 10 | where: 11 | -h show this help text 12 | -e name of the environment, default=_neuralfeels 13 | " 14 | 15 | options=':he:' 16 | while getopts $options option; do 17 | case "$option" in 18 | h) 19 | echo "$usage" 20 | exit 21 | ;; 22 | e) ENV_NAME=$OPTARG ;; 23 | :) 24 | printf "missing argument for -%s\n" "$OPTARG" >&2 25 | echo "$usage" >&2 26 | exit 1 27 | ;; 28 | \?) 29 | printf "illegal option: -%s\n" "$OPTARG" >&2 30 | echo "$usage" >&2 31 | exit 1 32 | ;; 33 | esac 34 | done 35 | 36 | # if ENV_NAME is not set, then set it to _neuralfeels 37 | if [ -z "$ENV_NAME" ]; then 38 | ENV_NAME=_neuralfeels 39 | fi 40 | 41 | echo "Environment Name: $ENV_NAME" 42 | 43 | unset PYTHONPATH LD_LIBRARY_PATH 44 | 45 | # # remove any exisiting env 46 | micromamba remove -y -n $ENV_NAME --all 47 | micromamba env create -y --name $ENV_NAME --file environment.yml 48 | micromamba activate $ENV_NAME 49 | 50 | # Following the instructions from https://docs.nerf.studio/quickstart/installation.html for the right combination of cuda / torch / tinycudann 51 | python -m pip install --upgrade pip 52 | pip uninstall torch torchvision functorch tinycudann -y 53 | pip install torch==2.1.2+cu118 torchvision==0.16.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 -y 54 | micromamba install -c "nvidia/label/cuda-11.8.0" cuda-toolkit 55 | 56 | # Check if the install is successful 57 | python -c "import torch; assert torch.cuda.is_available()" 58 | if nvcc --version &>/dev/null; then 59 | echo "nvcc is installed and working." 60 | else 61 | echo "nvcc is not installed or not in PATH." 62 | exit 1 63 | fi 64 | 65 | # Install tinycudann for instant-ngp backbone. Common issues: 66 | # - Setup with gcc/g++ 9 if it throws errors (see issue: https://github.com/NVlabs/tiny-cuda-nn/issues/284) 67 | # - Differing compute capabilities: https://github.com/NVlabs/tiny-cuda-nn/issues/341#issuecomment-1651814335 68 | pip install ninja \ 69 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch \ 70 | git+https://github.com/facebookresearch/segment-anything.git \ 71 | git+https://github.com/suddhu/tacto.git@master 72 | 73 | # Install github.com/facebookresearch/theseus 74 | micromamba install -y suitesparse # required for theseus 75 | pip install theseus-ai 76 | 77 | # Install neuralfeels package 78 | pip install -e . 79 | 80 | # Make entrypoint executable 81 | chmod +x scripts/run 82 | -------------------------------------------------------------------------------- /neuralfeels/contrib/sam/test_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Test multi-mask SAM model with point prompts on sample data from neuralfeels 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from PIL import Image 11 | from segment_anything import SamPredictor, sam_model_registry 12 | 13 | 14 | # detect fingers in 2D and compute mask from that 15 | def show_mask(mask, ax, random_color=False): 16 | if random_color: 17 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 18 | else: 19 | color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) 20 | h, w = mask.shape[-2:] 21 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 22 | ax.imshow(mask_image) 23 | 24 | 25 | def show_points(coords, labels, ax, marker_size=375): 26 | pos_points = coords[labels == 1] 27 | neg_points = coords[labels == 0] 28 | ax.scatter( 29 | pos_points[:, 0], 30 | pos_points[:, 1], 31 | color="green", 32 | marker="*", 33 | s=marker_size, 34 | edgecolor="white", 35 | linewidth=1.25, 36 | ) 37 | ax.scatter( 38 | neg_points[:, 0], 39 | neg_points[:, 1], 40 | color="red", 41 | marker="*", 42 | s=marker_size, 43 | edgecolor="white", 44 | linewidth=1.25, 45 | ) 46 | 47 | 48 | def show_box(box, ax): 49 | x0, y0 = box[0], box[1] 50 | w, h = box[2] - box[0], box[3] - box[1] 51 | ax.add_patch( 52 | plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2) 53 | ) 54 | 55 | 56 | def main(): 57 | sam_checkpoint = "data/segment-anything/sam_vit_h_4b8939.pth" 58 | device = "cuda" 59 | model_type = "default" 60 | 61 | sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) 62 | sam.to(device=device) 63 | predictor = SamPredictor(sam) 64 | 65 | image_path = ( 66 | "data/feelsight_real/rubiks_cube_small/00/realsense/front-left/image/0.jpg" 67 | ) 68 | 69 | with Image.open(image_path) as im: 70 | image = np.asarray(im) 71 | 72 | predictor.set_image(image) 73 | 74 | input_point = np.array([[390, 205]]) 75 | input_label = np.array([1]) 76 | 77 | masks, scores, logits = predictor.predict( 78 | point_coords=input_point, 79 | point_labels=input_label, 80 | box=None, 81 | multimask_output=True, 82 | ) 83 | 84 | for i, (mask, score) in enumerate(zip(masks, scores)): 85 | plt.figure(figsize=(10, 10)) 86 | plt.imshow(image) 87 | show_mask(mask, plt.gca()) 88 | show_points(input_point, input_label, plt.gca()) 89 | plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) 90 | plt.axis("off") 91 | plt.show() 92 | 93 | 94 | if __name__ == "__main__": 95 | main() 96 | -------------------------------------------------------------------------------- /neuralfeels/geometry/align_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
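# A minimal, hypothetical usage sketch for the `register` helper defined below,
# aligning two Open3D point clouds that differ by a small translation:
#   import numpy as np
#   import open3d as o3d
#   pts = np.random.rand(500, 3)
#   src = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pts))
#   tgt = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pts + [0.005, 0.0, 0.0]))
#   T_reg, (fitness, rmse, corres) = register(src, tgt)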
5 | 6 | # Adapted from https://github.com/psodhi/tactile-in-hand 7 | 8 | import copy 9 | 10 | import numpy as np 11 | import open3d as o3d 12 | 13 | 14 | def visualize_registration(source, target, transformation, vis3d=None, colors=None): 15 | """Open3D visualizer for registration""" 16 | source_copy = copy.deepcopy(source) 17 | target_copy = copy.deepcopy(target) 18 | 19 | source_copy.transform(transformation) 20 | 21 | clouds = [source, target_copy, source_copy] 22 | 23 | if colors is not None: 24 | clouds[0].paint_uniform_color(colors[0]) # black, source 25 | clouds[1].paint_uniform_color(colors[1]) # green, target 26 | clouds[2].paint_uniform_color(colors[2]) # red, transformed 27 | 28 | vis3d.add_geometry(clouds[0]) 29 | vis3d.add_geometry(clouds[1]) 30 | vis3d.add_geometry(clouds[2]) 31 | 32 | vis3d.run() 33 | vis3d.destroy_window() 34 | 35 | 36 | def icp( 37 | source: o3d.geometry.PointCloud, 38 | target: o3d.geometry.PointCloud, 39 | T_init=np.eye(4), 40 | mcd=0.01, 41 | max_iter=15, 42 | ): 43 | """Point to point ICP registration 44 | 45 | Args: 46 | source: source point cloud 47 | target: target point cloud 48 | T_init : Defaults to np.eye(4). 49 | mcd : Defaults to 0.01. 50 | max_iter : Defaults to 15. 51 | """ 52 | result = o3d.pipelines.registration.registration_icp( 53 | source=source, 54 | target=target, 55 | max_correspondence_distance=mcd, 56 | init=T_init, 57 | estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(), 58 | criteria=o3d.pipelines.registration.ICPConvergenceCriteria( 59 | max_iteration=max_iter 60 | ), 61 | ) 62 | transformation = result.transformation 63 | metrics = [result.fitness, result.inlier_rmse, result.correspondence_set] 64 | return transformation, metrics 65 | 66 | 67 | def register( 68 | points3d_1, 69 | points3d_2, 70 | T_init=np.eye(4), 71 | debug_vis=False, 72 | ): 73 | """Register two point clouds using ICP and returns the 6DoF transformation""" 74 | 75 | cloud_1, cloud_2 = (points3d_1, points3d_2) 76 | 77 | T_reg, metrics_reg = icp(source=cloud_1, target=cloud_2, T_init=T_init) 78 | 79 | # print("T_reg: ", T_reg) 80 | if debug_vis: 81 | colors = [ 82 | [0, 0, 0], 83 | [0, 1, 0], 84 | [1, 0, 0], 85 | ] # black, green, red 86 | 87 | vis3d = o3d.visualization.Visualizer() 88 | vis3d.create_window() 89 | visualize_registration( 90 | source=cloud_1, 91 | target=cloud_2, 92 | transformation=T_reg, 93 | vis3d=vis3d, 94 | colors=colors, 95 | ) 96 | 97 | return T_reg, metrics_reg 98 | -------------------------------------------------------------------------------- /neuralfeels/modules/object.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Dataloader for object data in neuralfeels 7 | 8 | import os 9 | 10 | import dill as pickle 11 | import numpy as np 12 | import theseus as th 13 | import torch 14 | 15 | 16 | class Object: 17 | def __init__( 18 | self, 19 | map_mode: bool, 20 | dataset_path: str = None, 21 | device: str = "cuda", 22 | ): 23 | """Dataloader for object data in neuralfeels""" 24 | super(Object, self).__init__() 25 | 26 | self.data_path = None 27 | if dataset_path is not None: 28 | self.data_path = os.path.join(dataset_path, "data.pkl") 29 | with open(self.data_path, "rb") as p: 30 | self.data = pickle.load(p) 31 | self.object_pose_gt = torch.tensor( 32 | np.array(self.data["object"]["pose"]), 33 | device=device, 34 | dtype=torch.float32, 35 | ) 36 | else: 37 | self.object_pose_gt = torch.eye(4, device=device).unsqueeze(0) 38 | # tensor large enough for 180 seconds of data at 30 fps 39 | self.object_pose_gt = self.object_pose_gt.repeat(180 * 30, 1, 1) 40 | 41 | # current_pose_offset: the difference between the current and ground-truth pose at every iteration. Needed to isolate 42 | # pose errors from map errors in F-score computation. (only applicable for mode=SLAM) 43 | self.current_pose_offset = np.eye(4) 44 | 45 | if map_mode: 46 | # if mapping, initialize the tracking problem with ground-truth 47 | self.object_pose_track = self.object_pose_gt.clone() 48 | else: 49 | # if slam/pure pose, initialize the tracking problem with identity 50 | self.object_pose_track = torch.zeros_like(self.object_pose_gt) 51 | self.object_pose_track[0] = torch.eye(4, device=device) 52 | 53 | def add_noise_to_poses(self, poses, noise_cfg): 54 | """ 55 | Corrupt poses with noise 56 | """ 57 | 58 | N = poses.shape[0] 59 | pose_noise = th.SE3.exp_map( 60 | torch.cat( 61 | [ 62 | noise_cfg.translation 63 | * ( 64 | 2.0 * torch.rand((N, 3), device=poses.device) - 1 65 | ), # scale translation noise n_t * [-1, 1] 66 | noise_cfg.rotation 67 | * ( 68 | 2 * torch.rand((N, 3), device=poses.device) - 1 69 | ), # scale rotation noise n_r * [-1, 1] 70 | ], 71 | dim=1, 72 | ) 73 | ).to_matrix() 74 | 75 | return poses @ pose_noise 76 | 77 | def save_baseline(self): 78 | # save pickle file with added baseline 79 | self.data["object"]["pose"] = list(self.object_pose_track.clone().cpu().numpy()) 80 | with open(self.data_path, "wb") as p: 81 | pickle.dump(self.data, p) 82 | print("Saved baseline poses to: ", self.data_path) 83 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # *Feelsight* : A visuo-tactile robot manipulation dataset 2 | 3 | 4 | 5 |
6 | 9 |
10 | 11 |
12 | 13 | FeelSight is a dataset of vision, touch, and proprioception data collected during in-hand rotation of objects via an RL policy. It consists of 70 experiments in total, 30 in the real world and 40 in simulation, each lasting 30 seconds. For training neural field models with FeelSight, refer to the [NeuralFeels](https://github.com/facebookresearch/neuralfeels) repository. 14 | 15 | ## Simulation data 16 | 17 | [Our simulated data](https://suddhu.github.io/neural-feels/video/feelsight_sim_rubber_duck.mp4) is collected in IsaacGym with TACTO touch simulation in the loop. 18 | 19 | ## Real-world data 20 | 21 | [Here's](https://suddhu.github.io/neural-feels/video/feelsight_real_bell_pepper.mp4) an example of real-world data from our three-camera setup and the DIGIT-Allegro hand. 22 | 23 | ## Robot setup 24 | 25 | The Allegro hand is mounted on the Franka Emika Panda robot. The hand is sensorized with DIGIT tactile sensors and surrounded by three Intel RealSense cameras. 26 | 27 | 28 | 29 | ## Dataset structure 30 | 31 | For dataloaders, refer to the [NeuralFeels](https://github.com/facebookresearch/neuralfeels) repository. 32 | 33 | ```bash 34 | feelsight/ # root directory, either feelsight or feelsight_real 35 | ├── object_1/ # e.g. 077_rubiks_cube 36 | │ ├── 00/ # log directory 37 | │ │ ├── allegro/ # tactile sensor data 38 | │ │ │ ├── index/ # finger id 39 | │ │ │ │ ├── depth # only in sim, ground-truth 40 | │ │ │ │ │ └── ..jpg 41 | │ │ │ │ ├── image # RGB tactile images 42 | │ │ │ │ │ └── ..jpg 43 | │ │ │ │ └── mask # only in sim, ground-truth 44 | │ │ │ │ └── ..jpg 45 | │ │ │ └── .. 46 | │ │ ├── realsense/ # RGB-D data 47 | │ │ │ ├── front-left/ # camera id 48 | │ │ │ │ ├── image # RGB images 49 | │ │ │ │ │ └── ..jpg 50 | │ │ │ │ ├── seg # only in sim, ground-truth 51 | │ │ │ │ │ └── ..jpg 52 | │ │ │ │ └── depth.npz # depth images 53 | │ │ ├── object_1.mp4 # video of sensor stream 54 | │ │ └── data.pkl # proprioception data 55 | │ └── .. 56 | ├── object_2/ 57 | │ └── .. 58 | └── .. 59 | ``` 60 | 61 | ## Citation 62 | 63 | If you find NeuralFeels useful in your research, please consider citing our paper: 64 | 65 | ```bibtex 66 | @article{suresh2024neuralfeels, 67 | title={{N}eural feels with neural fields: {V}isuo-tactile perception for in-hand manipulation}, 68 | author={Suresh, Sudharshan and Qi, Haozhi and Wu, Tingfan and Fan, Taosha and Pineda, Luis and Lambeta, Mike and Malik, Jitendra and Kalakrishnan, Mrinal and Calandra, Roberto and Kaess, Michael and Ortiz, Joseph and Mukadam, Mustafa}, 69 | journal={Science Robotics}, 70 | pages={adl0628}, 71 | year={2024}, 72 | publisher={American Association for the Advancement of Science} 73 | } 74 | ``` 75 | -------------------------------------------------------------------------------- /neuralfeels/datasets/redwood_depth_noise_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
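# A rough usage sketch (hypothetical): given a float32 ground-truth depth map in
# meters and the Redwood distortion table `model` (an (80, 80, 5) array, inferred
# from the indexing in _undistort below), a noisy depth image is produced with:
#   noisy = _simulate(gt_depth, model, noise_multiplier=1.0)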
5 | 6 | # Modified from Habitat-Sim (https://aihabitat.org/docs/habitat-sim/habitat_sim.sensors.noise_models.RedwoodDepthNoiseModel.html) and based on the 7 | # Redwood Depth Noise Model (http://redwood-data.org/indoor/data/simdepth.py) from 8 | # choi2015robust (https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Choi_Robust_Reconstruction_of_2015_CVPR_paper.pdf) 9 | 10 | import numba 11 | import numpy as np 12 | 13 | try: 14 | import torch 15 | from torch import Tensor 16 | except ImportError: 17 | torch = None 18 | 19 | 20 | # Read about the noise model here: http://www.alexteichman.com/octo/clams/ 21 | # Original source code: http://redwood-data.org/indoor/data/simdepth.py 22 | @numba.jit(nopython=True, fastmath=True) 23 | def _undistort(x, y, z, model): 24 | i2 = int((z + 1) / 2) 25 | i1 = int(i2 - 1) 26 | a = (z - (i1 * 2.0 + 1.0)) / 2.0 27 | x = x // 8 28 | y = y // 6 29 | f = (1.0 - a) * model[y, x, min(max(i1, 0), 4)] + a * model[y, x, min(i2, 4)] 30 | 31 | if f < 1e-5: 32 | return 0.0 33 | 34 | return z / f 35 | 36 | 37 | @numba.jit(nopython=True, parallel=True, fastmath=True) 38 | def _simulate(gt_depth, model, noise_multiplier): 39 | noisy_depth = np.empty_like(gt_depth) 40 | 41 | H, W = gt_depth.shape 42 | ymax, xmax = H - 1.0, W - 1.0 43 | 44 | rand_nums = np.random.randn(H, W, 3).astype(np.float32) 45 | 46 | # Parallelize just the outer loop. This doesn't change the speed 47 | # noticeably but reduces CPU usage compared to two parallel loops 48 | for j in numba.prange(H): 49 | for i in range(W): 50 | y = int( 51 | min(max(j + rand_nums[j, i, 0] * 0.25 * noise_multiplier, 0.0), ymax) 52 | + 0.5 53 | ) 54 | x = int( 55 | min(max(i + rand_nums[j, i, 1] * 0.25 * noise_multiplier, 0.0), xmax) 56 | + 0.5 57 | ) 58 | 59 | # Downsample 60 | d = gt_depth[y - y % 2, x - x % 2] 61 | # If the depth is greater than 10, the sensor will just return 0 62 | if d >= 10.0: 63 | noisy_depth[j, i] = 0.0 64 | else: 65 | # Distort 66 | # The noise model was originally made for a 640x480 sensor, 67 | # so re-map our arbitrarily sized sensor to that size! 68 | undistorted_d = _undistort( 69 | int(x / xmax * 639.0 + 0.5), int(y / ymax * 479.0 + 0.5), d, model 70 | ) 71 | 72 | if undistorted_d == 0.0: 73 | noisy_depth[j, i] = 0.0 74 | else: 75 | denom = round( 76 | ( 77 | 35.130 / undistorted_d 78 | + rand_nums[j, i, 2] * 0.027778 * noise_multiplier 79 | ) 80 | * 8.0 81 | ) 82 | if denom <= 1e-5: 83 | noisy_depth[j, i] = 0.0 84 | else: 85 | noisy_depth[j, i] = 35.130 * 8.0 / denom 86 | 87 | return noisy_depth 88 | -------------------------------------------------------------------------------- /scripts/config/main/touch_depth/vit.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | mode : vit 7 | weights: dpt_real 8 | 9 | settings: 10 | real: 11 | blend_sz : 0 12 | border : 0 13 | ratio : 1.2 # multiplier to decide the contact threshold. Set higher to reduce false positives 14 | clip : 10 # sets all heightmap outputs < clip pix to zero. Set higher to reduce noisy predictions 15 | batch_size : 1 16 | bg_id: null 17 | 18 | sim: 19 | blend_sz : 0 20 | border : 0 21 | ratio : 0 # multiplier to decide the contact threshold. Set higher to reduce false positives 22 | clip : 5 # sets all heightmap outputs < clip pix to zero.
Set higher to reduce noisy predictions 23 | batch_size : 1 24 | bg_id: 10 25 | 26 | General: 27 | device: cuda 28 | type : depth 29 | model_timm : vit_small_patch16_224.dino 30 | emb_dim : 384 31 | hooks : [2, 5, 8, 11] 32 | read : projection 33 | resample_dim : 128 34 | optim : adam 35 | lr_backbone : 1e-5 36 | lr_scratch : 1e-4 37 | loss_depth : mse 38 | loss_segmentation : ce 39 | momentum : 0.9 40 | epochs : 500 41 | batch_size : 50 42 | path_model : data/tactile_transformer 43 | path_input_images : data/tacto_data/004_sugar_box/00/tactile_images 44 | path_predicted_images : output 45 | seed : 0 46 | patch_size : 16 47 | 48 | Dataset: 49 | paths: 50 | path_dataset : input 51 | list_datasets: [ 52 | "002_master_chef_can", 53 | "003_cracker_box", 54 | "007_tuna_fish_can", 55 | "008_pudding_box", 56 | "009_gelatin_box", 57 | "010_potted_meat_can", 58 | "011_banana", 59 | "012_strawberry", 60 | "013_apple", 61 | "014_lemon", 62 | "015_peach", 63 | "016_pear", 64 | "017_orange", 65 | "018_plum", 66 | "019_pitcher_base", 67 | "024_bowl", 68 | "026_sponge", 69 | "029_plate", 70 | "030_fork", 71 | "031_spoon", 72 | "032_knife", 73 | "033_spatula", 74 | "036_wood_block", 75 | "040_large_marker", 76 | "044_flat_screwdriver", 77 | "050_medium_clamp", 78 | "051_large_clamp", 79 | "052_extra_large_clamp", 80 | "053_mini_soccer_ball", 81 | "054_softball", 82 | "056_tennis_ball", 83 | "057_racquetball", 84 | "058_golf_ball", 85 | "061_foam_brick", 86 | "062_dice", 87 | "065-a_cups", 88 | "065-b_cups", 89 | "070-a_colored_wood_blocks", 90 | "072-a_toy_airplane", 91 | "077_rubiks_cube"] 92 | path_images : tactile_images 93 | path_segmentations : gt_contactmasks 94 | path_depths : gt_heightmaps 95 | extensions : 96 | ext_images : .jpg 97 | ext_segmentations : .jpg 98 | ext_depths : .jpg 99 | splits: 100 | split_train : 0.6 101 | split_val : 0.2 102 | split_test : 0.2 103 | transforms: 104 | resize : [224, 224] 105 | p_flip : 0.0 106 | p_crop : 0.0 107 | p_rot : 0.0 108 | classes: 109 | "1": 110 | name: contact 111 | color: 255 112 | wandb : 113 | enable : true 114 | username : suddhu 115 | images_to_show : 5 116 | im_h : 640 117 | im_w : 480 118 | -------------------------------------------------------------------------------- /neuralfeels/eval/group_plot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ 7 | Plot group statistics for a sweep of neuralfeels experiments 8 | Usage: python neuralfeels/eval/group_plot.py log_path= # e.g. 
multirun/2023-07-31/14-27-43 9 | """ 10 | 11 | import os 12 | 13 | import git 14 | import hydra 15 | from omegaconf import DictConfig 16 | from tqdm import tqdm 17 | 18 | from neuralfeels.viz.plot_metrics import ( 19 | avg_map_error_over_time, 20 | avg_map_error_per_experiment, 21 | avg_map_error_per_modality, 22 | avg_map_error_per_object, 23 | avg_pose_error_over_time, 24 | avg_pose_error_per_camera_placement, 25 | avg_pose_error_per_experiment, 26 | avg_pose_error_per_modality, 27 | avg_pose_error_per_object, 28 | avg_pose_error_per_optimizer, 29 | avg_pose_error_per_shape_res, 30 | avg_precision_over_time, 31 | avg_recall_over_time, 32 | avg_timing_per_modality, 33 | avg_timing_per_optimizer, 34 | draw_map_error, 35 | draw_pose_error, 36 | get_dataframe, 37 | map_error_vs_thresh, 38 | success_failure_stats, 39 | ) 40 | 41 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 42 | 43 | from pathlib import Path 44 | 45 | 46 | @hydra.main(version_base=None, config_path="config/", config_name="group_error") 47 | def main(cfg: DictConfig) -> None: 48 | log_path = os.path.join(root, cfg.log_path) 49 | if log_path[-1] == "/": 50 | log_path = log_path[:-1] 51 | all_expts = [] 52 | for path in Path(log_path).rglob("stats.pkl"): 53 | expt_path = str(path.parent).replace(log_path + "/", "") 54 | all_expts.append(expt_path) 55 | which_f_score = cfg.which_f_score 56 | print(f"Found {len(all_expts)} experiments in {log_path}: {all_expts}") 57 | df_combined = get_dataframe(all_expts, log_path, which_f_score) 58 | 59 | # assert len(df_combined["slam_mode"].unique()) == 1 # only one slam_mode per plot 60 | slam_mode = df_combined["slam_mode"].unique()[0] 61 | 62 | avg_timing_per_optimizer(df_combined, log_path) 63 | avg_timing_per_modality(df_combined, log_path) 64 | 65 | if slam_mode in ["pose", "slam"]: 66 | avg_pose_error_over_time(df_combined, log_path) 67 | avg_pose_error_per_modality(df_combined, log_path) 68 | avg_pose_error_per_optimizer(df_combined, log_path) 69 | avg_pose_error_per_object(df_combined, log_path) 70 | avg_pose_error_per_camera_placement(df_combined, log_path) 71 | success_failure_stats(df_combined) 72 | if slam_mode in ["map", "slam"]: 73 | avg_map_error_over_time(df_combined, log_path) 74 | avg_precision_over_time(df_combined, log_path) 75 | avg_recall_over_time(df_combined, log_path) 76 | avg_map_error_per_modality(df_combined, log_path) 77 | avg_map_error_per_object(df_combined, log_path) 78 | map_error_vs_thresh(all_expts, log_path) 79 | 80 | if slam_mode in ["pose", "slam"]: 81 | avg_pose_error_per_experiment(df_combined, log_path) 82 | avg_pose_error_per_shape_res(df_combined, log_path) 83 | if slam_mode in ["map", "slam"]: 84 | avg_map_error_per_experiment(df_combined, log_path) 85 | if cfg.individual: 86 | print("Drawing individual plots") 87 | for expt_path in tqdm(all_expts): 88 | if "map" in expt_path or "slam" in expt_path: 89 | draw_map_error(expt_path=expt_path) 90 | if "pose" in expt_path or "slam" in expt_path: 91 | draw_pose_error(expt_path=expt_path, slam_mode=slam_mode) 92 | print(f"All outputs saved at {log_path}") 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our 
project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/dpt_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Tactile transformer model 9 | 10 | import numpy as np 11 | import timm 12 | import torch.nn as nn 13 | 14 | from neuralfeels.contrib.tactile_transformer.fusion import Fusion 15 | from neuralfeels.contrib.tactile_transformer.head import HeadDepth, HeadSeg 16 | from neuralfeels.contrib.tactile_transformer.reassemble import Reassemble 17 | 18 | 19 | class DPTModel(nn.Module): 20 | def __init__( 21 | self, 22 | image_size=(3, 384, 384), 23 | patch_size=16, 24 | emb_dim=1024, 25 | resample_dim=256, 26 | read="projection", 27 | num_layers_encoder=24, 28 | hooks=[5, 11, 17, 23], 29 | reassemble_s=[4, 8, 16, 32], 30 | transformer_dropout=0, 31 | nclasses=2, 32 | type="full", 33 | model_timm="vit_large_patch16_384", 34 | pretrained=False, 35 | ): 36 | """ 37 | type : {"full", "depth", "segmentation"} 38 | image_size : (c, h, w) 39 | patch_size : *a square* 40 | emb_dim <=> D (in the paper) 41 | resample_dim <=> ^D (in the paper) 42 | read : {"ignore", "add", "projection"} 43 | """ 44 | super().__init__() 45 | 46 | self.transformer_encoders = timm.create_model(model_timm, pretrained=pretrained) 47 | self.type_ = type 48 | 49 | # Register hooks 50 | self.activation = {} 51 | self.hooks = hooks 52 | self._get_layers_from_hooks(self.hooks) 53 | 54 | # Reassembles Fusion 55 | self.reassembles = [] 56 | self.fusions = [] 57 | for s in reassemble_s: 58 | self.reassembles.append( 59 | Reassemble(image_size, read, patch_size, s, emb_dim, resample_dim) 60 | ) 61 | self.fusions.append(Fusion(resample_dim)) 62 | self.reassembles = nn.ModuleList(self.reassembles) 63 | self.fusions = nn.ModuleList(self.fusions) 64 | 65 | # Head 66 | if type == "full": 67 | self.head_depth = HeadDepth(resample_dim) 68 | self.head_segmentation = HeadSeg(resample_dim, nclasses=nclasses) 69 | elif type == "depth": 70 | self.head_depth = HeadDepth(resample_dim) 71 | self.head_segmentation = None 72 | else: 73 | self.head_depth = None 74 | self.head_segmentation = HeadSeg(resample_dim, nclasses=nclasses) 75 | 76 | def forward(self, img): 77 | 78 | t = self.transformer_encoders(img) 79 | previous_stage = None 80 | for i in np.arange(len(self.fusions) - 1, -1, -1, dtype=int): 81 | hook_to_take = "t" + str(self.hooks[int(i)]) 82 | activation_result = self.activation[hook_to_take] 83 | reassemble_result = self.reassembles[i](activation_result) 84 | fusion_result = self.fusions[i](reassemble_result, previous_stage) 85 | previous_stage = fusion_result 86 | out_depth = None 87 | out_segmentation = None 88 | if self.head_depth != None: 89 | out_depth = self.head_depth(previous_stage) 90 | if self.head_segmentation != None: 91 | out_segmentation = 
self.head_segmentation(previous_stage) 92 | return out_depth, out_segmentation 93 | 94 | def _get_layers_from_hooks(self, hooks: list): 95 | def get_activation(name): 96 | def hook(model, input, output): 97 | self.activation[name] = output 98 | 99 | return hook 100 | 101 | for h in hooks: 102 | self.transformer_encoders.blocks[h].register_forward_hook( 103 | get_activation("t" + str(h)) 104 | ) 105 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/SceneNode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | import numpy as np 9 | import open3d as o3d 10 | 11 | from .MeshNode import MeshNode 12 | from .Transform import Transform 13 | 14 | 15 | class SceneNode: 16 | def __init__(self, parent=None): 17 | self.parent = parent 18 | self.children = [] 19 | self.name = None 20 | # Store the local transform and world transform 21 | self.localTransform = Transform() 22 | self.worldMatrix = np.eye(4) 23 | self._transformHelper = o3d.geometry.TriangleMesh.create_coordinate_frame() 24 | # Store the mesh and deal with functions that draw the mesh based on the transform 25 | self.meshNode = MeshNode() 26 | self.joint = None 27 | 28 | def setParent(self, parent): 29 | if parent is None: 30 | raise RuntimeError("Invalid Parent: parent is None") 31 | self.parent = parent 32 | self.worldMatrix = np.dot( 33 | self.parent.worldMatrix, self.localTransform.getMatrix() 34 | ) 35 | 36 | def update(self): 37 | # Update the worldMatrix of current scene node 38 | if self.parent is not None: 39 | self.worldMatrix = np.dot( 40 | self.parent.worldMatrix, self.localTransform.getMatrix() 41 | ) 42 | else: 43 | self.worldMatrix = self.localTransform.getMatrix() 44 | # Update the worldMatrix for all its children 45 | for child in self.children: 46 | child.update() 47 | 48 | def addChild(self, child): 49 | # child should also be SceneNode 50 | self.children.append(child) 51 | 52 | def addMesh(self, mesh): 53 | # mesh should be in open3d form 54 | self.meshNode.addMesh(mesh) 55 | 56 | def addMeshFile(self, mesh_file, color): 57 | self.meshNode.addMeshFile(mesh_file, color) 58 | 59 | def getMesh(self): 60 | # Get the new mesh based on the world Matrix (Assume that the matrix has been updated) 61 | new_mesh = self.meshNode.getMesh(self.worldMatrix) 62 | if new_mesh is not None: 63 | new_mesh = [new_mesh] 64 | else: 65 | new_mesh = [] 66 | # add mesh from all children 67 | for child in self.children: 68 | new_mesh += child.getMesh() 69 | return new_mesh 70 | 71 | def resetlocalTransform(self): 72 | self.localTransform = Transform() 73 | 74 | def translate(self, translation): 75 | # translation should be in the array form np.array([float, float, float]) 76 | transMat = np.array( 77 | [ 78 | [1, 0, 0, translation[0]], 79 | [0, 1, 0, translation[1]], 80 | [0, 0, 1, translation[2]], 81 | [0, 0, 0, 1], 82 | ] 83 | ) 84 | self.localTransform.translateMat(transMat) 85 | 86 | def scale(self, scale): 87 | s = np.eye(4) 88 | s[:3, :3] = np.diag(scale) 89 | self.localTransform.scaleMat(s) 90 | 91 | def rotate(self, axis, angle): 92 | # Convert axis into 3*1 array 93 | axis = axis / np.linalg.norm(axis) 94 | axisAngle = axis * angle 95 | # matrix here is 3*3 96 |
matrix = self._transformHelper.get_rotation_matrix_from_axis_angle(axisAngle) 97 | rotMat = np.eye(4) 98 | rotMat[0:3, 0:3] = matrix 99 | self.localTransform.rotateMat(rotMat) 100 | 101 | def rotateXYZ(self, angle): 102 | # angle should be in array form [float, float, float] in radians 103 | matrix = self._transformHelper.get_rotation_matrix_from_xyz(angle) 104 | rotMat = np.eye(4) 105 | rotMat[0:3, 0:3] = matrix 106 | self.localTransform.rotateMat(rotMat) 107 | -------------------------------------------------------------------------------- /neuralfeels/eval/feelsight_init.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
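# Each pose string below is a row-major 4x4 homogeneous transform flattened into
# 16 floats; a minimal (hypothetical) way to recover the matrix:
#   import numpy as np
#   T_init = np.array(pose_str.split(), dtype=float).reshape(4, 4)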
0.116068 0 0 0 0 1", 41 | "04": "0.438803 -0.86858 -0.230265 0 -0.0173117 -0.264377 0.964264 0 -0.898417 -0.419135 -0.131045 0 0 0 0 1", 42 | }, 43 | "pepper_grinder": { 44 | "00": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 45 | "01": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 46 | "02": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 47 | "03": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 48 | "04": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 49 | }, 50 | "rubiks_cube_small": { 51 | "00": "0.904895 -0.033284 -0.424332 0 -0.418351 -0.253193 -0.872282 0 -0.078405 0.966843 -0.243038 0 0 0 0 1", 52 | "01": "0.440733 0.0711927 -0.894811 0 0.885631 0.128017 0.446396 0 0.14633 -0.989213 -0.00662905 0 0 0 0 1", 53 | "02": "0.862521 0.503581 -0.0496423 0 -0.502013 0.863885 0.0410602 0 0.0635623 -0.0104942 0.997923 0 0 0 0 1", 54 | "03": "0.117835 -0.397397 0.91005 0 0.357761 -0.837905 -0.412216 0 0.926349 0.374153 0.0434396 0 0 0 0 1", 55 | "04": "0.437746 -0.104018 0.893062 0 0.855152 -0.258576 -0.449281 0 0.277658 0.960374 -0.0242391 0 0 0 0 1", 56 | }, 57 | } 58 | -------------------------------------------------------------------------------- /neuralfeels/eval/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Compute metrics for neuralfeels evaluation 7 | 8 | import time 9 | 10 | import numpy as np 11 | import torch 12 | import trimesh 13 | from scipy.spatial import cKDTree as KDTree 14 | 15 | np.set_printoptions(precision=2, suppress=True) 16 | 17 | 18 | def start_timing(): 19 | if torch.cuda.is_available(): 20 | torch.cuda.synchronize() 21 | start = torch.cuda.Event(enable_timing=True) 22 | end = torch.cuda.Event(enable_timing=True) 23 | start.record() 24 | else: 25 | start = time.perf_counter() 26 | end = None 27 | return start, end 28 | 29 | 30 | def end_timing(start, end): 31 | if torch.cuda.is_available(): 32 | torch.cuda.synchronize() 33 | end.record() 34 | # Waits for everything to finish running 35 | torch.cuda.synchronize() 36 | elapsed_time = start.elapsed_time(end) 37 | else: 38 | end = time.perf_counter() 39 | elapsed_time = end - start 40 | # Convert to milliseconds to have the same units 41 | # as torch.cuda.Event.elapsed_time 42 | elapsed_time = elapsed_time * 1000 43 | return elapsed_time 44 | 45 | 46 | def average_3d_error(point_cloud1, point_cloud2): 47 | # point_cloud1, point_cloud2: numpy arrays of shape (N, 3) 48 | # ADD-S: symmetric average 3D error pose metric (https://arxiv.org/pdf/1711.00199.pdf) 49 | # find nearest neighbors for each point in point_cloud1 50 | tree = KDTree(point_cloud2) 51 | distances, _ = tree.query(point_cloud1) # returns euclidean distance 52 | return np.mean(distances) 53 | 54 | 55 | def sample_trimesh_points(mesh, num_samples): 56 | """ 57 | Sample points on trimesh surface 58 | """ 59 | sampled_points = trimesh.sample.sample_surface(mesh, num_samples)[0] 60 | return sampled_points 61 | 62 | 63 | def compute_f_score( 64 | gt_points_np, recon_mesh, num_mesh_samples=30000, T=[2e-2, 1e-2, 5e-3, 1e-3] 65 | ): 
66 | """ 67 | https://openaccess.thecvf.com/content_ICCV_2017/papers/Park_Colored_Point_Cloud_ICCV_2017_paper.pdf 68 | Compute F-score between a ground truth point cloud and a reconstructed mesh. 69 | gt_points_np: trimesh.points.PointCloud of just poins, sampled from the surface (see 70 | compute_metrics.ply for more documentation) 71 | 72 | recon_mesh: trimesh.base.Trimesh of output mesh from whichever autoencoding reconstruction 73 | method (see compute_metrics.py for more) 74 | 75 | """ 76 | 77 | gen_points_sampled = sample_trimesh_points(recon_mesh, num_mesh_samples) 78 | # print(f"ptp gen_points_sampled: {np.ptp(gen_points_sampled, axis=0)*1000}, gt_points_np: {np.ptp(gt_points_np, axis=0)*1000}") 79 | 80 | # one_distances is distance from each gen_points_sampled to its nearest neighbor in gt_points_np 81 | gt_points_kd_tree = KDTree(gt_points_np) 82 | one_distances, _ = gt_points_kd_tree.query(gen_points_sampled, p=2) 83 | 84 | # two_distances is distance from each gt point to its nearest neighbor in gen_points_sampled 85 | gen_points_kd_tree = KDTree( 86 | gen_points_sampled 87 | ) # build a KD tree for the generated points 88 | two_distances, _ = gen_points_kd_tree.query( 89 | gt_points_np, p=2 90 | ) # find nearest neighbors for all gt_points_np from gen_points_sampled 91 | 92 | f_scores, precisions, recalls = [], [], [] 93 | for t in T: 94 | precision = (one_distances < t).sum() / len( 95 | gen_points_sampled 96 | ) # precision = percentage of gen_points_sampled that have a gt point within T mm 97 | recall = (two_distances < t).sum() / len( 98 | gt_points_np 99 | ) # recall = percentage of gt_points_np that have a gen_points_sampled within T mm 100 | # compupte F-score = 2 * (precision * recall) / (precision + recall) where 101 | # precision = percentage of gen_points_sampled that have a gt point within T mm 102 | f_score = 2 * (precision * recall) / (precision + recall) 103 | precisions.append(precision) 104 | recalls.append(recall) 105 | f_scores.append(f_score) 106 | 107 | _, vertex_ids = gen_points_kd_tree.query(np.array(recon_mesh.vertices)) 108 | return (f_scores, precisions, recalls, one_distances, vertex_ids) 109 | -------------------------------------------------------------------------------- /neuralfeels/viz/show_object_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Viser visualization script for objects in the FeelSight dataset. 
7 | # pip install viser before running this script 8 | 9 | import os 10 | import time 11 | from pathlib import Path 12 | 13 | import git 14 | import numpy as np 15 | import trimesh 16 | import viser 17 | 18 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 19 | 20 | 21 | def main( 22 | model_path: Path = os.path.join(root, "data", "assets", "gt_models", "ycb") 23 | ) -> None: 24 | # get list of folders in model_path 25 | object_names = os.listdir(model_path) 26 | # remove urdf files 27 | 28 | if "ycb" in model_path: 29 | object_names = [ 30 | "contactdb_rubber_duck", 31 | "contactdb_elephant", 32 | "077_rubiks_cube", 33 | "large_dice", 34 | "016_pear", 35 | "015_peach", 36 | "010_potted_meat_can", 37 | "073-f_lego_duplo", 38 | ] 39 | else: 40 | object_names = [ 41 | x for x in object_names if not x.endswith(".urdf") and x != ".DS_Store" 42 | ] 43 | 44 | server = viser.ViserServer() 45 | 46 | def add_selectable_mesh( 47 | name: str, mesh: trimesh.Trimesh, x: float, y: float 48 | ) -> None: 49 | def add_mesh() -> None: 50 | handle = server.add_mesh_trimesh( 51 | "/" + name, 52 | mesh=mesh, 53 | # vertices=mesh.vertices, 54 | # faces=mesh.faces, 55 | position=(y, 0.0, x), 56 | # color=colorsys.hls_to_rgb( 57 | # np.random.default_rng( 58 | # np.frombuffer( 59 | # hashlib.md5(name.encode("utf-8")).digest(), 60 | # dtype="uint32", 61 | # ) 62 | # + 5 63 | # ).uniform(), 64 | # 0.6, 65 | # 0.9, 66 | # ), 67 | ) 68 | 69 | # Requires the cmk/add_click branch of viser. 70 | # handle.clickable = True 71 | # @handle.on_click 72 | def _(_) -> None: 73 | add_mesh() 74 | 75 | add_mesh() 76 | 77 | nominal_column_width = len(object_names) 78 | rows_indices = np.array_split( 79 | np.arange(len(object_names)), np.rint(len(object_names) / nominal_column_width) 80 | ) 81 | mesh_diags = [] 82 | for row, row_indices in enumerate(rows_indices): 83 | for col, mesh_index in enumerate(row_indices): 84 | x = row * 0.12 85 | y = col * nominal_column_width * 0.12 / len(row_indices) 86 | mesh_path = os.path.join( 87 | model_path, object_names[mesh_index], "textured.obj" 88 | ) 89 | # check if mesh_path exists 90 | if not os.path.exists(mesh_path): 91 | mesh_path = os.path.join( 92 | model_path, object_names[mesh_index], "google_16k", "textured.obj" 93 | ) 94 | if not os.path.exists(mesh_path): 95 | mesh_path = os.path.join( 96 | model_path, 97 | object_names[mesh_index], 98 | f"{object_names[mesh_index]}.obj", 99 | ) 100 | mesh = trimesh.load( 101 | mesh_path, 102 | force="mesh", 103 | ) 104 | if isinstance(mesh.visual, trimesh.visual.texture.TextureVisuals): 105 | # TextureVisuals are not supported by viser yet 106 | mesh.visual = mesh.visual.to_color() 107 | 108 | # append mesh diagonal 109 | mesh_diags.append(mesh.scale) 110 | print(f"Added {object_names[mesh_index]} at ({x}, {y})") 111 | print(f"Object: {object_names[mesh_index]}, mesh diagonal: {mesh.scale}") 112 | add_selectable_mesh(object_names[mesh_index], mesh, x=x, y=y) 113 | 114 | # print min and max mesh diagonal 115 | mesh_diags = np.array(mesh_diags) 116 | print(f"Min mesh diagonal: {np.min(mesh_diags)}") 117 | print(f"Max mesh diagonal: {np.max(mesh_diags)}") 118 | while True: 119 | time.sleep(10.0) 120 | 121 | 122 | if __name__ == "__main__": 123 | # main() 124 | main( 125 | model_path=os.path.join(root, "data", "assets", "gt_models", "ycb") 126 | ) # sim dataset 127 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/reassemble.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Resampling code for tactile transformer 9 | 10 | import torch 11 | import torch.nn as nn 12 | from einops.layers.torch import Rearrange 13 | 14 | 15 | class Read_ignore(nn.Module): 16 | def __init__(self, start_index=1): 17 | super(Read_ignore, self).__init__() 18 | self.start_index = start_index 19 | 20 | def forward(self, x): 21 | return x[:, self.start_index :] 22 | 23 | 24 | class Read_add(nn.Module): 25 | def __init__(self, start_index=1): 26 | super(Read_add, self).__init__() 27 | self.start_index = start_index 28 | 29 | def forward(self, x): 30 | if self.start_index == 2: 31 | readout = (x[:, 0] + x[:, 1]) / 2 32 | else: 33 | readout = x[:, 0] 34 | return x[:, self.start_index :] + readout.unsqueeze(1) 35 | 36 | 37 | class Read_projection(nn.Module): 38 | def __init__(self, in_features, start_index=1): 39 | super(Read_projection, self).__init__() 40 | self.start_index = start_index 41 | self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) 42 | 43 | def forward(self, x): 44 | readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :]) 45 | features = torch.cat((x[:, self.start_index :], readout), -1) 46 | return self.project(features) 47 | 48 | 49 | class MyConvTranspose2d(nn.Module): 50 | def __init__(self, conv, output_size): 51 | super(MyConvTranspose2d, self).__init__() 52 | self.output_size = output_size 53 | self.conv = conv 54 | 55 | def forward(self, x): 56 | x = self.conv(x, output_size=self.output_size) 57 | return x 58 | 59 | 60 | class Resample(nn.Module): 61 | def __init__(self, p, s, h, emb_dim, resample_dim): 62 | super(Resample, self).__init__() 63 | assert s in [4, 8, 16, 32], "s must be in [4, 8, 16, 32]" 64 | self.conv1 = nn.Conv2d( 65 | emb_dim, resample_dim, kernel_size=1, stride=1, padding=0 66 | ) 67 | if s == 4: 68 | self.conv2 = nn.ConvTranspose2d( 69 | resample_dim, 70 | resample_dim, 71 | kernel_size=4, 72 | stride=4, 73 | padding=0, 74 | bias=True, 75 | dilation=1, 76 | groups=1, 77 | ) 78 | elif s == 8: 79 | self.conv2 = nn.ConvTranspose2d( 80 | resample_dim, 81 | resample_dim, 82 | kernel_size=2, 83 | stride=2, 84 | padding=0, 85 | bias=True, 86 | dilation=1, 87 | groups=1, 88 | ) 89 | elif s == 16: 90 | self.conv2 = nn.Identity() 91 | else: 92 | self.conv2 = nn.Conv2d( 93 | resample_dim, 94 | resample_dim, 95 | kernel_size=2, 96 | stride=2, 97 | padding=0, 98 | bias=True, 99 | ) 100 | 101 | def forward(self, x): 102 | x = self.conv1(x) 103 | x = self.conv2(x) 104 | return x 105 | 106 | 107 | class Reassemble(nn.Module): 108 | def __init__(self, image_size, read, p, s, emb_dim, resample_dim): 109 | """ 110 | p = patch size 111 | s = coefficient resample 112 | emb_dim <=> D (in the paper) 113 | resample_dim <=> ^D (in the paper) 114 | read : {"ignore", "add", "projection"} 115 | """ 116 | super(Reassemble, self).__init__() 117 | channels, image_height, image_width = image_size 118 | 119 | # Read 120 | self.read = Read_ignore() 121 | if read == "add": 122 | self.read = Read_add() 123 | elif read == "projection": 124 | self.read = Read_projection(emb_dim) 125 | 126 | # Concat after read 127 | self.concat = Rearrange( 128 | "b (h w) c -> b c h
w", 129 | c=emb_dim, 130 | h=(image_height // p), 131 | w=(image_width // p), 132 | ) 133 | 134 | # Projection + Resample 135 | self.resample = Resample(p, s, image_height, emb_dim, resample_dim) 136 | 137 | def forward(self, x): 138 | x = self.read(x) 139 | x = self.concat(x) 140 | x = self.resample(x) 141 | return x 142 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/SceneGraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | import re 9 | 10 | import numpy as np 11 | import open3d as o3d 12 | 13 | from .SceneNode import SceneNode 14 | 15 | # TODO : very slow, can we cache the SceneNodes? 16 | 17 | 18 | class SceneGraph: 19 | def __init__(self, rootLink, joint_angles=None): 20 | self.root = SceneNode() 21 | self.joint_angles = joint_angles 22 | self.constructNode(self.root, rootLink) 23 | 24 | def update(self): 25 | self.root.update() 26 | 27 | def getMesh(self): 28 | self.update() 29 | meshes = self.root.getMesh() 30 | new_meshes = [] 31 | for mesh in meshes: 32 | new_meshes.append(mesh) 33 | return new_meshes 34 | 35 | def updateJointAngles(self, joint_angles): 36 | self.joint_angles = joint_angles.cpu().numpy() 37 | return 38 | 39 | def rotateNode(self, node, joint_rpy): 40 | updates = np.nonzero(joint_rpy)[0] 41 | if len(updates) > 1: 42 | for i in [0, 1, 2]: 43 | _joint_rpy = np.zeros(3) 44 | _joint_rpy[i] += joint_rpy[i] 45 | node.rotateXYZ(_joint_rpy) 46 | else: 47 | node.rotateXYZ(joint_rpy) 48 | 49 | def getRPY(self, node): 50 | joint_axis = node.joint.axis 51 | axis_of_rotation = np.nonzero(joint_axis)[0].squeeze().item() 52 | rotate_rpy = np.zeros(3) 53 | joint_rpy = node.joint.origin["rpy"].astype(np.float64) 54 | 55 | if "tip" not in node.joint.joint_name: 56 | joint_id = re.findall("\d+\.\d+", node.joint.joint_name)[0] 57 | joint_id = int(float(joint_id)) 58 | rotate_rpy[axis_of_rotation] += self.joint_angles[joint_id] * ( 59 | -1.0 if joint_id == 13 else 1.0 60 | ) 61 | # rotate_rpy[axis_of_rotation] += -3.14159 if joint_id == 12 else 0.0 62 | joint_rpy += rotate_rpy 63 | return joint_rpy 64 | 65 | def updateState(self, node=None): 66 | if node == None: 67 | node = self.root 68 | 69 | if "base_link" not in node.name: 70 | node.resetlocalTransform() 71 | 72 | if node.joint != None: 73 | # Construct the joint node firstly; Deal with xyz and rpy of the node 74 | joint_xyz = node.joint.origin["xyz"] 75 | joint_rpy = self.getRPY(node) 76 | 77 | # TODO: fix the 78 | # if node.name == "link_12.0": 79 | # print("update state joint_rpy", joint_rpy) 80 | 81 | self.rotateNode(node, joint_rpy) 82 | node.translate(joint_xyz) 83 | 84 | for child_node in node.children: 85 | self.updateState(child_node) 86 | 87 | def constructNode(self, node, link): 88 | node.name = link.link.link_name 89 | 90 | node.joint = link.joint 91 | if node.joint != None: 92 | # Construct the joint node firstly; Deal with xyz and rpy of the node 93 | 94 | joint_xyz = node.joint.origin["xyz"] 95 | joint_rpy = self.getRPY(node) 96 | 97 | # if node.name == "link_12.0": 98 | # print("construct state joint_rpy", joint_rpy) 99 | 100 | self.rotateNode(node, joint_rpy) 101 | node.translate(joint_xyz) 102 | 103 | # Construct the mesh nodes for multiple 
visuals in link 104 | visuals = link.link.visuals 105 | color = link.link.color 106 | for visual in visuals: 107 | visual_node = SceneNode(node) 108 | node.addChild(visual_node) 109 | visual_node.name = node.name + "_mesh:" + str(visual.visual_name) 110 | if visual.geometry_mesh["filename"] is None: 111 | raise RuntimeError("Invalid file path") 112 | visual_node.addMeshFile(visual.geometry_mesh["filename"], color) 113 | # Deal with xyz and rpy of the visual node 114 | visual_xyz = visual.origin["xyz"] 115 | visual_rpy = visual.origin["rpy"] 116 | visual_scale = visual.geometry_mesh["scale"] 117 | visual_node.rotateXYZ(visual_rpy) 118 | visual_node.translate(visual_xyz) 119 | visual_node.scale(visual_scale) 120 | 121 | # Construct node for the children 122 | for child in link.children: 123 | child_node = SceneNode(node) 124 | node.addChild(child_node) 125 | self.constructNode(child_node, child) 126 | -------------------------------------------------------------------------------- /neuralfeels/viz/debug.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Debug utilities for visualizing neuralfeels outputs 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import torch 11 | from scipy.spatial.transform import Rotation as R 12 | 13 | 14 | def viz_dirs_C(dirs_C: torch.Tensor, poses: torch.Tensor, skip: int = 100): 15 | """ 16 | Visualize the vector field for a single image from camera origin 17 | """ 18 | poses = poses.cpu().numpy() 19 | 20 | dirs_C = dirs_C.squeeze().cpu().numpy() 21 | dirs_C = dirs_C.reshape(-1, 3) 22 | 23 | dirs_C = dirs_C[::skip, :] 24 | mags = np.linalg.norm(dirs_C[:, :2], axis=1) 25 | 26 | fig = plt.figure() 27 | ax = fig.add_subplot(projection="3d") 28 | 29 | x, y, z = poses[:, 0, 3], poses[:, 1, 3], poses[:, 2, 3] 30 | 31 | u = dirs_C[:, 0] 32 | v = dirs_C[:, 1] 33 | w = dirs_C[:, 2] 34 | 35 | ax.set_box_aspect((1, 1, 1)) 36 | ax.quiver(x, y, z, u, v, w, length=0.01, colors=plt.cm.plasma(mags)) 37 | ax.view_init(azim=-90, elev=90) # x-y plane 38 | ax.set_xlabel("x") 39 | ax.set_ylabel("y") 40 | ax.set_zlabel("z") 41 | plotCameras(poses, ax) 42 | # ax.set_xlim(-1.5, 1.5) 43 | # ax.set_ylim(-1.5, 1.5) 44 | # ax.set_zlim(-1.5, 1.5) 45 | ax.set_box_aspect([1, 1, 1]) 46 | plt.show() 47 | return 48 | 49 | 50 | def viz_dirs_W(origins: torch.Tensor, dirs_W: torch.Tensor, skip: int = 10): 51 | """ 52 | Visualize the vector field in world coordinates for a batch of images 53 | """ 54 | fig = plt.figure() 55 | ax = fig.add_subplot(projection="3d") 56 | 57 | origins = origins.cpu().numpy() 58 | dirs_W = dirs_W.squeeze().cpu().numpy() 59 | 60 | origins = origins[::skip, :] 61 | dirs_W = dirs_W[::skip, :] 62 | 63 | x, y, z = origins[:, 0], origins[:, 1], origins[:, 2] 64 | 65 | u, v, w = dirs_W[:, 0], dirs_W[:, 1], dirs_W[:, 2] 66 | 67 | ax.quiver(x, y, z, u, v, w, length=0.001, color="black") 68 | # ax.view_init(azim=-90, elev=90) # x-y plane 69 | ax.set_xlabel("x") 70 | ax.set_ylabel("y") 71 | ax.set_zlabel("z") 72 | ax.set_box_aspect((1, 1, 1)) 73 | 74 | plt.show() 75 | return 76 | 77 | 78 | def pose2axes(rotm: np.ndarray): 79 | """ 80 | Convert rotation matrix to x, y, z axes 81 | """ 82 | x, y, z = np.array([1, 0, 0]), np.array([0, 1, 0]), np.array([0, 0, 1]) 83 | r = R.from_matrix(rotm) # (N, 3, 3) rotation matrices 84 | quivers_u = r.apply(x) 85 | 
quivers_v = r.apply(y) 86 | quivers_w = r.apply(z) 87 | return quivers_u, quivers_v, quivers_w 88 | 89 | 90 | def plotCameras(poses: np.ndarray, ax: plt.Axes): 91 | """ 92 | Plot camera matrices (XYZ -> RGB) 93 | """ 94 | if not isinstance(poses, np.ndarray): 95 | poses = poses.cpu().numpy() 96 | 97 | axes_sz = 2e-2 98 | x, y, z = poses[:, 0, 3], poses[:, 1, 3], poses[:, 2, 3] 99 | ax.scatter(x, y, z, color="k", s=1) 100 | u, v, w = pose2axes(poses[:, :3, :3]) 101 | ax.quiver( 102 | x, 103 | y, 104 | z, 105 | u[:, 0], 106 | u[:, 1], 107 | u[:, 2], 108 | length=axes_sz, 109 | color="r", 110 | linewidths=0.5, 111 | alpha=0.5, 112 | normalize=True, 113 | ) 114 | ax.quiver( 115 | x, 116 | y, 117 | z, 118 | v[:, 0], 119 | v[:, 1], 120 | v[:, 2], 121 | length=axes_sz, 122 | color="g", 123 | linewidths=0.5, 124 | alpha=0.5, 125 | normalize=True, 126 | ) 127 | ax.quiver( 128 | x, 129 | y, 130 | z, 131 | w[:, 0], 132 | w[:, 1], 133 | w[:, 2], 134 | length=axes_sz, 135 | color="b", 136 | linewidths=0.5, 137 | alpha=0.5, 138 | normalize=True, 139 | ) 140 | 141 | ax.legend() 142 | ax.set_xlabel("X") 143 | ax.set_ylabel("Y") 144 | ax.set_zlabel("Z") 145 | return 146 | 147 | 148 | def viz_ray_samples(pc: torch.Tensor, poses: torch.Tensor, skip: int = 1): 149 | """ 150 | Visualize sampled points along rays, together with the camera poses 151 | """ 152 | 153 | fig = plt.figure() 154 | ax = fig.add_subplot(projection="3d") 155 | 156 | pc = pc.cpu().numpy() 157 | pc = pc[::skip, :] 158 | 159 | x, y, z = pc[:, 0], pc[:, 1], pc[:, 2] 160 | 161 | ax.scatter(x, y, z) 162 | # ax.view_init(azim=-90, elev=90) # x-y plane 163 | ax.set_xlabel("x") 164 | ax.set_ylabel("y") 165 | ax.set_zlabel("z") 166 | ax.set_box_aspect((1, 1, 1)) 167 | 168 | plotCameras(poses, ax) 169 | plt.show() 170 | return 171 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/Parser/URDFParser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
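# Illustrative note (assumption, not in the original source): URDFParser below
# consumes standard URDF. A minimal file exercising every tag the parser reads
# (origin, geometry/mesh, material/color, axis, limit) could look like:
#
#   <robot name="demo">
#     <link name="base_link">
#       <visual name="base_visual">
#         <origin xyz="0 0 0" rpy="0 0 0"/>
#         <geometry><mesh filename="meshes/base.obj" scale="1 1 1"/></geometry>
#         <material name="grey"><color rgba="0.5 0.5 0.5 1"/></material>
#       </visual>
#     </link>
#     <link name="arm"/>
#     <joint name="joint_0.0" type="revolute">
#       <parent link="base_link"/>
#       <child link="arm"/>
#       <origin xyz="0 0 0.1" rpy="0 0 0"/>
#       <axis xyz="0 0 1"/>
#       <limit lower="-1.57" upper="1.57"/>
#     </joint>
#   </robot>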
5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | import os 9 | import xml.etree.ElementTree as ET 10 | 11 | import numpy as np 12 | 13 | from .Joint import Joint 14 | from .Link import Link 15 | 16 | 17 | def parseThreeNumber(string): 18 | strings = string.split(" ") 19 | numbers = np.array(list(map(float, strings))) 20 | return numbers 21 | 22 | 23 | class URDFParser: 24 | def __init__(self, file_name): 25 | self.file_name = file_name 26 | self._root_path = os.path.dirname(file_name) + "/" 27 | self.links = {} 28 | self.joints = {} 29 | 30 | # Parse the URDF (XML) file into a tree structure 31 | def parse(self): 32 | # Get the XML tree 33 | root_xml = ET.parse(self.file_name).getroot() 34 | self.links_xml = root_xml.findall("link") 35 | self.joints_xml = root_xml.findall("joint") 36 | # Parse links before parsing joints 37 | self.parseLinks() 38 | self.parseJoints() 39 | 40 | def parseLinks(self): 41 | for link_xml in self.links_xml: 42 | link_name = link_xml.attrib["name"] 43 | link = Link(link_name) 44 | # Deal with multiple visuals 45 | visuals_xml = link_xml.findall("visual") 46 | for visual_xml in visuals_xml: 47 | # Add new visual in link 48 | if "name" in visual_xml.attrib: 49 | visual_name = visual_xml.attrib["name"] 50 | else: 51 | visual_name = None 52 | link.addVisual(visual_name) 53 | # Get origin 54 | origin_xml = visual_xml.find("origin") 55 | if origin_xml is not None: 56 | if "xyz" in origin_xml.attrib: 57 | xyz = parseThreeNumber(origin_xml.attrib["xyz"]) 58 | link.setVisualOriginXyz(xyz) 59 | if "rpy" in origin_xml.attrib: 60 | rpy = parseThreeNumber(origin_xml.attrib["rpy"]) 61 | link.setVisualOriginRpy(rpy) 62 | # Get geometry 63 | geometry_xml = visual_xml.find("geometry") 64 | if geometry_xml is not None: 65 | mesh_xml = geometry_xml.find("mesh") 66 | if mesh_xml is not None: 67 | filename = mesh_xml.attrib["filename"] 68 | link.setVisualGeometryMeshFilename(self._root_path + filename) 69 | if "scale" in mesh_xml.attrib: 70 | scale = parseThreeNumber(mesh_xml.attrib["scale"]) 71 | link.setVisualMeshScale(scale) 72 | 73 | if visual_xml.find("material") is not None: 74 | color_xml = visual_xml.find("material").find("color") 75 | if color_xml is not None: 76 | link.color = np.fromstring( 77 | color_xml.attrib["rgba"], dtype=float, sep=" " 78 | )[:3] 79 | 80 | self.links[link_name] = link 81 | 82 | def parseJoints(self): 83 | for joint_xml in self.joints_xml: 84 | joint_name = joint_xml.attrib["name"] 85 | joint_type = joint_xml.attrib["type"] 86 | child_name = joint_xml.find("child").attrib["link"] 87 | parent_name = joint_xml.find("parent").attrib["link"] 88 | joint = Joint(joint_name, joint_type, child_name, parent_name) 89 | # Get origin 90 | origin_xml = joint_xml.find("origin") 91 | if origin_xml is not None: 92 | if "xyz" in origin_xml.attrib: 93 | xyz = parseThreeNumber(origin_xml.attrib["xyz"]) 94 | joint.setOriginXyz(xyz) 95 | if "rpy" in origin_xml.attrib: 96 | rpy = parseThreeNumber(origin_xml.attrib["rpy"]) 97 | joint.setOriginRpy(rpy) 98 | # Get Axis 99 | axis_xml = joint_xml.find("axis") 100 | if axis_xml is not None: 101 | axis = parseThreeNumber(axis_xml.attrib["xyz"]) 102 | joint.setAxis(axis) 103 | # Get Limit 104 | limit_xml = joint_xml.find("limit") 105 | if limit_xml is not None: 106 | lower = float(limit_xml.attrib["lower"]) 107 | upper = float(limit_xml.attrib["upper"]) 108 | joint.setLimitLower(lower) 109 | joint.setLimitUpper(upper) 110 | self.joints[joint_name] = joint 111 | 112 | 113 | # Test the parser 114 | if __name__ == "__main__": 115 | file_name = 
"../../../data/43074/mobility.urdf" 116 | parser = URDFParser(file_name) 117 | parser.parse() 118 | 119 | print(parser.links) 120 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/touch_vit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | import os 9 | from glob import glob 10 | 11 | import git 12 | import hydra 13 | import numpy as np 14 | import torch 15 | from hydra.utils import to_absolute_path 16 | from omegaconf import DictConfig 17 | from PIL import Image 18 | from torchvision import transforms 19 | 20 | from neuralfeels.contrib.tactile_transformer.dpt_model import DPTModel 21 | from neuralfeels.contrib.tactile_transformer.utils import ( 22 | apply_jet_colormap, 23 | concat_images, 24 | create_dir, 25 | ) 26 | 27 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 28 | 29 | 30 | class TouchVIT: 31 | """ 32 | Image to 3D model for DIGIT 33 | """ 34 | 35 | def __init__(self, cfg: DictConfig): 36 | super(TouchVIT, self).__init__() 37 | 38 | self.config = cfg 39 | input_dir = to_absolute_path(self.config["General"]["path_input_images"]) 40 | self.input_images = glob(f"{input_dir}/*.jpg") + glob(f"{input_dir}/*.png") 41 | 42 | self.type = self.config["General"]["type"] 43 | 44 | self.device = torch.device( 45 | self.config["General"]["device"] if torch.cuda.is_available() else "cpu" 46 | ) 47 | # print("device: %s" % self.device) 48 | resize = self.config["Dataset"]["transforms"]["resize"] 49 | self.model = DPTModel( 50 | image_size=(3, resize[0], resize[1]), 51 | emb_dim=self.config["General"]["emb_dim"], 52 | resample_dim=self.config["General"]["resample_dim"], 53 | read=self.config["General"]["read"], 54 | nclasses=len(self.config["Dataset"]["classes"]), 55 | hooks=self.config["General"]["hooks"], 56 | model_timm=self.config["General"]["model_timm"], 57 | type=self.type, 58 | patch_size=self.config["General"]["patch_size"], 59 | ) 60 | path_model = to_absolute_path( 61 | os.path.join( 62 | self.config["General"]["path_model"], 63 | f"{self.config['weights']}.p", 64 | ) 65 | ) 66 | 67 | # print(f"TouchVIT path: {path_model}") 68 | self.model.load_state_dict( 69 | torch.load(path_model, map_location=self.device)["model_state_dict"] 70 | ) 71 | self.model.eval() 72 | self.model.to(self.device) 73 | self.transform_image = transforms.Compose( 74 | [ 75 | transforms.Resize((resize[0], resize[1])), 76 | transforms.ToTensor(), 77 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 78 | ] 79 | ) 80 | self.output_dir = self.config["General"]["path_predicted_images"] 81 | 82 | def image2heightmap(self, image): 83 | image = Image.fromarray(image) 84 | original_size = image.size 85 | image = self.transform_image(image).unsqueeze(0) 86 | image = image.to(self.device).float() 87 | 88 | output_depth, _ = self.model(image) # [0 - 1] output 89 | 90 | output_depth = transforms.ToPILImage()(output_depth.squeeze(0).float()).resize( 91 | original_size, resample=Image.BICUBIC 92 | ) # [0 - 255] output 93 | return transforms.PILToTensor()(output_depth).squeeze() 94 | 95 | def run(self): 96 | path_dir_depths = os.path.join(self.output_dir, "depths") 97 | 
create_dir(self.output_dir) 98 | create_dir(path_dir_depths) 99 | 100 | output_depths, input_images = [], [] 101 | for images in self.input_images[:10]: 102 | pil_im = Image.open(images) 103 | im = np.array(pil_im) 104 | with torch.no_grad(): 105 | output_depth = self.image2heightmap(im) 106 | output_depths.append(output_depth) 107 | input_images.append(pil_im) 108 | 109 | # Convert list of tensors to image collage 110 | output_depths = [transforms.ToPILImage()(depth) for depth in output_depths] 111 | # Concatenate all 10 PIL images 112 | collage_depth = concat_images(output_depths, direction="horizontal") 113 | collage_depth = apply_jet_colormap(collage_depth) 114 | collage_images = concat_images(input_images, direction="horizontal") 115 | collage = concat_images([collage_images, collage_depth], direction="vertical") 116 | # add jet colormap to the collage 117 | collage.show() 118 | 119 | 120 | @hydra.main( 121 | version_base=None, 122 | config_path=os.path.join(root, "scripts/config/main/touch_depth"), 123 | config_name="vit", 124 | ) 125 | def main(cfg: DictConfig): 126 | cfg.weights = "dpt_sim" 127 | t = TouchVIT(cfg) 128 | t.run() 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /neuralfeels/modules/allegro.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Class for Allegro hand joint state and forward kinematics 7 | 8 | import os 9 | from typing import Dict 10 | 11 | import dill as pickle 12 | import git 13 | import numpy as np 14 | import theseus as th 15 | import torch 16 | from torchkin import Robot, get_forward_kinematics_fns 17 | 18 | from neuralfeels.modules.misc import pose_from_config 19 | 20 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 21 | 22 | 23 | class Allegro: 24 | def __init__( 25 | self, 26 | dataset_path: str = None, 27 | base_pose: Dict = None, 28 | device: str = "cuda", 29 | ): 30 | """Allegro hand dataloader for neuralfeels data""" 31 | super(Allegro, self).__init__() 32 | assert (dataset_path is None) != (base_pose is None) 33 | self.device = device 34 | 35 | urdf_path = os.path.join( 36 | root, "data/assets/allegro/allegro_digit_left_ball.urdf" 37 | ) # Allegro hand URDF file 38 | self.robot, self.fkin, self.links, self.joint_map = load_robot( 39 | urdf_file=urdf_path, num_dofs=16, device=device 40 | ) 41 | 42 | if dataset_path is not None: 43 | # Load base pose and jointstate vectors 44 | data_path = os.path.join(dataset_path, "data.pkl") 45 | with open(data_path, "rb") as p: 46 | self.data = pickle.load(p) 47 | self.allegro_pose = self.data["allegro"]["base_pose"] 48 | self.joint_states = torch.tensor( 49 | self.data["allegro"]["joint_state"], device=device, dtype=torch.float32 50 | ) 51 | else: 52 | self.allegro_pose = pose_from_config(base_pose) 53 | 54 | def _hora_to_neural(self, finger_poses): 55 | """ 56 | Convert the DIGIT urdf reference frame (bottom of the sensor) to neural SLAM frame 57 | """ 58 | finger_poses = finger_poses @ np.linalg.inv( 59 | np.array( 60 | [ 61 | [0.000000, -1.000000, 0.000000, 0.000021], 62 | [0.000000, 0.000000, 1.000000, -0.017545], 63 | [-1.000000, 0.000000, 0.000000, -0.002132], 64 | [0.000000, 0.000000, 0.000000, 1.000000], 65 | ] 66 | ) 67 | ) 68 | return finger_poses 
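# Shape sketch (illustrative, not part of the source): the offset above is a
# single fixed 4x4 homogeneous transform, so _hora_to_neural broadcasts over a
# stack of fingertip poses via batched matmul:
#
#   poses = np.tile(np.eye(4), (4, 1, 1))   # (4, 4, 4) dummy fingertip poses
#   out = self._hora_to_neural(poses)       # (4, 4, 4) @ (4, 4) -> (4, 4, 4)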
69 | 70 | def get_fk(self, idx=None, joint_state=None): 71 | """Forward kinematics using theseus torchkin""" 72 | 73 | assert idx is None or joint_state is None 74 | if joint_state is not None: 75 | joint_states = torch.tensor(joint_state, device=self.device) 76 | else: 77 | if idx >= len(self.joint_states): 78 | return None 79 | joint_states = self.joint_states[idx].clone() 80 | 81 | # joint states are saved as [index, middle, ring, thumb] 82 | self.current_joint_state = joint_states # for viz 83 | 84 | # Swap index and ring for the left hand; theseus FK requires this 85 | joint_states_theseus = joint_states.clone() 86 | joint_states_theseus[[0, 1, 2, 3]], joint_states_theseus[[8, 9, 10, 11]] = ( 87 | joint_states_theseus[[8, 9, 10, 11]], 88 | joint_states_theseus[[0, 1, 2, 3]], 89 | ) 90 | 91 | # Change to breadth-first order; theseus needs this too 92 | joint_states_theseus = joint_states_theseus[self.joint_map] 93 | j = th.Vector( 94 | tensor=joint_states_theseus.unsqueeze(0), 95 | name="joint_states", 96 | ) 97 | link_poses = self.fkin(j.tensor) 98 | digit_poses = torch.vstack(link_poses).to(self.robot.device) 99 | digit_poses = th.SE3(tensor=digit_poses).to_matrix().cpu().numpy() 100 | 101 | base_tf = np.repeat( 102 | self.allegro_pose[np.newaxis, :, :], digit_poses.shape[0], axis=0 103 | ) 104 | digit_poses = base_tf @ digit_poses 105 | digit_poses = self._hora_to_neural(digit_poses) 106 | return {k: v for k, v in zip(list(self.links.keys()), list(digit_poses))} 107 | 108 | def get_base_pose(self): 109 | return self.allegro_pose 110 | 111 | 112 | def load_robot(urdf_file: str, num_dofs: int, device): 113 | """Load robot from URDF file and cache FK functions""" 114 | robot = Robot.from_urdf_file(urdf_file, device=device) 115 | links = { 116 | "digit_index": "link_3.0_tip", 117 | "digit_middle": "link_7.0_tip", 118 | "digit_ring": "link_11.0_tip", 119 | "digit_thumb": "link_15.0_tip", 120 | } 121 | 122 | # FK function is applied breadth-first, so swap the indices from the allegro convention 123 | joint_map = torch.tensor( 124 | [joint.id for joint in robot.joint_map.values() if joint.id < num_dofs], 125 | device=device, 126 | ) 127 | # base, index, middle, ring, thumb 128 | fkin, *_ = get_forward_kinematics_fns(robot, list(links.values())) 129 | return (robot, fkin, links, joint_map) 130 | -------------------------------------------------------------------------------- /neuralfeels/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
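# Directory layout assumed by the loaders in this file (reconstructed from the
# code below; names are illustrative):
#   <root_dir>/depth.npz       # arrays: "depth" (N x H x W) and "depth_scale"
#   <root_dir>/image/<i>.jpg   # RGB frames, integer-indexed from 0
#   <root_dir>/seg/<i>.jpg     # GT segmentation masks (VisionDataset)
#   <root_dir>/depth/<i>.jpg   # GT tactile depth (TactileDataset)
#   <root_dir>/mask/<i>.jpg    # contact masks (TactileDataset)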
5 | 6 | import os 7 | 8 | import cv2 9 | import git 10 | import numpy as np 11 | import torch 12 | from torch.utils.data import Dataset 13 | from tqdm import tqdm 14 | 15 | from neuralfeels.datasets import redwood_depth_noise_model as noise_model 16 | 17 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 18 | 19 | 20 | class VisionDataset(torch.utils.data.Dataset): 21 | """Realsense data loader for neuralfeels dataset""" 22 | 23 | def __init__( 24 | self, 25 | root_dir: str, 26 | gt_seg: bool, 27 | sim_noise_iters: float, 28 | col_ext: str = ".jpg", 29 | ): 30 | # pre-load depth data 31 | depth_file = os.path.join(root_dir, "depth.npz") 32 | depth_loaded = np.load(depth_file, fix_imports=True, encoding="latin1") 33 | self.depth_data = depth_loaded["depth"] 34 | self.depth_scale = depth_loaded["depth_scale"] 35 | self.depth_data = self.depth_data.astype(np.float32) 36 | self.depth_data = self.depth_data * self.depth_scale 37 | 38 | if sim_noise_iters > 0: 39 | # add noise to the clean simulation depth data 40 | # At 1 meter distance, the accuracy is 2.5 mm to 5 mm (https://github.com/IntelRealSense/librealsense/issues/7806). 41 | # We operate at roughly 0.5 meter, so we empirically pick 2mm as the noise std. 42 | # Adding the noise here allows us to ablate the effect of depth noise on the performance of the system. 43 | self.dist_model = np.load( 44 | os.path.join(root, "data", "feelsight", "redwood-depth-dist-model.npy") 45 | ) 46 | self.dist_model = self.dist_model.reshape(80, 80, 5) 47 | for i, depth in enumerate(tqdm(self.depth_data)): 48 | depth = noise_model._simulate(-depth, self.dist_model, sim_noise_iters) 49 | self.depth_data[i, :, :] = -depth 50 | 51 | self.rgb_dir = os.path.join(root_dir, "image") 52 | self.seg_dir = os.path.join(root_dir, "seg") 53 | self.col_ext = col_ext 54 | self.gt_seg = gt_seg 55 | 56 | def __len__(self): 57 | return len(os.listdir(self.rgb_dir)) 58 | 59 | def __getitem__(self, idx): 60 | if torch.is_tensor(idx): 61 | idx = idx.tolist() 62 | 63 | rgb_file = os.path.join(self.rgb_dir, f"{idx}" + self.col_ext) 64 | image = cv2.imread(rgb_file) 65 | depth = self.depth_data[idx] 66 | 67 | if self.gt_seg: 68 | mask = self.get_gt_seg(idx) 69 | depth = depth * mask # mask depth with gt segmentation 70 | 71 | return image, depth 72 | 73 | def get_avg_seg_area(self): 74 | """ 75 | Returns the average segmentation area of the dataset 76 | """ 77 | seg_area = 0.0 78 | for i in range(len(self)): 79 | mask = self.get_gt_seg(i) 80 | seg_area += mask.sum() / mask.size 81 | seg_area /= len(self) 82 | return seg_area 83 | 84 | def get_gt_seg(self, idx: int): 85 | """ 86 | Returns a binary mask of the segmentation ground truth 87 | """ 88 | seg_file = os.path.join(self.seg_dir, f"{idx}" + self.col_ext) 89 | mask = cv2.imread(seg_file, 0).astype(np.int64) 90 | # round every pixel to either 0, 255/2, 255 91 | mask = np.round(mask / 127.5) * 127.5 92 | # check that three classes exist; if not, return an empty mask 93 | if np.unique(mask).size != 3: 94 | mask = np.zeros_like(mask) 95 | else: 96 | mask = mask == 255 97 | return mask 98 | 99 | 100 | class TactileDataset(Dataset): 101 | def __init__( 102 | self, 103 | root_dir: str, 104 | gt_depth: bool, 105 | col_ext: str = ".jpg", 106 | ): 107 | """DIGIT dataset loader for neuralfeels dataset""" 108 | self.rgb_dir = os.path.join(root_dir, "image") 109 | self.depth_dir = os.path.join(root_dir, "depth") 110 | self.mask_dir = os.path.join(root_dir, "mask") 111 | self.col_ext = col_ext 112 | self.gt_depth = gt_depth
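# Usage sketch (hypothetical dataset path, for illustration only):
#   ds = TactileDataset("data/feelsight/077_rubiks_cube/00/digit_0", gt_depth=True)
#   image, depth = ds[0]   # BGR image; depth is masked by the contact mask
#   n_frames = len(ds)     # counts files in <root_dir>/image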
113 | 114 | def __len__(self): 115 | return len(os.listdir(self.rgb_dir)) 116 | 117 | def __getitem__(self, idx): 118 | if torch.is_tensor(idx): 119 | idx = idx.tolist() 120 | rgb_file = os.path.join(self.rgb_dir, f"{idx}" + self.col_ext) 121 | image = cv2.imread(rgb_file) 122 | 123 | depth = None 124 | if self.gt_depth: 125 | depth_file = os.path.join(self.depth_dir, f"{idx}" + self.col_ext) 126 | mask_file = os.path.join(self.mask_dir, f"{idx}" + self.col_ext) 127 | depth = cv2.imread(depth_file, 0).astype(np.int64) 128 | 129 | depth[depth < 0] = 0 130 | 131 | mask = cv2.imread(mask_file, 0).astype(np.int64) 132 | mask = mask > 255 / 2 133 | if mask.sum() / mask.size < 0.01: 134 | # tiny mask, ignore 135 | mask *= False 136 | 137 | depth = depth * mask # apply contact mask 138 | 139 | return image, depth 140 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Loss functions for tactile transformer 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | def compute_scale_and_shift(prediction, target, mask): 15 | # system matrix: A = [[a_00, a_01], [a_10, a_11]] 16 | a_00 = torch.sum(mask * prediction * prediction, (1, 2)) 17 | a_01 = torch.sum(mask * prediction, (1, 2)) 18 | a_11 = torch.sum(mask, (1, 2)) 19 | 20 | # right hand side: b = [b_0, b_1] 21 | b_0 = torch.sum(mask * prediction * target, (1, 2)) 22 | b_1 = torch.sum(mask * target, (1, 2)) 23 | 24 | # solution: x = A^-1 . b = [[a_11, -a_01], [-a_10, a_00]] / (a_00 * a_11 - a_01 * a_10) . 
b 25 | x_0 = torch.zeros_like(b_0) 26 | x_1 = torch.zeros_like(b_1) 27 | 28 | det = a_00 * a_11 - a_01 * a_01 29 | valid = det.nonzero() 30 | 31 | x_0[valid] = (a_11[valid] * b_0[valid] - a_01[valid] * b_1[valid]) / det[valid] 32 | x_1[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid] 33 | 34 | return x_0, x_1 35 | 36 | 37 | def reduction_batch_based(image_loss, M): 38 | # average of all valid pixels of the batch 39 | # avoid division by 0 (if sum(M) = sum(sum(mask)) = 0: sum(image_loss) = 0) 40 | divisor = torch.sum(M) 41 | 42 | if divisor == 0: 43 | return 0 44 | else: 45 | return torch.sum(image_loss) / divisor 46 | 47 | 48 | def reduction_image_based(image_loss, M): 49 | # mean of average of valid pixels of an image 50 | 51 | # avoid division by 0 (if M = sum(mask) = 0: image_loss = 0) 52 | valid = M.nonzero() 53 | 54 | image_loss[valid] = image_loss[valid] / M[valid] 55 | 56 | return torch.mean(image_loss) 57 | 58 | 59 | def mse_loss(prediction, target, mask, reduction=reduction_batch_based): 60 | M = torch.sum(mask, (1, 2)) 61 | res = prediction - target 62 | image_loss = torch.sum(mask * res * res, (1, 2)) 63 | 64 | return reduction(image_loss, 2 * M) 65 | 66 | 67 | def gradient_loss(prediction, target, mask, reduction=reduction_batch_based): 68 | M = torch.sum(mask, (1, 2)) 69 | 70 | diff = prediction - target 71 | diff = torch.mul(mask, diff) 72 | 73 | grad_x = torch.abs(diff[:, :, 1:] - diff[:, :, :-1]) 74 | mask_x = torch.mul(mask[:, :, 1:], mask[:, :, :-1]) 75 | grad_x = torch.mul(mask_x, grad_x) 76 | 77 | grad_y = torch.abs(diff[:, 1:, :] - diff[:, :-1, :]) 78 | mask_y = torch.mul(mask[:, 1:, :], mask[:, :-1, :]) 79 | grad_y = torch.mul(mask_y, grad_y) 80 | 81 | image_loss = torch.sum(grad_x, (1, 2)) + torch.sum(grad_y, (1, 2)) 82 | 83 | return reduction(image_loss, M) 84 | 85 | 86 | class MSELoss(nn.Module): 87 | def __init__(self, reduction="batch-based"): 88 | super().__init__() 89 | 90 | if reduction == "batch-based": 91 | self.__reduction = reduction_batch_based 92 | else: 93 | self.__reduction = reduction_image_based 94 | 95 | def forward(self, prediction, target, mask): 96 | return mse_loss(prediction, target, mask, reduction=self.__reduction) 97 | 98 | 99 | class GradientLoss(nn.Module): 100 | def __init__(self, scales=4, reduction="batch-based"): 101 | super().__init__() 102 | 103 | if reduction == "batch-based": 104 | self.__reduction = reduction_batch_based 105 | else: 106 | self.__reduction = reduction_image_based 107 | 108 | self.__scales = scales 109 | 110 | def forward(self, prediction, target, mask): 111 | total = 0 112 | 113 | for scale in range(self.__scales): 114 | step = pow(2, scale) 115 | 116 | total += gradient_loss( 117 | prediction[:, ::step, ::step], 118 | target[:, ::step, ::step], 119 | mask[:, ::step, ::step], 120 | reduction=self.__reduction, 121 | ) 122 | 123 | return total 124 | 125 | 126 | class ScaleAndShiftInvariantLoss(nn.Module): 127 | def __init__(self, alpha=0.5, scales=4, reduction="batch-based"): 128 | super().__init__() 129 | 130 | self.__data_loss = MSELoss(reduction=reduction) 131 | self.__regularization_loss = GradientLoss(scales=scales, reduction=reduction) 132 | self.__alpha = alpha 133 | 134 | self.__prediction_ssi = None 135 | 136 | def forward(self, prediction, target): 137 | # preprocessing 138 | mask = target > 0 139 | 140 | # compute the per-image scale and shift 141 | scale, shift = compute_scale_and_shift(prediction, target, mask) 142 | # print(scale, shift) 143 | self.__prediction_ssi = scale.view(-1, 1, 1) * prediction + 
shift.view(-1, 1, 1) 144 | 145 | total = self.__data_loss(self.__prediction_ssi, target, mask) 146 | if self.__alpha > 0: 147 | total += self.__alpha * self.__regularization_loss( 148 | self.__prediction_ssi, target, mask 149 | ) 150 | 151 | return total 152 | 153 | def __get_prediction_ssi(self): 154 | return self.__prediction_ssi 155 | 156 | prediction_ssi = property(__get_prediction_ssi) 157 | -------------------------------------------------------------------------------- /neuralfeels/geometry/frustum.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from iSDF: https://github.com/facebookresearch/iSDF 7 | 8 | import numpy as np 9 | import torch 10 | import trimesh 11 | 12 | from neuralfeels import viz 13 | from neuralfeels.geometry import transform 14 | from neuralfeels.modules import sample 15 | 16 | 17 | def get_frustum_normals(R_WC, H, W, fx, fy, cx, cy): 18 | c = np.array([0, W, W, 0]) 19 | r = np.array([0, 0, H, H]) 20 | x = (c - cx) / fx 21 | y = (r - cy) / fy 22 | corner_dirs_C = np.vstack((x, y, np.ones(4))).T 23 | corner_dirs_W = (R_WC * corner_dirs_C[..., None, :]).sum(axis=-1) 24 | 25 | frustum_normals = np.empty((4, 3)) 26 | frustum_normals[0] = np.cross(corner_dirs_W[0], corner_dirs_W[1]) 27 | frustum_normals[1] = np.cross(corner_dirs_W[1], corner_dirs_W[2]) 28 | frustum_normals[2] = np.cross(corner_dirs_W[2], corner_dirs_W[3]) 29 | frustum_normals[3] = np.cross(corner_dirs_W[3], corner_dirs_W[0]) 30 | frustum_normals = frustum_normals / np.linalg.norm(frustum_normals, axis=1)[:, None] 31 | 32 | return frustum_normals 33 | 34 | 35 | def check_inside_frustum(points, cam_center, frustum_normals): 36 | """For a point to be within the frustum, the projection on each normal 37 | vector must be positive. 38 | params: points (N, 3), cam_center (3,), frustum_normals (4, 3) 39 | """ 40 | pts = points - cam_center 41 | dots = np.dot(pts, frustum_normals.T) 42 | return (dots >= 0).all(axis=1) 43 | 44 | 45 | def is_visible( 46 | points, T_WC, depth, H, W, fx, fy, cx, cy, trunc=0.2, use_projection=True 47 | ): 48 | """Are points visible in this frame. 49 | Up to trunc metres behind the surface counts in visible region. 
50 | """ 51 | # forward project points 52 | K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 53 | ones = np.ones([len(points), 1]) 54 | homog_points = np.concatenate((points, ones), axis=-1) 55 | points_C = (np.linalg.inv(T_WC) @ homog_points.T)[:3] 56 | uv = K @ points_C 57 | z = uv[2] 58 | uv = uv[:2] / z 59 | uv = uv.T 60 | 61 | if use_projection: 62 | x_valid = np.logical_and(uv[:, 0] > 0, uv[:, 0] < W) 63 | y_valid = np.logical_and(uv[:, 1] > 0, uv[:, 1] < H) 64 | xy_valid = np.logical_and(x_valid, y_valid) 65 | else: # use frustrum 66 | R_WC = T_WC[:3, :3] 67 | cam_center = T_WC[:3, 3] 68 | 69 | frustum_normals = get_frustum_normals(R_WC, H, W, fx, fy, cx, cy) 70 | 71 | xy_valid = check_inside_frustum(points, cam_center, frustum_normals) 72 | 73 | uv = uv.astype(int) 74 | depth_vals = depth[uv[xy_valid, 1], uv[xy_valid, 0]] 75 | max_depths = np.full(len(uv), -np.inf) 76 | max_depths[xy_valid] = depth_vals + trunc 77 | z_valid = np.logical_and(z > 0, z < max_depths) 78 | 79 | inside = np.logical_and(xy_valid, z_valid) 80 | 81 | return inside 82 | 83 | 84 | def test_inside_frustum(T_WC, depth): 85 | fx, fy = 600.0, 600.0 86 | cx, cy = 600.0, 340.0 87 | H, W = 680, 1200.0 88 | 89 | # show camera 90 | scene = trimesh.Scene() 91 | viz.draw.draw_cams(1, T_WC, scene) 92 | 93 | # show random point cloud 94 | points = np.random.normal(0.0, 2.0, [1000, 3]) 95 | visible = is_visible(points, T_WC, depth, H, W, fx, fy, cx, cy) 96 | cols = np.full(points.shape, [255, 0, 0]) 97 | cols[visible] = [0, 255, 0] 98 | pc = trimesh.PointCloud(points, cols) 99 | scene.add_geometry(pc) 100 | 101 | # show rays 102 | sparse = 20 103 | dirs_C = transform.ray_dirs_C( 104 | 1, 105 | int(H / sparse), 106 | int(W / sparse), 107 | fx / sparse, 108 | fy / sparse, 109 | cx / sparse, 110 | cy / sparse, 111 | "cpu", 112 | depth_type="z", 113 | ) 114 | dirs_C = dirs_C.view(1, -1, 3) 115 | dirs_C = dirs_C.cpu().numpy() 116 | dirs_W = (T_WC[:3, :3] * dirs_C[..., None, :]).sum(axis=-1) 117 | n_rays = dirs_W.shape[1] 118 | sparse_depth = depth[::sparse, ::sparse] 119 | max_depth = torch.from_numpy(sparse_depth + 0.9).flatten() 120 | z_vals = sample.stratified_sample(0.2, max_depth, n_rays, "cpu", n_bins=12) 121 | dirs_W = torch.from_numpy(dirs_W) 122 | dirs_W = dirs_W.squeeze() 123 | origins = torch.from_numpy(T_WC[:3, 3]) 124 | origins = origins[None, :].repeat(n_rays, 1) 125 | rays_pc = origins[:, None, :] + (dirs_W[:, None, :] * z_vals[:, :, None]) 126 | rays_pc = rays_pc.reshape(-1, 3).numpy() 127 | visible_rays = is_visible(rays_pc, T_WC, depth, H, W, fx, fy, cx, cy) 128 | ray_col = np.full(rays_pc.shape, [255, 0, 0]) 129 | ray_col[visible_rays] = [0, 255, 0] 130 | rays_tmpc = trimesh.PointCloud(rays_pc, ray_col) 131 | scene.add_geometry(rays_tmpc) 132 | 133 | # show frustum normals 134 | starts = T_WC[:3, 3][None, :].repeat(4, 0) 135 | frustum_normals = get_frustum_normals(T_WC[:3, :3], H, W, fx, fy, cx, cy) 136 | normal_ends = T_WC[:3, 3] + frustum_normals * 4 137 | normal_lines = np.concatenate((starts[:, None, :], normal_ends[:, None, :]), axis=1) 138 | normal_paths = trimesh.load_path(normal_lines) 139 | normal_paths.colors = [[255, 255, 0, 255]] * 3 140 | scene.add_geometry(normal_paths) 141 | 142 | # show rays in corners of frame 143 | # ends = C + corner_dirs_W * 3 144 | # lines = np.concatenate((starts[:, None, :], ends[:, None, :]), axis=1) 145 | # paths = trimesh.load_path(lines) 146 | # paths.colors = [[0, 255, 0, 255]] * len(lines) 147 | # scene.add_geometry(paths) 148 | 149 | scene.show() 150 | 
-------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/tactile_depth.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Wrapper for tactile depth estimation model 9 | 10 | import collections 11 | import os.path as osp 12 | 13 | import cv2 14 | import numpy as np 15 | import tacto 16 | import torch 17 | from hydra import compose 18 | 19 | from neuralfeels.contrib.tactile_transformer.touch_vit import TouchVIT 20 | 21 | dname = osp.dirname(osp.abspath(__file__)) 22 | 23 | 24 | class TactileDepth: 25 | def __init__(self, depth_mode, real=False, device="cuda"): 26 | super(TactileDepth, self).__init__() 27 | 28 | cfg = compose(config_name=f"main/touch_depth/{depth_mode}").main.touch_depth 29 | 30 | cfg.weights = "dpt_real" if real else "dpt_sim" 31 | 32 | if depth_mode == "gt": 33 | self.model = None 34 | return 35 | if depth_mode == "vit": 36 | # print("Loading ViT depth model----") 37 | self.model = TouchVIT(cfg=cfg) 38 | else: 39 | raise NotImplementedError(f"Mode not implemented: {cfg.mode}") 40 | # print("done") 41 | self.device = device 42 | 43 | settings_config = cfg.settings.real if real else cfg.settings.sim 44 | self.b, self.r, self.clip = ( 45 | settings_config.border, 46 | settings_config.ratio, 47 | settings_config.clip, 48 | ) 49 | 50 | self.bg_id = settings_config.bg_id 51 | self.blend_sz = settings_config.blend_sz 52 | self.heightmap_window = collections.deque([]) 53 | 54 | # background templates for heightmap2mask 55 | self.bg_template = {} 56 | 57 | def image2heightmap(self, image: np.ndarray, sensor_name: str = "digit_0"): 58 | if sensor_name not in self.bg_template: 59 | if self.bg_id is None: 60 | print( 61 | f"{sensor_name} not in background images, generating new background template using first frame" 62 | ) 63 | self.bg_template[sensor_name] = self.model.image2heightmap(image) 64 | else: 65 | print( 66 | f"{sensor_name} not in background images, generating new background template from bg_id {self.bg_id}" 67 | ) 68 | self.bg_template[sensor_name] = self.model.image2heightmap( 69 | cv2.imread(tacto.get_background_image_path(self.bg_id)) 70 | ) 71 | self.bg_template[sensor_name] = self.bg_template[sensor_name].to( 72 | dtype=float, device=self.device 73 | ) 74 | heightmap = self.model.image2heightmap(image) 75 | return self.blend_heightmaps(heightmap) 76 | 77 | def heightmap2mask( 78 | self, heightmap: torch.tensor, sensor_name: str = "digit_0" 79 | ) -> torch.Tensor: 80 | """Thresholds heightmap to return binary contact mask 81 | 82 | Args: 83 | heightmap: single tactile image 84 | 85 | Returns: 86 | padded_contact_mask: contact mask [True: is_contact, False: no_contact] 87 | 88 | """ 89 | 90 | heightmap = heightmap.squeeze().to(self.device) 91 | bg_template = self.bg_template[sensor_name] 92 | # scale bg_template to match heightmap if different size 93 | if bg_template.shape != heightmap.shape: 94 | bg_template = torch.nn.functional.interpolate( 95 | bg_template[None, None, :, :], heightmap.shape[-2:], mode="bilinear" 96 | ).squeeze() 97 | 98 | init_height = bg_template 99 | if self.b: 100 | heightmap = heightmap[self.b : -self.b, self.b : -self.b] 101 | init_height = 
init_height[self.b : -self.b, self.b : -self.b] 102 | diff_heights = heightmap - init_height 103 | diff_heights[diff_heights < self.clip] = 0 104 | threshold = torch.quantile(diff_heights, 0.9) * self.r 105 | contact_mask = diff_heights > threshold 106 | padded_contact_mask = torch.zeros_like(bg_template, dtype=bool) 107 | 108 | if self.b: 109 | padded_contact_mask[self.b : -self.b, self.b : -self.b] = contact_mask 110 | else: 111 | padded_contact_mask = contact_mask 112 | return padded_contact_mask 113 | 114 | def blend_heightmaps(self, heightmap: torch.Tensor) -> torch.Tensor: 115 | """Exponentially weighted heightmap blending. 116 | 117 | Args: 118 | heightmap: input heightmap 119 | 120 | Returns: 121 | blended_heightmap: output heightmap blended over self.heightmap_window 122 | 123 | """ 124 | 125 | if not self.blend_sz: 126 | return heightmap 127 | 128 | if len(self.heightmap_window) >= self.blend_sz: 129 | self.heightmap_window.popleft() 130 | 131 | self.heightmap_window.append(heightmap) 132 | n = len(self.heightmap_window) 133 | 134 | weights = torch.tensor( 135 | [x / n for x in range(1, n + 1)], device=heightmap.device 136 | ) # exponentially weighted time series costs 137 | 138 | weights = torch.exp(weights) / torch.sum(torch.exp(weights)) 139 | 140 | all_heightmaps = torch.stack(list(self.heightmap_window)) 141 | blended_heightmap = torch.sum( 142 | (all_heightmaps * weights[:, None, None]) / weights.sum(), dim=0 143 | ) # weighted average 144 | 145 | # view_subplots([heightmap, blended_heightmap], [["heightmap", "blended_heightmap"]]) 146 | return blended_heightmap 147 | -------------------------------------------------------------------------------- /scripts/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Entrypoint bash script for neuralfeels 3 | # Usage: ./scripts/run DATASET SLAM_MODE MODALITY OBJECT LOG FPS RECORD OPEN3D 4 | # 5 | # Arguments: 6 | if [[ $1 == "--help" ]]; then 7 | echo "Usage: ./scripts/run DATASET SLAM_MODE MODALITY OBJECT LOG FPS RECORD OPEN3D" 8 | echo "" 9 | echo "Arguments:" 10 | echo " DATASET: string # The dataset to be used, options are 'feelsight', 'feelsight_real'" 11 | echo " SLAM_MODE: string # The mode to be used, options are 'slam', 'pose', 'map'" 12 | echo " MODALITY: string # The modality to be used, options are 'vitac', 'vi', 'tac'" 13 | echo " OBJECT: string # The object to be used, e.g., '077_rubiks_cube'" 14 | echo " LOG: string # The log identifier, e.g., '00', '01', '02'" 15 | echo " FPS: integer # The frames per second, e.g., '1', '5'" 16 | echo " RECORD: integer # Whether to record the session, options are '1' (yes) or '0' (no)" 17 | echo " OPEN3D: integer # Whether to use Open3D, options are '1' (yes) or '0' (no)" 18 | echo "" 19 | echo "Presets:" 20 | echo " --slam-sim # Run neural SLAM in simulation with rubber duck" 21 | echo " --pose-sim # Run neural tracking in simulation with Rubik's cube" 22 | echo " --slam-real # Run neural SLAM in real-world with bell pepper" 23 | echo " --pose-real # Run neural tracking in real-world with large dice" 24 | echo " --three-cam # Three camera pose tracking in real-world with large dice" 25 | echo " --occlusion-sim # Run neural tracking in simulation with occlusion logs" 26 | exit 0 27 | elif [[ $1 = "--slam-sim" ]]; then 28 | # Example of neural SLAM in simulation with rubber duck 29 | DATASET="feelsight" 30 | SLAM_MODE="slam" 31 | MODALITY="vitac" 32 | OBJECT="contactdb_rubber_duck" 33 | LOG="00" 34 | FPS=1 35 | RECORD=1 36 | 
OPEN3D=1 37 | elif [[ $1 = "--pose-sim" ]]; then 38 | # Example of neural tracking in simulation with Rubik's cube 39 | DATASET="feelsight" 40 | SLAM_MODE="pose" 41 | MODALITY="vitac" 42 | OBJECT="077_rubiks_cube" 43 | LOG="00" 44 | FPS=1 45 | RECORD=1 46 | OPEN3D=1 47 | elif [[ $1 = "--slam-real" ]]; then 48 | # Example of neural SLAM in real-world with bell pepper 49 | DATASET="feelsight_real" 50 | SLAM_MODE="slam" 51 | MODALITY="vitac" 52 | OBJECT="bell_pepper" 53 | LOG="00" 54 | FPS=1 55 | RECORD=1 56 | OPEN3D=1 57 | elif [[ $1 = "--pose-real" ]]; then 58 | # Example of neural tracking in real-world with large dice 59 | DATASET="feelsight_real" 60 | SLAM_MODE="pose" 61 | MODALITY="vitac" 62 | OBJECT="large_dice" 63 | LOG="00" 64 | FPS=1 65 | RECORD=1 66 | OPEN3D=1 67 | elif [[ $1 = "--three-cam" ]]; then 68 | # Three camera pose tracking in real-world with large dice 69 | DATASET="feelsight_real" 70 | SLAM_MODE="pose" 71 | MODALITY="vi" 72 | OBJECT="large_dice" 73 | LOG="00" 74 | FPS=1 75 | RECORD=1 76 | OPEN3D=1 77 | EXTRA_ARGS="main=baseline" 78 | elif [[ $1 = "--occlusion-sim" ]]; then 79 | # Example of neural tracking in simulation with occlusion logs 80 | DATASET="feelsight" 81 | SLAM_MODE="pose" 82 | MODALITY="vitac" 83 | OBJECT="077_rubiks_cube" 84 | LOG="00" 85 | FPS=1 86 | RECORD=1 87 | OPEN3D=1 88 | EXTRA_ARGS="main.sensor0.masks=read main.occlusion=True" 89 | else 90 | if [ $# -lt 8 ]; then 91 | echo "Error: Missing arguments." 92 | echo "Usage: $0 DATASET SLAM_MODE MODALITY OBJECT LOG FPS RECORD OPEN3D" 93 | echo "You provided $# arguments, but at least 8 are required." 94 | exit 1 95 | fi 96 | DATASET=$1 97 | SLAM_MODE=$2 98 | MODALITY=$3 99 | OBJECT=$4 100 | LOG=$5 101 | FPS=$6 102 | RECORD=$7 103 | OPEN3D=$8 104 | array=($@) 105 | len=${#array[@]} 106 | EXTRA_ARGS=${array[@]:8:$len} 107 | fi 108 | 109 | if [[ ${DATASET} != "feelsight" && ${DATASET} != "feelsight_real" ]]; then 110 | echo "Error: Invalid DATASET. Options are 'feelsight', 'feelsight_real'." 111 | exit 1 112 | elif [[ ${SLAM_MODE} != "slam" && ${SLAM_MODE} != "pose" && ${SLAM_MODE} != "map" ]]; then 113 | echo "Error: Invalid SLAM_MODE. Options are 'slam', 'pose', 'map'." 114 | exit 1 115 | elif [[ ${MODALITY} != "vitac" && ${MODALITY} != "vi" && ${MODALITY} != "tac" ]]; then 116 | echo "Error: Invalid MODALITY. Options are 'vitac', 'vi', 'tac'." 117 | exit 1 118 | elif [[ ! ${FPS} =~ ^[0-9]+$ ]]; then 119 | echo "Error: Invalid FPS. Must be a positive integer." 120 | exit 1 121 | elif [[ ${RECORD} != 0 && ${RECORD} != 1 ]]; then 122 | echo "Error: Invalid RECORD. Options are '0' (no) or '1' (yes)." 123 | exit 1 124 | elif [[ ${OPEN3D} != 0 && ${OPEN3D} != 1 ]]; then 125 | echo "Error: Invalid OPEN3D. Options are '0' (no) or '1' (yes)." 
126 | exit 1 127 | fi 128 | 129 | echo "Extra arguments: ${EXTRA_ARGS}" 130 | 131 | if [ $RECORD -eq 1 ]; then 132 | record_string="main.viz.misc.record=True main.viz.misc.render_stream=True" 133 | else 134 | record_string="" 135 | fi 136 | 137 | if [ $OPEN3D -eq 1 ]; then 138 | open3d_string="main.viz.misc.render_open3d=True" 139 | else 140 | open3d_string="main.viz.misc.render_open3d=False" 141 | fi 142 | 143 | dir=$(date +"outputs/%Y-%m-%d/%H-%M-%S") 144 | argstring="main=${MODALITY} \ 145 | main.data.object=${OBJECT} \ 146 | main.data.log=${LOG} \ 147 | main.data.dataset=${DATASET} \ 148 | main.train.train_mode=${SLAM_MODE} \ 149 | main.viz.meshes.mesh_rec_crop=False \ 150 | main.viz.debug.origin=True \ 151 | main.viz.meshes.show_gt_object=False \ 152 | main.viz.meshes.transparent=False \ 153 | main.data.train_fps=${FPS} \ 154 | main.viz.layers.pointcloud=None \ 155 | ${record_string} ${open3d_string} \ 156 | sweep_dir=${dir} \ 157 | ${EXTRA_ARGS}" 158 | 159 | echo -e "python scripts/run.py ${argstring}" 160 | python scripts/run.py ${argstring} 161 | -------------------------------------------------------------------------------- /neuralfeels/datasets/data_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Class to store image and depth data for each frame of the optimization 7 | 8 | import copy 9 | 10 | import numpy as np 11 | import torch 12 | 13 | 14 | class FrameData: 15 | def __init__( 16 | self, 17 | frame_id=None, 18 | im_batch=None, 19 | im_batch_np=None, 20 | depth_batch=None, 21 | depth_batch_np=None, 22 | T_WC_batch=None, 23 | T_WC_batch_np=None, 24 | normal_batch=None, 25 | seg_pixels=None, 26 | frame_avg_losses=None, 27 | format=None, 28 | ): 29 | super(FrameData, self).__init__() 30 | 31 | self.frame_id = frame_id 32 | self.im_batch = im_batch 33 | self.im_batch_np = im_batch_np 34 | self.depth_batch = depth_batch 35 | self.depth_batch_np = depth_batch_np 36 | self.T_WC_batch = T_WC_batch 37 | self.T_WC_batch_np = T_WC_batch_np 38 | 39 | self.normal_batch = normal_batch 40 | self.seg_pixels = seg_pixels 41 | self.frame_avg_losses = frame_avg_losses 42 | 43 | self.format = format 44 | 45 | self.frame_count = 0 if frame_id is None else len(frame_id) 46 | 47 | def add_frame_data(self, data, replace): 48 | """ 49 | Add new FrameData to existing FrameData. 
50 | """ 51 | self.frame_count += len(data.frame_id) 52 | self.frame_id = expand_data(self.frame_id, data.frame_id, replace) 53 | 54 | self.im_batch = expand_data(self.im_batch, data.im_batch, replace) 55 | self.im_batch_np = expand_data(self.im_batch_np, data.im_batch_np, replace) 56 | 57 | self.depth_batch = expand_data(self.depth_batch, data.depth_batch, replace) 58 | self.depth_batch_np = expand_data( 59 | self.depth_batch_np, data.depth_batch_np, replace 60 | ) 61 | 62 | self.T_WC_batch = expand_data(self.T_WC_batch, data.T_WC_batch, replace) 63 | self.T_WC_batch_np = expand_data( 64 | self.T_WC_batch_np, data.T_WC_batch_np, replace 65 | ) 66 | 67 | self.normal_batch = expand_data(self.normal_batch, data.normal_batch, replace) 68 | 69 | self.seg_pixels = expand_data(self.seg_pixels, data.seg_pixels, replace) 70 | device = data.im_batch.device 71 | empty_dist = torch.zeros([data.im_batch.shape[0]], device=device) 72 | self.frame_avg_losses = expand_data(self.frame_avg_losses, empty_dist, replace) 73 | 74 | if type(data.format) is not list: 75 | data.format = [data.format] 76 | if self.format is None: 77 | self.format = data.format 78 | else: 79 | self.format += data.format 80 | 81 | def delete_frame_data(self, indices): 82 | """ 83 | Delete FrameData at given indices. 84 | """ 85 | self.frame_count -= len(indices) 86 | self.frame_id = np.delete(self.frame_id, indices) 87 | 88 | self.im_batch = torch.cat( 89 | [self.im_batch[: indices[0]], self.im_batch[indices[-1] + 1 :]] 90 | ) 91 | self.im_batch_np = np.delete(self.im_batch_np, indices, axis=0) 92 | 93 | self.depth_batch = torch.cat( 94 | [self.depth_batch[: indices[0]], self.depth_batch[indices[-1] + 1 :]] 95 | ) 96 | self.depth_batch_np = np.delete(self.depth_batch_np, indices, axis=0) 97 | 98 | self.T_WC_batch = torch.cat( 99 | [self.T_WC_batch[: indices[0]], self.T_WC_batch[indices[-1] + 1 :]] 100 | ) 101 | self.T_WC_batch_np = np.delete(self.T_WC_batch_np, indices, axis=0) 102 | 103 | if self.normal_batch is not None: 104 | self.normal_batch = torch.cat( 105 | [self.normal_batch[: indices[0]], self.normal_batch[indices[-1] + 1 :]] 106 | ) 107 | 108 | if self.seg_pixels is not None: 109 | self.seg_pixels = torch.cat( 110 | [self.seg_pixels[: indices[0]], self.seg_pixels[indices[-1] + 1 :]] 111 | ) 112 | self.frame_avg_losses = torch.cat( 113 | [ 114 | self.frame_avg_losses[: indices[0]], 115 | self.frame_avg_losses[indices[-1] + 1 :], 116 | ] 117 | ) 118 | 119 | def __len__(self): 120 | if self.T_WC_batch is None: 121 | return 0 122 | else: 123 | return self.T_WC_batch.shape[0] 124 | 125 | def __getitem__(self, index): 126 | return FrameData( 127 | frame_id=np.array(self.frame_id[index]), 128 | im_batch=self.im_batch[index], 129 | im_batch_np=self.im_batch_np[index], 130 | depth_batch=self.depth_batch[index], 131 | depth_batch_np=self.depth_batch_np[index], 132 | T_WC_batch=self.T_WC_batch[index], 133 | T_WC_batch_np=self.T_WC_batch_np[index], 134 | normal_batch=( 135 | None if self.normal_batch is None else self.normal_batch[index] 136 | ), 137 | seg_pixels=None if self.seg_pixels is None else self.seg_pixels[index], 138 | frame_avg_losses=self.frame_avg_losses[index], 139 | format=self.format[index], 140 | ) 141 | 142 | 143 | def expand_data(batch, data, replace=False): 144 | """ 145 | Add new FrameData attribute to exisiting FrameData attribute. 146 | Either concatenate or replace last row in batch. 
147 | """ 148 | cat_fn = np.concatenate 149 | if torch.is_tensor(data): 150 | cat_fn = torch.cat 151 | 152 | if batch is None: 153 | batch = copy.deepcopy(data) 154 | 155 | else: 156 | if replace is False: 157 | batch = cat_fn((batch, data)) 158 | else: 159 | batch[-1] = data[0] 160 | 161 | return batch 162 | -------------------------------------------------------------------------------- /neuralfeels/viz/rotate_object_video.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ 7 | Script to rotate a sequence of meshes and save as a video. 8 | """ 9 | 10 | import os 11 | import pathlib 12 | import time 13 | 14 | import cv2 15 | import ffmpeg 16 | import git 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import open3d as o3d 20 | import open3d.visualization.rendering as rendering 21 | 22 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 23 | 24 | 25 | def draw_rotating_geometry(mesh_dir, mesh_file): 26 | # create folder to save images 27 | image_path = os.path.join(mesh_dir, "images") 28 | 29 | def get_orbit(final_mesh, timesteps=400, num_orbits=1): 30 | diag = np.linalg.norm( 31 | np.asarray(final_mesh.get_max_bound()) 32 | - np.asarray(final_mesh.get_min_bound()) 33 | ) 34 | radius = diag * 1.5 35 | # initialize camera at 45 degrees of circle 36 | orbit_size = timesteps // num_orbits 37 | theta = np.linspace(0, 2 * np.pi, orbit_size) 38 | z = np.zeros(orbit_size) + 0.33 * radius 39 | x = radius * np.cos(theta) 40 | y = radius * np.sin(theta) 41 | traj = np.vstack((x, y, z)).transpose() 42 | center = final_mesh.get_center() 43 | offset_traj = traj + center 44 | 45 | offset_traj = np.tile(offset_traj, (num_orbits, 1)) 46 | return offset_traj, center 47 | 48 | final_mesh = o3d.io.read_triangle_mesh(mesh_file) 49 | 50 | if not os.path.exists(image_path): 51 | os.makedirs(image_path) 52 | 53 | # delete existing images 54 | for file in os.listdir(image_path): 55 | os.remove(os.path.join(image_path, file)) 56 | 57 | # 500 frames at 20 fps gives ~25 seconds of video 58 | num_iters = 500 59 | orbit_path, center = get_orbit(final_mesh, timesteps=num_iters, num_orbits=1) 60 | 61 | render = rendering.OffscreenRenderer(1000, 1000) 62 | render.setup_camera(60.0, [0, 0, 0], [0, 10, 0], [0, 0, 1]) 63 | render.scene.set_background([1, 1, 1, 1]) 64 | # render.scene.scene.set_sun_light([0.707, 0.0, -.707], [1.0, 1.0, 1.0], 65 | # 75000) 66 | # render.scene.scene.enable_sun_light(True) 67 | 68 | # Address the white background issue: https://github.com/isl-org/Open3D/issues/6020 69 | cg_settings = rendering.ColorGrading( 70 | rendering.ColorGrading.Quality.ULTRA, 71 | rendering.ColorGrading.ToneMapping.LINEAR, 72 | ) 73 | 74 | obj_mat = rendering.MaterialRecord() 75 | mat_properties = { 76 | "metallic": 0.5, 77 | "roughness": 0.6, 78 | "reflectance": 0.2, 79 | "clearcoat": 0.0, 80 | "clearcoat_roughness": 0.0, 81 | "anisotropy": 0.3, 82 | } 83 | obj_mat.base_color = [0.9, 0.9, 0.9, 1.0] 84 | obj_mat.shader = "defaultLit" 85 | for key, val in mat_properties.items(): 86 | setattr(obj_mat, "base_" + key, val) 87 | 88 | for i in range(num_iters): 89 | render.scene.set_lighting( 90 | rendering.Open3DScene.LightingProfile.SOFT_SHADOWS, 91 | -np.array(orbit_path[i, :] + [0.0, 0.0, 0.01]), 92 | ) 93 | 94 | if i == 0: 95 | pcd = 
o3d.io.read_triangle_mesh(mesh_file, True) 96 | render.scene.add_geometry("pcd", pcd, obj_mat) 97 | render.setup_camera(60.0, center, orbit_path[i, :], [0, 0, 1]) 98 | render.scene.view.set_color_grading(cg_settings) 99 | 100 | # capture the rendered frame 101 | img = render.render_to_image() 102 | time_label = i 103 | o3d.io.write_image(os.path.join(image_path, f"{time_label:03d}.jpg"), img, 99) 104 | 105 | save_path = os.path.join(mesh_dir, "mesh_viz.mp4") 106 | create_video(image_path, save_path, 30, 20) 107 | 108 | 109 | def get_int(file: str) -> int: 110 | """ 111 | Extract numeric value from file name 112 | """ 113 | return int(file.split(".")[0]) 114 | 115 | 116 | def create_video(path, save_path, length=30, fps=20): 117 | images = os.listdir(path) 118 | images = [im for im in images if im.endswith(".jpg")] 119 | 120 | images = sorted(images, key=get_int) 121 | 122 | interval = 1000.0 / fps 123 | 124 | # Execute FFmpeg sub-process, reading PNG-encoded frames from the stdin pipe 125 | process = ( 126 | ffmpeg.input("pipe:", r=str(fps)) 127 | .output(save_path, pix_fmt="yuv420p") 128 | .overwrite_output() 129 | .global_args("-loglevel", "warning") 130 | .global_args("-qscale", "0") 131 | .global_args("-y") 132 | .run_async(pipe_stdin=True) 133 | ) 134 | 135 | for image in images: 136 | image_path = os.path.join(path, image) 137 | im = cv2.imread(image_path) 138 | success, encoded_image = cv2.imencode(".png", im) 139 | process.stdin.write( 140 | encoded_image.tobytes() 141 | ) # If broken pipe error, try mamba update ffmpeg 142 | 143 | # Close the stdin pipe so FFmpeg finishes encoding the output file. 144 | process.stdin.close() 145 | process.wait() 146 | 147 | 148 | def get_last_folders(root_dir): 149 | """ 150 | Recursively traverse down all directories until we reach the last folders, and store those in a list. 151 | """ 152 | last_folders = [] 153 | for path in root_dir.iterdir(): 154 | if path.is_dir(): 155 | # if the folder contains exactly one .obj file, we have reached a leaf folder 156 | if len(list(path.glob("*.obj"))) == 1: 157 | last_folders.append(path) 158 | else: 159 | last_folders.extend(get_last_folders(path)) 160 | 161 | if len(last_folders) == 0: 162 | last_folders = [root_dir] 163 | return last_folders 164 | 165 | 166 | # entry point: render a rotating video for every leaf mesh folder 167 | if __name__ == "__main__": 168 | mesh_dir = pathlib.Path(root) / "data/results/mesh_trials/sim" 169 | all_mesh_dirs = get_last_folders(mesh_dir) 170 | for mesh_dir in all_mesh_dirs: 171 | # look for the final reconstructed mesh in this folder 172 | print(f"Processing {mesh_dir}") 173 | # get all .obj files in mesh_dir 174 | mesh_files = list(mesh_dir.glob("*.obj")) 175 | final_mesh_path = None 176 | # check if final mesh exists 177 | if len( 178 | [ 179 | x.name 180 | for x in mesh_files 181 | if (("final" in x.name) or (x.name == "textured.obj")) 182 | ] 183 | ): 184 | final_mesh_path = [ 185 | x.name 186 | for x in mesh_files 187 | if (("final" in x.name) or (x.name == "textured.obj")) 188 | ][0] 189 | final_mesh_path = str(mesh_dir / final_mesh_path) 190 | 191 | if final_mesh_path is not None: 192 | draw_rotating_geometry(mesh_dir, final_mesh_path) 193 | -------------------------------------------------------------------------------- /scripts/run.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
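# Example launch (a sketch; Hydra loads config.yaml from scripts/config, and overrides follow standard Hydra syntax): python scripts/run.py gpu_id=0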
5 | 6 | # Entrypoint python script for neuralfeels 7 | 8 | import gc 9 | import os 10 | import sys 11 | import traceback 12 | from typing import TYPE_CHECKING 13 | 14 | import cv2 15 | import hydra 16 | import numpy as np 17 | import torch 18 | from omegaconf import DictConfig 19 | from pyvirtualdisplay import Display 20 | from termcolor import cprint 21 | 22 | if TYPE_CHECKING: 23 | from neuralfeels.modules.trainer import Trainer 24 | 25 | 26 | class OptionalDisplay: 27 | def __init__(self, size=(1900, 1084), use_xauth=True, active=False): 28 | self.display = None 29 | if active: 30 | self.display = Display(size=size, use_xauth=use_xauth) 31 | 32 | def __enter__(self): 33 | if self.display is not None: 34 | self.display.__enter__() 35 | print(f"Display created at :{self.display.display}.") 36 | 37 | def __exit__(self, *args, **kwargs): 38 | if self.display is not None: 39 | self.display.__exit__() 40 | 41 | 42 | def _load_frames_incremental(trainer: "Trainer", t): 43 | # lazy imports for tinycudann compatibility issues in cluster 44 | from neuralfeels.modules.misc import print_once 45 | 46 | kf_set = {sensor: None for sensor in trainer.sensor_list} 47 | 48 | trainer.update_current_time() 49 | add_new_frame = True if t == 0 else trainer.check_keyframe_latest() 50 | 51 | end_all = False 52 | if add_new_frame: 53 | new_frame_id = trainer.get_latest_frame_id() 54 | 55 | digit_poses = trainer.allegro.get_fk(idx=new_frame_id) 56 | end_all = trainer.check_end(new_frame_id) 57 | 58 | if end_all: 59 | if not os.path.exists(f"./visualizer/{trainer.cfg_data.object}.mp4"): 60 | print_once("******End of sensor stream******") 61 | return kf_set, end_all 62 | 63 | trainer.update_scene_properties(new_frame_id) 64 | 65 | if t == 0: 66 | trainer.init_first_pose(digit_poses) 67 | 68 | added_frame = False 69 | for sensor_name in trainer.sensor_list: 70 | n_keyframes_start = trainer.n_keyframes[sensor_name] 71 | 72 | if "digit" in sensor_name: 73 | frame_data = trainer.sensor[sensor_name].get_frame_data( 74 | new_frame_id, 75 | digit_poses[sensor_name], 76 | msg_data=None, 77 | ) 78 | else: 79 | frame_data = trainer.sensor[sensor_name].get_frame_data( 80 | new_frame_id, 81 | digit_poses, 82 | trainer.latest_render_depth[sensor_name], 83 | msg_data=None, 84 | ) 85 | 86 | added_frame = trainer.add_frame(frame_data) 87 | if t == 0: 88 | trainer.prev_kf_time = trainer.tot_step_time 89 | 90 | # kf_set thumbnails for visualizer 91 | if trainer.n_keyframes[sensor_name] - n_keyframes_start: 92 | new_kf = trainer.frames[sensor_name].im_batch_np[-1] 93 | h = int(new_kf.shape[0] / 6) 94 | w = int(new_kf.shape[1] / 6) 95 | try: 96 | kf_set[sensor_name] = cv2.resize(new_kf, (w, h)) 97 | except Exception: 98 | # fall back to the full-size keyframe if resizing fails 99 | kf_set[sensor_name] = new_kf 100 | 101 | if add_new_frame and added_frame: 102 | trainer.last_is_keyframe = False 103 | 104 | return kf_set, end_all 105 | 106 | 107 | def optim_iter(trainer: "Trainer", t, start_optimize=True): 108 | # lazy imports for tinycudann compatibility issues in cluster 109 | from neuralfeels.modules.misc import gpu_usage_check 110 | 111 | if trainer.incremental: 112 | kf_set, end_all = _load_frames_incremental(trainer, t) 113 | else: 114 | kf_set = {sensor: None for sensor in trainer.sensor_list} 115 | end_all = False 116 | 117 | status = "" 118 | # optimization step--------------------------------------------- 119 | if start_optimize: 120 | # Run map and pose optimization sequentially 121 | pose_loss = trainer.step_pose() 122 | map_loss = 
trainer.step_map() 123 | 124 | # Store losses 125 | map_loss, pose_loss = float(map_loss or 0.0), float(pose_loss or 0.0) 126 | pose_stats, map_stats = trainer.save_stats["pose"], trainer.save_stats["map"] 127 | pose_error_dict, map_error_dict = pose_stats["errors"], map_stats["errors"] 128 | pose_time_dict, map_time_dict = pose_stats["timing"], map_stats["timing"] 129 | pose_time, pose_errors = 0.0, 0.0 130 | map_time, map_errors, f_score_T = 0.0, 0.0, 0 131 | if len(map_error_dict) > 0: 132 | map_time, map_errors, f_score_T = ( 133 | map_time_dict[-1], 134 | map_error_dict[-1]["f_score"][trainer.which_f_score], 135 | map_error_dict[-1]["f_score_T"][trainer.which_f_score], 136 | ) 137 | if len(pose_error_dict) > 0: 138 | pose_time, pose_errors = ( 139 | pose_time_dict[-1], 140 | pose_error_dict[-1]["avg_3d_error"], 141 | ) 142 | 143 | # retrieve the next frame based on optimization time 144 | trainer.tot_step_time += (map_time + pose_time) * (t > 0) 145 | 146 | # Print useful information 147 | status = f"Map time: {map_time:.2f} s, Pose time: {pose_time:.2f} s, Total: {trainer.tot_step_time:.2f} s, Dataset: {trainer.current_time:.2f} s\n" 148 | status += ( 149 | f"Pose err [{pose_errors*1000:.2f} mm] Map err (< {f_score_T*1000:.2f} mm): [{map_errors:.2f}]" 150 | ) 151 | else: 152 | print("Waiting for visualizer..") 153 | 154 | trainer.get_latest_depth_renders() 155 | gpu_usage_check() 156 | return status, kf_set, end_all 157 | 158 | 159 | @hydra.main(version_base=None, config_path="config", config_name="config") 160 | def main(cfg: DictConfig): 161 | """Main function to run neuralfeels 162 | 163 | Args: 164 | cfg (DictConfig): Hydra configuration 165 | """ 166 | gpu_id = cfg.gpu_id 167 | torch.set_default_device(f"cuda:{gpu_id}") 168 | cprint(f"Using GPU: {gpu_id}", color="yellow") 169 | try: 170 | import open3d.visualization.gui as gui 171 | 172 | # lazy imports to avoid tinycudann errors when launching locally for a 173 | # different architecture 174 | from neuralfeels.modules.trainer import Trainer 175 | from neuralfeels.viz import neuralfeels_gui 176 | 177 | seed = cfg.seed 178 | np.random.seed(seed) 179 | torch.manual_seed(seed) 180 | 181 | with OptionalDisplay( 182 | size=(3840, 1644), use_xauth=True, active=cfg.create_display 183 | ): 184 | tac_slam_trainer = Trainer(cfg=cfg, gpu_id=gpu_id, ros_node=None) 185 | # open3d vis window 186 | app = gui.Application.instance 187 | app.initialize() 188 | mono = app.add_font(gui.FontDescription(gui.FontDescription.MONOSPACE)) 189 | size_ratio = 0.4 # scaling ratio w.r.t. tkinter resolution 190 | w = neuralfeels_gui.GUI( 191 | tac_slam_trainer, optim_iter, mono, size_ratio, cfg.profile 192 | ) 193 | app.run() 194 | w.save_data() # save all the images, meshes, plots, etc. 195 | # clear memory 196 | gc.collect() 197 | torch.cuda.empty_cache() 198 | 199 | except Exception: 200 | traceback.print_exc(file=sys.stderr) 201 | raise 202 | 203 | 204 | if __name__ == "__main__": 205 | main() 206 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Utility functions for tactile transformer 9 | 10 | import errno 11 | import os 12 | from glob import glob 13 | 14 | import numpy as np 15 | import torch.nn as nn 16 | import torch.optim as optim 17 | from matplotlib import cm 18 | from PIL import Image 19 | from torch.optim.lr_scheduler import ReduceLROnPlateau 20 | from torchvision import transforms 21 | 22 | from neuralfeels.contrib.tactile_transformer.custom_augmentation import ToMask 23 | from neuralfeels.contrib.tactile_transformer.loss import ScaleAndShiftInvariantLoss 24 | 25 | 26 | def get_total_paths(path, ext): 27 | return glob(os.path.join(path, "*" + ext)) 28 | 29 | 30 | def get_splitted_dataset( 31 | config, split, dataset_name, path_images, path_depths, path_segmentation 32 | ): 33 | list_files = [os.path.basename(im) for im in path_images] 34 | np.random.seed(config["General"]["seed"]) 35 | np.random.shuffle(list_files) 36 | if split == "train": 37 | selected_files = list_files[ 38 | : int(len(list_files) * config["Dataset"]["splits"]["split_train"]) 39 | ] 40 | elif split == "val": 41 | selected_files = list_files[ 42 | int(len(list_files) * config["Dataset"]["splits"]["split_train"]) : int( 43 | len(list_files) * config["Dataset"]["splits"]["split_train"] 44 | ) 45 | + int(len(list_files) * config["Dataset"]["splits"]["split_val"]) 46 | ] 47 | else: 48 | selected_files = list_files[ 49 | int(len(list_files) * config["Dataset"]["splits"]["split_train"]) 50 | + int(len(list_files) * config["Dataset"]["splits"]["split_val"]) : 51 | ] 52 | 53 | path_images = [ 54 | os.path.join( 55 | config["Dataset"]["paths"]["path_dataset"], 56 | dataset_name, 57 | config["Dataset"]["paths"]["path_images"], 58 | im[:-4] + config["Dataset"]["extensions"]["ext_images"], 59 | ) 60 | for im in selected_files 61 | ] 62 | path_depths = [ 63 | os.path.join( 64 | config["Dataset"]["paths"]["path_dataset"], 65 | dataset_name, 66 | config["Dataset"]["paths"]["path_depths"], 67 | im[:-4] + config["Dataset"]["extensions"]["ext_depths"], 68 | ) 69 | for im in selected_files 70 | ] 71 | path_segmentation = [ 72 | os.path.join( 73 | config["Dataset"]["paths"]["path_dataset"], 74 | dataset_name, 75 | config["Dataset"]["paths"]["path_segmentations"], 76 | im[:-4] + config["Dataset"]["extensions"]["ext_segmentations"], 77 | ) 78 | for im in selected_files 79 | ] 80 | return path_images, path_depths, path_segmentation 81 | 82 | 83 | def get_transforms(config): 84 | im_size = config["Dataset"]["transforms"]["resize"] 85 | transform_image = transforms.Compose( 86 | [ 87 | transforms.Resize((im_size[0], im_size[1])), 88 | transforms.ToTensor(), # converts to [0 - 1] 89 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 90 | ] 91 | ) 92 | transform_depth = transforms.Compose( 93 | [ 94 | transforms.Resize((im_size[0], im_size[1])), 95 | transforms.Grayscale(num_output_channels=1), 96 | transforms.ToTensor(), # converts to [0 - 1] 97 | ] 98 | ) 99 | transform_seg = transforms.Compose( 100 | [ 101 | transforms.Resize( 102 | (im_size[0], im_size[1]), 103 | interpolation=transforms.InterpolationMode.NEAREST, 104 | ), 105 | ToMask(config["Dataset"]["classes"]), 106 | ] 107 | ) 108 | return transform_image, transform_depth, transform_seg 109 | 110 | 111 | def get_losses(config): 112 | def NoneFunction(a, b): 113 | return 0 114 | 115 | loss_depth = NoneFunction 116 | loss_segmentation = NoneFunction 117 | type = config["General"]["type"] 118 | if 
type == "full" or type == "depth": 119 | if config["General"]["loss_depth"] == "mse": 120 | loss_depth = nn.L1Loss() 121 | elif config["General"]["loss_depth"] == "ssi": 122 | loss_depth = ScaleAndShiftInvariantLoss() 123 | if type == "full" or type == "segmentation": 124 | if config["General"]["loss_segmentation"] == "ce": 125 | loss_segmentation = nn.CrossEntropyLoss() 126 | return loss_depth, loss_segmentation 127 | 128 | 129 | def create_dir(directory): 130 | try: 131 | os.makedirs(directory) 132 | except OSError as e: 133 | if e.errno != errno.EEXIST: 134 | raise 135 | 136 | 137 | def get_optimizer(config, net): 138 | names = set([name.split(".")[0] for name, _ in net.named_modules()]) - set( 139 | ["", "transformer_encoders"] 140 | ) 141 | params_backbone = net.transformer_encoders.parameters() 142 | params_scratch = list() 143 | for name in names: 144 | params_scratch += list(eval("net." + name).parameters()) 145 | 146 | if config["General"]["optim"] == "adam": 147 | optimizer_backbone = optim.Adam( 148 | params_backbone, lr=config["General"]["lr_backbone"] 149 | ) 150 | optimizer_scratch = optim.Adam( 151 | params_scratch, lr=config["General"]["lr_scratch"] 152 | ) 153 | elif config["General"]["optim"] == "sgd": 154 | optimizer_backbone = optim.SGD( 155 | params_backbone, 156 | lr=config["General"]["lr_backbone"], 157 | momentum=config["General"]["momentum"], 158 | ) 159 | optimizer_scratch = optim.SGD( 160 | params_scratch, 161 | lr=config["General"]["lr_scratch"], 162 | momentum=config["General"]["momentum"], 163 | ) 164 | return optimizer_backbone, optimizer_scratch 165 | 166 | 167 | def get_schedulers(optimizers): 168 | return [ 169 | ReduceLROnPlateau(optimizer, verbose=True, factor=0.8) 170 | for optimizer in optimizers 171 | ] 172 | 173 | 174 | def concat_images(images, direction="horizontal"): 175 | widths, heights = zip(*(img.size for img in images)) 176 | 177 | if direction == "horizontal": 178 | total_width = sum(widths) 179 | max_height = max(heights) 180 | new_image = Image.new("RGB", (total_width, max_height)) 181 | x_offset = 0 182 | for img in images: 183 | new_image.paste(img, (x_offset, 0)) 184 | x_offset += img.width 185 | elif direction == "vertical": 186 | total_height = sum(heights) 187 | max_width = max(widths) 188 | new_image = Image.new("RGB", (max_width, total_height)) 189 | y_offset = 0 190 | for img in images: 191 | new_image.paste(img, (0, y_offset)) 192 | y_offset += img.height 193 | else: 194 | raise ValueError("Direction must be 'horizontal' or 'vertical'") 195 | 196 | return new_image 197 | 198 | 199 | def apply_jet_colormap(image): 200 | # Convert to grayscale if not already 201 | grayscale_image = image.convert("L") 202 | 203 | # Convert grayscale image to numpy array 204 | image_np = np.array(grayscale_image) 205 | 206 | # Normalize image data to range [0, 1] for colormap 207 | image_normalized = image_np / 255.0 208 | 209 | # Apply the jet colormap 210 | colormap = cm.get_cmap("jet") 211 | colored_image = colormap(image_normalized) 212 | 213 | # Convert back to 8-bit per channel RGB 214 | colored_image = (colored_image[:, :, :3] * 255).astype(np.uint8) 215 | 216 | # Convert numpy array back to PIL image 217 | return Image.fromarray(colored_image) 218 | -------------------------------------------------------------------------------- /neuralfeels/modules/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from iSDF: https://github.com/facebookresearch/iSDF 7 | 8 | import torch 9 | 10 | from neuralfeels.geometry import transform 11 | 12 | cosSim = torch.nn.CosineSimilarity(dim=-1, eps=1e-6) 13 | 14 | 15 | # method 1: ray bounds 16 | def bounds_ray(depth_sample, z_vals, dirs_C_sample, T_WC_sample, do_grad: bool = True): 17 | bounds = depth_sample[:, None] - z_vals 18 | z_to_euclidean_depth = dirs_C_sample.norm(dim=-1) 19 | bounds = z_to_euclidean_depth[:, None] * bounds 20 | 21 | grad = None 22 | if do_grad: 23 | grad = grad_ray(T_WC_sample, dirs_C_sample, z_vals.shape[1] - 1) 24 | 25 | return bounds, grad 26 | 27 | 28 | # method 2: ray bound with cos correction 29 | def bounds_normal( 30 | depth_sample, 31 | z_vals, 32 | dirs_C_sample, 33 | norm_sample, 34 | normal_trunc_dist, 35 | T_WC_sample, 36 | do_grad, 37 | ): 38 | ray_bounds, _ = bounds_ray( 39 | depth_sample, z_vals, dirs_C_sample, T_WC_sample, do_grad 40 | ) 41 | 42 | costheta = torch.abs(cosSim(-dirs_C_sample, norm_sample)) 43 | 44 | # only apply correction out to truncation distance 45 | sub = normal_trunc_dist * (1.0 - costheta) 46 | normal_bounds = ray_bounds - sub[:, None] 47 | 48 | trunc_ixs = ray_bounds < normal_trunc_dist 49 | trunc_vals = (ray_bounds * costheta[:, None])[trunc_ixs] 50 | normal_bounds[trunc_ixs] = trunc_vals 51 | 52 | grad = None 53 | if do_grad: 54 | grad = grad_ray(T_WC_sample, dirs_C_sample, z_vals.shape[1] - 1) 55 | 56 | return normal_bounds, grad 57 | 58 | 59 | def grad_ray(T_WC_sample, dirs_C_sample, n_samples): 60 | """Returns the negative of the viewing direction vector""" 61 | _, dirs_W = transform.origin_dirs_W(T_WC_sample, dirs_C_sample) 62 | grad = -dirs_W[:, None, :].repeat(1, n_samples, 1) 63 | 64 | return grad 65 | 66 | 67 | # method 3: brute force 68 | @torch.jit.script 69 | def bounds_pc( 70 | pc: torch.Tensor, 71 | z_vals: torch.Tensor, 72 | depth_sample: torch.Tensor, 73 | object_rays: int, 74 | trunc_dist: float, 75 | do_grad: bool = True, 76 | ): 77 | with torch.no_grad(): 78 | surf_pc = pc[:object_rays, 0] # surface pointcloud element 79 | diff = pc[:, :, None] - surf_pc 80 | dists = diff.norm(p=2, dim=-1) 81 | dists, closest_ixs = torch.min( 82 | dists, dim=-1 83 | ) # closest points to the surface element 84 | behind_surf = torch.abs(z_vals) > torch.abs(depth_sample[:, None]) 85 | # assert behind_surf[object_rays:].any() == False, "free space rays are not behind surface" 86 | dists[behind_surf] *= -1 # make distances behind surface negative 87 | bounds = dists 88 | 89 | if do_grad: 90 | ix1 = torch.arange(diff.shape[0])[:, None].repeat(1, diff.shape[1]) 91 | ix2 = torch.arange(diff.shape[1])[None, :].repeat(diff.shape[0], 1) 92 | grad = diff[ix1, ix2, closest_ixs] 93 | grad = grad[:, 1:] 94 | grad = grad / grad.norm(p=2, dim=-1)[..., None] 95 | # flip grad vectors behind the surf 96 | grad[behind_surf[:, 1:]] *= -1 97 | return bounds, grad 98 | else: 99 | return bounds, None 100 | 101 | 102 | def bounds( 103 | method, 104 | dirs_C_sample, 105 | depth_sample, 106 | T_WC_sample, 107 | z_vals, 108 | pc, 109 | object_rays, 110 | trunc_dist, 111 | norm_sample, 112 | do_grad=True, 113 | ): 114 | """do_grad: compute approximate gradient vector.""" 115 | assert method in ["ray", "normal", "pc"] 116 | 117 | if method == "ray": 118 | bounds, grad = bounds_ray( 119 | depth_sample, z_vals, dirs_C_sample, T_WC_sample, do_grad 120 | ) 121 | 122 | 
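# the "normal" method tightens the ray bound using the angle between the viewing ray and the surface normal (see bounds_normal above)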
elif method == "normal": 123 | bounds, grad = bounds_normal( 124 | depth_sample, 125 | z_vals, 126 | dirs_C_sample, 127 | norm_sample, 128 | trunc_dist, 129 | T_WC_sample, 130 | do_grad, 131 | ) 132 | 133 | else: 134 | bounds, grad = bounds_pc( 135 | pc, z_vals, depth_sample, object_rays, trunc_dist, do_grad 136 | ) 137 | 138 | return bounds, grad 139 | 140 | 141 | def sdf_loss(sdf, bounds, t, loss_type="L1"): 142 | """ 143 | params: 144 | sdf: predicted sdf values. 145 | bounds: upper bound on abs(sdf) 146 | t: truncation distance up to which the sdf value is directly supevised. 147 | loss_type: L1 or L2 loss. 148 | """ 149 | # free_space_loss_mat, trunc_loss_mat = full_sdf_loss(sdf, bounds, t) 150 | free_space_loss_mat, trunc_loss_mat = tsdf_loss(sdf, bounds, t) 151 | 152 | # decide which supervision based on truncation region 153 | free_space_ixs = torch.abs(bounds) > t 154 | free_space_loss_mat[~free_space_ixs] = 0.0 155 | trunc_loss_mat[free_space_ixs] = 0.0 156 | 157 | sdf_loss_mat = free_space_loss_mat + trunc_loss_mat 158 | 159 | if loss_type == "L1": 160 | sdf_loss_mat = torch.abs(sdf_loss_mat) 161 | elif loss_type == "L2": 162 | sdf_loss_mat = torch.square(sdf_loss_mat) 163 | elif loss_type == "smooth_L1": 164 | sdf_loss_mat = torch.nn.functional.smooth_l1_loss( 165 | sdf_loss_mat, torch.zeros_like(sdf_loss_mat), reduction="none" 166 | ) 167 | else: 168 | raise ValueError("Must be L1 or L2") 169 | 170 | return sdf_loss_mat, free_space_ixs 171 | 172 | 173 | def full_sdf_loss(sdf, target_sdf, trunc_dist, free_space_factor=5.0): 174 | """ 175 | For samples that lie in free space before truncation region: 176 | loss(sdf_pred, sdf_gt) = { max(0, sdf_pred - sdf_gt), if sdf_pred >= 0 177 | { exp(-sdf_pred) - 1, if sdf_pred < 0 178 | 179 | For samples that lie in truncation region: 180 | loss(sdf_pred, sdf_gt) = sdf_pred - sdf_gt 181 | """ 182 | 183 | # free_space_loss_mat = torch.max( 184 | # torch.nn.functional.relu(sdf - target_sdf), 185 | # torch.exp(-free_space_factor * sdf) - 1. 186 | # ) 187 | free_space_loss_mat = sdf - trunc_dist 188 | trunc_loss_mat = sdf - target_sdf 189 | 190 | return free_space_loss_mat, trunc_loss_mat 191 | 192 | 193 | def tsdf_loss(sdf, target_sdf, trunc_dist): 194 | """ 195 | tsdf loss from: https://arxiv.org/pdf/2104.04532.pdf 196 | SDF values in truncation region are scaled in range [0, 1]. 
197 | """ 198 | trunc_vals = torch.sign(target_sdf) * torch.ones(sdf.shape, device=sdf.device) 199 | free_space_loss_mat = sdf - trunc_vals 200 | trunc_loss_mat = sdf - target_sdf / trunc_dist 201 | return free_space_loss_mat, trunc_loss_mat 202 | 203 | 204 | def tot_loss( 205 | sdf_loss_mat, 206 | eik_loss_mat, 207 | free_space_ixs, 208 | bounds, 209 | trunc_weight, 210 | eik_weight, 211 | vision_weights=None, 212 | ): 213 | sdf_loss_mat[~free_space_ixs] *= trunc_weight 214 | 215 | if vision_weights is not None: 216 | sdf_loss_mat = torch.mul(sdf_loss_mat, vision_weights) 217 | 218 | losses = {"sdf_loss": sdf_loss_mat.mean()} 219 | tot_loss_mat = sdf_loss_mat 220 | 221 | # eikonal loss 222 | if eik_loss_mat is not None: 223 | eik_loss_mat = eik_loss_mat * eik_weight 224 | tot_loss_mat = tot_loss_mat + eik_loss_mat 225 | losses["eikonal_loss"] = eik_loss_mat.mean() 226 | 227 | tot_loss = tot_loss_mat.mean() 228 | losses["total_loss"] = tot_loss 229 | 230 | return tot_loss, tot_loss_mat, losses 231 | 232 | 233 | def approx_loss(full_loss, binary_masks, W, H, factor=8): 234 | w_block = W // factor 235 | h_block = H // factor 236 | loss_approx = full_loss.view(-1, factor, h_block, factor, w_block) 237 | loss_approx = loss_approx.sum(dim=(2, 4)) 238 | actives = binary_masks.view(-1, factor, h_block, factor, w_block) 239 | actives = actives.sum(dim=(2, 4)) 240 | actives[actives == 0] = 1.0 241 | loss_approx = loss_approx / actives 242 | 243 | return loss_approx 244 | 245 | 246 | def frame_avg( 247 | total_loss_mat, 248 | depth_batch, 249 | indices_b, 250 | indices_h, 251 | indices_w, 252 | W, 253 | H, 254 | loss_approx_factor, 255 | binary_masks, 256 | free_space_masks, 257 | ): 258 | # frame average losses 259 | full_loss = torch.zeros( 260 | depth_batch.shape, dtype=total_loss_mat.dtype, device=depth_batch.device 261 | ) 262 | 263 | full_loss[indices_b, indices_h, indices_w] = total_loss_mat.sum(-1).detach() 264 | 265 | full_loss = ( 266 | full_loss * ~free_space_masks 267 | ) # remove those samples in free space for only surface loss 268 | 269 | loss_approx = approx_loss(full_loss, binary_masks, W, H, factor=loss_approx_factor) 270 | factor = loss_approx.shape[1] 271 | frame_sum = loss_approx.sum(dim=(1, 2)) 272 | frame_avg_loss = frame_sum / (factor * factor) 273 | 274 | return loss_approx, frame_avg_loss 275 | -------------------------------------------------------------------------------- /neuralfeels/viz/draw.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # 2D and 3D visualization utilities for neuralfeels 7 | 8 | import io 9 | 10 | import cv2 11 | import numpy as np 12 | import skimage.measure 13 | import torch 14 | import trimesh 15 | from PIL import Image 16 | 17 | from neuralfeels import geometry 18 | 19 | 20 | def draw_camera(camera, transform, color=(0.0, 1.0, 0.0, 0.8), marker_height=0.2): 21 | marker = trimesh.creation.camera_marker(camera, marker_height=marker_height) 22 | marker[0].apply_transform(transform) 23 | marker[1].apply_transform(transform) 24 | marker[1].colors = (color,) * len(marker[1].entities) 25 | 26 | return marker 27 | 28 | 29 | def draw_cameras_from_eyes(eyes, ats, up, scene): 30 | for eye, at in zip(eyes, ats): 31 | R, t = geometry.transform.look_at(eye, at, up) 32 | T = np.eye(4) 33 | T[:3, :3] = R 34 | T[:3, 3] = t 35 | 36 | transform = T @ geometry.transform.to_replica() 37 | camera = trimesh.scene.Camera( 38 | fov=scene.camera.fov, resolution=scene.camera.resolution 39 | ) 40 | marker = draw_camera(camera, transform) 41 | scene.add_geometry(marker) 42 | 43 | 44 | def draw_cams( 45 | batch_size, T_WC_batch_np, scene, color=None, latest_diff=True, cam_scale=1.0 46 | ): 47 | no_color = color is None 48 | if no_color: 49 | color = (0.0, 1.0, 0.0, 0.8) 50 | for batch_i in range(batch_size): 51 | # if batch_i == (batch_size - 1): 52 | # color = (1., 0., 0., 0.8) 53 | T_WC = T_WC_batch_np[batch_i] 54 | 55 | camera = trimesh.scene.Camera( 56 | fov=scene.camera.fov, resolution=scene.camera.resolution 57 | ) 58 | marker_height = 0.3 * cam_scale 59 | if batch_i == batch_size - 1 and latest_diff: 60 | if no_color: 61 | color = (1.0, 1.0, 1.0, 1.0) 62 | marker_height = 0.5 * cam_scale 63 | 64 | marker = draw_camera(camera, T_WC, color=color, marker_height=marker_height) 65 | scene.add_geometry(marker[1]) 66 | 67 | 68 | def draw_segment(t1, t2, color=(1.0, 1.0, 0.0)): 69 | line_segment = trimesh.load_path([t1, t2]) 70 | line_segment.colors = (color,) * len(line_segment.entities) 71 | 72 | return line_segment 73 | 74 | 75 | def draw_trajectory(trajectory, scene, color=(1.0, 1.0, 0.0)): 76 | for i in range(trajectory.shape[0] - 1): 77 | if (trajectory[i] != trajectory[i + 1]).any(): 78 | segment = draw_segment(trajectory[i], trajectory[i + 1], color) 79 | scene.add_geometry(segment) 80 | 81 | 82 | def draw_pc(batch_size, pcs_cam, T_WC_batch_np, im_batch=None, scene=None): 83 | pcs_w = [] 84 | cols = [] 85 | for batch_i in range(batch_size): 86 | T_WC = T_WC_batch_np[batch_i] 87 | pc_cam = pcs_cam[batch_i] 88 | 89 | col = None 90 | if im_batch is not None: 91 | img = im_batch[batch_i] 92 | col = img.reshape(-1, 3) 93 | cols.append(col) 94 | 95 | pc_tri = trimesh.PointCloud(vertices=pc_cam, colors=col) 96 | pc_tri.apply_transform(T_WC) 97 | pcs_w.append(pc_tri.vertices) 98 | 99 | if scene is not None: 100 | scene.add_geometry(pc_tri) 101 | 102 | pcs_w = np.concatenate(pcs_w, axis=0) 103 | if len(cols) != 0: 104 | cols = np.concatenate(cols) 105 | return pcs_w, cols 106 | 107 | 108 | def marching_cubes_trimesh(numpy_3d_sdf_tensor, level=0.0): 109 | """ 110 | Convert sdf samples to triangular mesh. 
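    Vertices are returned in the normalized [0, 1] cube (each voxel index is divided by dim - 1).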
111 | """ 112 | vertices, faces, vertex_normals, _ = skimage.measure.marching_cubes( 113 | numpy_3d_sdf_tensor, 114 | level=level, 115 | step_size=1, 116 | ) 117 | 118 | dim = numpy_3d_sdf_tensor.shape[0] 119 | vertices = vertices / (dim - 1) 120 | mesh = trimesh.Trimesh( 121 | vertices=vertices, vertex_normals=vertex_normals, faces=faces 122 | ) 123 | 124 | return mesh 125 | 126 | 127 | def draw_mesh(sdf, color_by="normals", clean_mesh=True): 128 | """ 129 | Run marching cubes on sdf tensor to return mesh. 130 | """ 131 | if isinstance(sdf, torch.Tensor): 132 | sdf = sdf.detach().cpu().numpy() 133 | mesh = marching_cubes_trimesh(sdf) 134 | 135 | # Transform to [-1, 1] range 136 | mesh.apply_translation([-0.5, -0.5, -0.5]) 137 | mesh.apply_scale(2) 138 | 139 | try: 140 | # from NICE-SLAM 141 | if clean_mesh: 142 | get_largest_components = False 143 | remove_small_geometry_threshold = 2 144 | # get connected components 145 | components = mesh.split(only_watertight=False) 146 | if get_largest_components: 147 | areas = np.array([c.area for c in components], dtype=np.float) 148 | print(areas) 149 | clean_mesh = components[areas.argmax()] 150 | else: 151 | new_components = [] 152 | for comp in components: 153 | if comp.area > remove_small_geometry_threshold: 154 | new_components.append(comp) 155 | # print(f"Removed {len(components) - len(new_components)} blobs") 156 | clean_mesh = trimesh.util.concatenate(new_components) 157 | vertices = clean_mesh.vertices 158 | faces = clean_mesh.faces 159 | mesh = trimesh.Trimesh(vertices, faces) 160 | except: 161 | print("clean_mesh error: continuing") 162 | 163 | mesh = trimesh.smoothing.filter_laplacian(mesh, lamb=0.3) 164 | if color_by == "normals": 165 | norm_cols = (-mesh.vertex_normals + 1) / 2 166 | norm_cols = np.clip(norm_cols, 0.0, 1.0) 167 | norm_cols = (norm_cols * 255).astype(np.uint8) 168 | alphas = np.full([norm_cols.shape[0], 1], 255, dtype=np.uint8) 169 | cols = np.concatenate((norm_cols, alphas), axis=1) 170 | mesh.visual.vertex_colors = cols 171 | elif color_by == "height": 172 | zs = mesh.vertices[:, 1] 173 | cols = trimesh.visual.interpolate(zs, color_map="viridis") 174 | mesh.visual.vertex_colors = cols 175 | else: 176 | mesh.visual.face_colors = [160, 160, 160, 255] 177 | return mesh 178 | 179 | 180 | def capture_scene_im(scene, pose, tm_pose=False, resolution=(1280, 720)): 181 | if not tm_pose: 182 | pose = geometry.transform.to_trimesh(pose) 183 | scene.camera_transform = pose 184 | data = scene.save_image(resolution=resolution) 185 | image = np.array(Image.open(io.BytesIO(data))) 186 | 187 | return image 188 | 189 | 190 | # adapted from https://github.com/NVlabs/BundleSDF/blob/878cee2f1cda23810ff861f6fef2922c96c7a67e/Utils.py#L309C1-L344C13 191 | def draw_xyz_axis( 192 | color, 193 | obj_in_cam, 194 | fx, 195 | fy, 196 | cx, 197 | cy, 198 | h, 199 | w, 200 | scale=0.1, 201 | thickness=2, 202 | transparency=0.3, 203 | is_input_rgb=False, 204 | ): 205 | """ 206 | @color: BGR 207 | """ 208 | if is_input_rgb: 209 | color = cv2.cvtColor(color, cv2.COLOR_RGB2BGR) 210 | 211 | oo = np.array([0, 0, 0]).astype(float) 212 | xx = np.array([1, 0, 0]).astype(float) * scale 213 | yy = np.array([0, 1, 0]).astype(float) * scale 214 | zz = np.array([0, 0, 1]).astype(float) * scale 215 | pts_of = torch.tensor(np.vstack((oo, xx, yy, zz))).float() # in object frame 216 | pts_of = pts_of.to(device=obj_in_cam.device, dtype=obj_in_cam.dtype) 217 | pts_cf = geometry.transform.transform_points(pts_of, obj_in_cam) # in camera frame 218 | 219 | pts_2d = 
geometry.transform.point_cloud_to_image_plane(pts_cf, fx, fy, cx, cy, h, w) 220 | origin = tuple(pts_2d[0].cpu().numpy()) 221 | xx = tuple(pts_2d[1].cpu().numpy()) 222 | yy = tuple(pts_2d[2].cpu().numpy()) 223 | zz = tuple(pts_2d[3].cpu().numpy()) 224 | 225 | line_type = cv2.FILLED 226 | arrow_len = 0 227 | tmp = color.copy() 228 | tmp1 = tmp.copy() 229 | tmp1 = cv2.arrowedLine( 230 | tmp1, 231 | origin, 232 | xx, 233 | color=(0, 0, 255), 234 | thickness=thickness, 235 | line_type=line_type, 236 | tipLength=arrow_len, 237 | ) 238 | mask = np.linalg.norm(tmp1 - tmp, axis=-1) > 0 239 | tmp[mask] = tmp[mask] * transparency + tmp1[mask] * (1 - transparency) 240 | tmp1 = tmp.copy() 241 | tmp1 = cv2.arrowedLine( 242 | tmp1, 243 | origin, 244 | yy, 245 | color=(0, 255, 0), 246 | thickness=thickness, 247 | line_type=line_type, 248 | tipLength=arrow_len, 249 | ) 250 | mask = np.linalg.norm(tmp1 - tmp, axis=-1) > 0 251 | tmp[mask] = tmp[mask] * transparency + tmp1[mask] * (1 - transparency) 252 | tmp1 = tmp.copy() 253 | tmp1 = cv2.arrowedLine( 254 | tmp1, 255 | origin, 256 | zz, 257 | color=(255, 0, 0), 258 | thickness=thickness, 259 | line_type=line_type, 260 | tipLength=arrow_len, 261 | ) 262 | mask = np.linalg.norm(tmp1 - tmp, axis=-1) > 0 263 | tmp[mask] = tmp[mask] * transparency + tmp1[mask] * (1 - transparency) 264 | tmp = tmp.astype(np.uint8) 265 | if is_input_rgb: 266 | tmp = cv2.cvtColor(tmp, cv2.COLOR_BGR2RGB) 267 | 268 | return tmp 269 | -------------------------------------------------------------------------------- /neuralfeels/datasets/sdf_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Utility functions for SDF computation and visualization 7 | 8 | import colorsys 9 | import hashlib 10 | import os 11 | 12 | import matplotlib as mpl 13 | import numpy as np 14 | import open3d as o3d 15 | import trimesh 16 | from matplotlib import cm 17 | from matplotlib.colors import ListedColormap 18 | from scipy import ndimage 19 | from scipy.spatial import cKDTree as KDTree 20 | 21 | from neuralfeels.contrib.urdf import SceneGraph, URDFParser, URDFTree 22 | 23 | 24 | def load_gt_mesh(file, color=True): 25 | """ 26 | Load ground-truth mesh from URDF file 27 | """ 28 | # Parse the URDF file 29 | parser = URDFParser(file) 30 | parser.parse() 31 | # Construct the URDF tree 32 | links = parser.links 33 | joints = parser.joints 34 | tree = URDFTree(links, joints) 35 | scene = SceneGraph(tree.root) 36 | mesh = scene.getMesh()[0] 37 | 38 | # SDF computation needs trimesh, but visualization needs open3d so we load both 39 | mesh_trimesh = trimesh.Trimesh( 40 | np.asarray(mesh.vertices), 41 | np.asarray(mesh.triangles), 42 | vertex_normals=np.asarray(mesh.vertex_normals), 43 | ) 44 | mesh_path = tree.root.link.visuals[0].geometry_mesh["filename"] 45 | mesh_scale = tree.root.link.visuals[0].geometry_mesh["scale"][0] 46 | object_name = os.path.dirname(mesh_path).split("/")[-1] 47 | mesh_o3d = o3d.io.read_triangle_mesh(mesh_path, color) 48 | mesh_o3d = mesh_o3d.scale(mesh_scale, center=mesh_o3d.get_center()) 49 | 50 | if not color: 51 | # assign random color, taken from viser: https://nerfstudio-project.github.io/viser/ 52 | mesh_o3d.paint_uniform_color( 53 | colorsys.hls_to_rgb( 54 | np.random.default_rng( 55 | np.frombuffer( 56 | hashlib.md5(object_name.encode("utf-8")).digest(), 57 | dtype="uint32", 58 | ) 59 | + 5 60 | ).uniform(), 61 | 0.6, 62 | 0.9, 63 | ) 64 | ) 65 | 66 | return mesh_trimesh, mesh_o3d 67 | 68 | 69 | def saturate_colors(rgb_array, factor): 70 | """Increase the saturation of an RGB array by a factor.""" 71 | import colorsys 72 | 73 | # Convert the array to HSL color space 74 | hsl_array = np.zeros_like(rgb_array) 75 | for i in range(rgb_array.shape[0]): 76 | hsl_array[i] = colorsys.rgb_to_hls(*rgb_array[i]) 77 | 78 | # Scale index 1 of HLS, i.e. the lightness channel (despite the name, not the saturation channel) 79 | hsl_array[:, 1] *= factor 80 | 81 | # Convert the array back to RGB color space 82 | rgb_array_out = np.zeros_like(rgb_array) 83 | for i in range(rgb_array.shape[0]): 84 | rgb_array_out[i] = colorsys.hls_to_rgb(*hsl_array[i]) 85 | 86 | return rgb_array_out 87 | 88 | 89 | def get_grid_pts(dims, transform): 90 | x = np.arange(dims[0]) 91 | y = np.arange(dims[1]) 92 | z = np.arange(dims[2]) 93 | x = x * transform[0, 0] + transform[0, 3] 94 | y = y * transform[1, 1] + transform[1, 3] 95 | z = z * transform[2, 2] + transform[2, 3] 96 | 97 | return x, y, z 98 | 99 | 100 | def eval_sdf_interp(sdf_interp, pc, handle_oob="except", oob_val=0.0): 101 | """param: 102 | handle_oob: dictates what to do with out of bounds points. Must 103 | take either 'except', 'mask' or 'fill'. 104 | """ 105 | 106 | reshaped = False 107 | if pc.ndim != 2: 108 | reshaped = True 109 | pc_shape = pc.shape[:-1] 110 | pc = pc.reshape(-1, 3) 111 | 112 | if handle_oob == "except": 113 | sdf_interp.bounds_error = True 114 | elif handle_oob == "mask": 115 | dummy_val = 1e99 116 | sdf_interp.bounds_error = False 117 | sdf_interp.fill_value = dummy_val 118 | elif handle_oob == "fill": 119 | sdf_interp.bounds_error = False 120 | sdf_interp.fill_value = oob_val 121 | else: 122 | raise ValueError("handle_oob must take a recognised value.")
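# evaluate the interpolator on the (flattened) query points; the branch above configured bounds_error / fill_value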
123 | 124 | sdf = sdf_interp(pc) 125 | 126 | if reshaped: 127 | sdf = sdf.reshape(pc_shape) 128 | 129 | if handle_oob == "mask": 130 | valid_mask = sdf != dummy_val 131 | return sdf, valid_mask 132 | 133 | return sdf 134 | 135 | 136 | def get_colormap(sdf_range=(-2, 2), surface_cutoff=0.01): 137 | white = np.array([1.0, 1.0, 1.0, 1.0]) 138 | sdf_max = sdf_range[1] + surface_cutoff - (sdf_range[1] % surface_cutoff)  # round up, without mutating the argument 139 | sdf_min = sdf_range[0] - (surface_cutoff - (-sdf_range[0] % surface_cutoff)) 140 | 141 | positive_n_cols = int(sdf_max / surface_cutoff) 142 | viridis = cm.get_cmap("viridis", positive_n_cols) 143 | positive_colors = viridis(np.linspace(0.2, 1, int(positive_n_cols))) 144 | positive_colors[0] = white 145 | 146 | negative_n_cols = int(np.abs(sdf_min) / surface_cutoff) 147 | redpurple = cm.get_cmap("RdPu", negative_n_cols).reversed() 148 | negative_colors = redpurple(np.linspace(0.0, 0.7, negative_n_cols)) 149 | negative_colors[-1] = white 150 | 151 | colors = np.concatenate((negative_colors, white[None, :], positive_colors), axis=0) 152 | sdf_cmap = ListedColormap(colors) 153 | 154 | norm = mpl.colors.Normalize(sdf_min, sdf_max) 155 | sdf_cmap_fn = cm.ScalarMappable(norm=norm, cmap=sdf_cmap) 156 | # plt.colorbar(sdf_cmap_fn) 157 | # plt.show() 158 | return sdf_cmap_fn 159 | 160 | 161 | def voxelize_subdivide( 162 | mesh, pitch, origin_voxel=np.zeros(3), max_iter=10, edge_factor=2.0 163 | ): 164 | """ 165 | Adapted from the trimesh function to allow for shifts in the origin 166 | of the SDF grid, i.e. there doesn't need to be a voxel 167 | centered at [0, 0, 0]. 168 | 169 | Voxelize a surface by subdividing a mesh until every edge is 170 | shorter than: (pitch / edge_factor) 171 | Parameters 172 | ----------- 173 | mesh: Trimesh object 174 | pitch: float, side length of a single voxel cube 175 | max_iter: int, cap maximum subdivisions or None for no limit. 176 | edge_factor: float, target edge length is (pitch / edge_factor). 177 | Returns 178 | ----------- 179 | VoxelGrid instance representing the voxelized mesh. 180 | """ 181 | max_edge = pitch / edge_factor 182 | 183 | if max_iter is None: 184 | longest_edge = np.linalg.norm( 185 | mesh.vertices[mesh.edges[:, 0]] - mesh.vertices[mesh.edges[:, 1]], axis=1 186 | ).max() 187 | max_iter = max(int(np.ceil(np.log2(longest_edge / max_edge))), 0) 188 | 189 | # get the same mesh subdivided so every edge is shorter 190 | # than a factor of our pitch 191 | v, f = trimesh.remesh.subdivide_to_size( 192 | mesh.vertices, mesh.faces, max_edge=max_edge, max_iter=max_iter 193 | ) 194 | 195 | # convert the vertices to their voxel grid position 196 | hit = (v - origin_voxel) / pitch 197 | 198 | # Provided edge_factor > 1 and max_iter is large enough, this is 199 | # sufficient to preserve 6-connectivity at the level of voxels. 
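# snap each subdivided vertex to its nearest voxel index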
hit = np.round(hit).astype(int) 201 | 202 | # remove duplicates 203 | unique, _ = trimesh.grouping.unique_rows(hit) 204 | 205 | # get the unique occupied voxel indices 206 | occupied_index = hit[unique] 207 | 208 | origin_index = occupied_index.min(axis=0) 209 | origin_position = origin_voxel + origin_index * pitch 210 | 211 | return trimesh.voxel.base.VoxelGrid( 212 | trimesh.voxel.encoding.SparseBinaryEncoding(occupied_index - origin_index), 213 | transform=trimesh.transformations.scale_and_translate( 214 | scale=pitch, translate=origin_position 215 | ), 216 | ) 217 | 218 | 219 | def sdf_from_occupancy(occ_map, voxel_size): 220 | # Convert occupancy field to sdf field 221 | inv_occ_map = 1 - occ_map 222 | 223 | # Get signed distance from occupancy map and inv map 224 | map_dist = ndimage.distance_transform_edt(inv_occ_map) 225 | inv_map_dist = ndimage.distance_transform_edt(occ_map) 226 | 227 | sdf = map_dist - inv_map_dist 228 | 229 | # metric units 230 | sdf = sdf.astype(float) 231 | sdf = sdf * voxel_size 232 | 233 | return sdf 234 | 235 | 236 | def sdf_from_mesh(mesh, voxel_size, extend_factor=0.15, origin_voxel=np.zeros(3)): 237 | # Convert mesh to occupancy field 238 | voxels = voxelize_subdivide(mesh, voxel_size, origin_voxel=origin_voxel) 239 | voxels = voxels.fill() 240 | occ_map = voxels.matrix 241 | transform = voxels.transform 242 | 243 | # Extend voxel grid around object 244 | extend = np.array(occ_map.shape) * extend_factor 245 | extend = np.repeat(extend, 2).reshape(3, 2) 246 | extend = np.round(extend).astype(int) 247 | occ_map = np.pad(occ_map, extend) 248 | transform[:3, 3] -= extend[:, 0] * voxel_size 249 | 250 | sdf = sdf_from_occupancy(occ_map, voxel_size) 251 | 252 | return sdf, np.array(transform) 253 | 254 | 255 | def colorize_mesh(color_pcd, mesh, sigma=0.01): 256 | """ 257 | Colorize the mesh by interpolating the colors of the point cloud with a Gaussian kernel 258 | """ 259 | # downsample the point cloud 260 | color_pcd = color_pcd.voxel_down_sample(voxel_size=0.001) 261 | pc_positions = color_pcd.point.positions.numpy().astype(np.float64) 262 | pc_colors = color_pcd.point.colors.numpy() 263 | pc_tree = KDTree(pc_positions) 264 | # Query the 20 nearest point-cloud neighbors of each mesh vertex 265 | distances, indices = pc_tree.query(np.asarray(mesh.vertices), k=20) 266 | # Compute the weights for each neighboring point based on its distance to the vertex using a Gaussian kernel 267 | weights = np.exp(-(distances**2) / (2 * sigma**2)) 268 | weights /= np.sum(weights, axis=1)[:, np.newaxis] 269 | mesh_colors = np.sum(weights[:, :, np.newaxis] * pc_colors[indices], axis=1) 270 | return o3d.utility.Vector3dVector(mesh_colors) 271 | --------------------------------------------------------------------------------