├── .github
    ├── fair.png
    ├── preview.gif
    ├── urdf_visualizer.png
    └── tactile_transformer.png
├── neuralfeels
    ├── __init__.py
    ├── contrib
    │   ├── __init__.py
    │   ├── sam
    │   │   ├── __init__.py
    │   │   └── test_sam.py
    │   ├── urdf
    │   │   ├── SceneGraph
    │   │   │   ├── __init__.py
    │   │   │   ├── MeshNode.py
    │   │   │   ├── Transform.py
    │   │   │   ├── SceneNode.py
    │   │   │   └── SceneGraph.py
    │   │   ├── URDF
    │   │   │   ├── URDFTree
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── ExLink.py
    │   │   │   │   └── URDFTree.py
    │   │   │   ├── Parser
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── Joint.py
    │   │   │   │   ├── Link.py
    │   │   │   │   └── URDFParser.py
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   ├── README.md
    │   │   └── viz.py
    │   └── tactile_transformer
    │   │   ├── __init__.py
    │   │   ├── custom_augmentation.py
    │   │   ├── fusion.py
    │   │   ├── head.py
    │   │   ├── README.md
    │   │   ├── dpt_model.py
    │   │   ├── reassemble.py
    │   │   ├── touch_vit.py
    │   │   ├── loss.py
    │   │   ├── tactile_depth.py
    │   │   └── utils.py
    ├── eval
    │   ├── __init__.py
    │   ├── occlusion_plot.py
    │   ├── group_plot.py
    │   ├── feelsight_init.py
    │   └── metrics.py
    ├── geometry
    │   ├── __init__.py
    │   ├── align_utils.py
    │   └── frustum.py
    ├── datasets
    │   ├── __init__.py
    │   ├── mesh_to_sdf_test.py
    │   ├── image_transforms.py
    │   ├── redwood_depth_noise_model.py
    │   ├── dataset.py
    │   ├── data_util.py
    │   └── sdf_util.py
    ├── viz
    │   ├── __init__.py
    │   ├── plot_utils.py
    │   ├── show_object_dataset.py
    │   ├── debug.py
    │   ├── rotate_object_video.py
    │   └── draw.py
    └── modules
    │   ├── __init__.py
    │   ├── misc.py
    │   ├── render.py
    │   ├── object.py
    │   ├── allegro.py
    │   └── loss.py
├── scripts
    ├── config
    │   ├── main
    │   │   ├── touch_depth
    │   │   │   ├── gt.yaml
    │   │   │   └── vit.yaml
    │   │   ├── data
    │   │   │   └── default.yaml
    │   │   ├── scene
    │   │   │   └── default.yaml
    │   │   ├── vi.yaml
    │   │   ├── baseline.yaml
    │   │   ├── tac.yaml
    │   │   ├── vitac.yaml
    │   │   ├── eval
    │   │   │   └── default.yaml
    │   │   ├── viz
    │   │   │   └── default.yaml
    │   │   ├── sensor
    │   │   │   ├── realsense.yaml
    │   │   │   └── digit.yaml
    │   │   ├── train
    │   │   │   └── default.yaml
    │   │   └── pose
    │   │   │   └── default.yaml
    │   ├── launcher
    │   │   └── basic.yaml
    │   └── config.yaml
    ├── run
    └── run.py
├── .pre-commit-config.yaml
├── LICENSE
├── setup.py
├── CONTRIBUTING.md
├── environment.yml
├── .gitignore
├── install.sh
├── data
    └── README.md
└── CODE_OF_CONDUCT.md
--------------------------------------------------------------------------------
/.github/fair.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/fair.png
--------------------------------------------------------------------------------
/.github/preview.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/preview.gif
--------------------------------------------------------------------------------
/.github/urdf_visualizer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/urdf_visualizer.png
--------------------------------------------------------------------------------
/.github/tactile_transformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/neuralfeels/HEAD/.github/tactile_transformer.png
--------------------------------------------------------------------------------
/neuralfeels/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | -------------------------------------------------------------------------------- /neuralfeels/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | -------------------------------------------------------------------------------- /neuralfeels/contrib/sam/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | -------------------------------------------------------------------------------- /scripts/config/main/touch_depth/gt.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | mode : gt 7 | -------------------------------------------------------------------------------- /scripts/config/main/data/default.yaml: -------------------------------------------------------------------------------- 1 | dataset: feelsight_real 2 | object: bell_pepper 3 | log: '00' 4 | 5 | dataset_path: "data/${main.data.dataset}/${main.data.object}/${main.data.log}" 6 | 7 | gt_sdf_dir: data/assets/gt_models 8 | 9 | train_fps: 1 10 | -------------------------------------------------------------------------------- /neuralfeels/eval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . import metrics 9 | -------------------------------------------------------------------------------- /scripts/config/launcher/basic.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | defaults: 3 | - override /hydra/launcher: joblib 4 | - _self_ 5 | 6 | hydra: 7 | sweep: 8 | dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} 9 | launcher: 10 | n_jobs: 1 11 | pre_dispatch: 0 12 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .SceneGraph import SceneGraph 7 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/URDFTree/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | from .URDFTree import URDFTree 7 | -------------------------------------------------------------------------------- /neuralfeels/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . import frustum, transform 9 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .tactile_depth import TactileDepth 7 | -------------------------------------------------------------------------------- /neuralfeels/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from . import data_util, dataset, image_transforms, sdf_util 7 | -------------------------------------------------------------------------------- /neuralfeels/viz/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . import draw, neuralfeels_gui, sdf_viewer 9 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/Parser/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .Link import Link 7 | from .URDFParser import URDFParser 8 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from .Parser import URDFParser 7 | from .URDFTree import URDFTree 8 | -------------------------------------------------------------------------------- /neuralfeels/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # flake8: noqa 7 | 8 | from . 
import loss, model, render, sample, trainer
9 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | from .SceneGraph import SceneGraph
7 | from .URDF import URDFParser, URDFTree
8 |
--------------------------------------------------------------------------------
/scripts/config/main/scene/default.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 |
7 | grid_dim : 200
8 | crop_dist: 5e-3
9 | mesh_interval: 10
10 |
11 | object_limits : 0
12 | rotate_z: 0
13 | extents: [0.15, 0.15, 0.15]
14 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/psf/black
3 |     rev: 24.4.2
4 |     hooks:
5 |       - id: black
6 |         files: |
7 |           (?x)^(
8 |             neuralfeels |
9 |             scripts
10 |           )
11 |
12 |   - repo: https://github.com/pycqa/isort
13 |     rev: 5.13.2
14 |     hooks:
15 |       - id: isort
16 |         args: ["--profile", "black"]
17 |         files: |
18 |           (?x)^(
19 |             neuralfeels |
20 |             scripts
21 |           )
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/README.md:
--------------------------------------------------------------------------------
1 | ## Open3D URDF visualization
2 |
3 | This code is based on [Helper3D](https://github.com/Jianghanxiao/Helper3D), a super useful tool to visualize URDFs dynamically in Open3D. We thank the authors for their work, and we have made some modifications to the code to make it work with our project.
4 |
5 |
6 |
7 | ### Test URDF visualization
8 |
9 | ```bash
10 | python neuralfeels/contrib/urdf/viz.py
11 | ```
12 |
--------------------------------------------------------------------------------
/scripts/config/main/vi.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Vision-only config
7 |
8 | defaults:
9 |   - data: default
10 |   - eval: default
11 |   - train: default
12 |   - pose: default
13 |   - scene: default
14 |   - viz: default
15 |   - sensor@sensor0: realsense
16 |   - _self_
17 |
18 | mode: vision
19 | occlusion: False
20 |
21 | sensor0:
22 |   name: realsense_front_left
23 |
--------------------------------------------------------------------------------
/scripts/config/main/baseline.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 6 | # Baseline three-camera config 7 | 8 | defaults: 9 | - data: default 10 | - eval: default 11 | - train: default 12 | - pose: default 13 | - scene: default 14 | - viz: default 15 | - sensor@sensor0: realsense 16 | - sensor@sensor1: realsense 17 | - sensor@sensor2: realsense 18 | - _self_ 19 | 20 | mode: baseline 21 | occlusion: False 22 | 23 | sensor0: 24 | name: realsense_front_left 25 | sensor1: 26 | name: realsense_back_right 27 | sensor2: 28 | name: realsense_top_down 29 | -------------------------------------------------------------------------------- /scripts/config/main/tac.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Tactile-only config 7 | 8 | defaults: 9 | - data: default 10 | - eval: default 11 | - train: default 12 | - pose: default 13 | - scene: default 14 | - viz: default 15 | - sensor@sensor0: digit 16 | - sensor@sensor1: digit 17 | - sensor@sensor2: digit 18 | - sensor@sensor3: digit 19 | - _self_ 20 | 21 | mode: tactile 22 | occlusion: False 23 | 24 | sensor0: 25 | name: digit_thumb 26 | sensor1: 27 | name: digit_index 28 | sensor2: 29 | name: digit_middle 30 | sensor3: 31 | name: digit_ring -------------------------------------------------------------------------------- /scripts/config/main/vitac.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Visuo-tactile config 7 | 8 | defaults: 9 | - data: default 10 | - eval: default 11 | - train: default 12 | - pose: default 13 | - scene: default 14 | - viz: default 15 | - sensor@sensor0: realsense 16 | - sensor@sensor1: digit 17 | - sensor@sensor2: digit 18 | - sensor@sensor3: digit 19 | - sensor@sensor4: digit 20 | - _self_ 21 | 22 | mode: vitac 23 | occlusion: False 24 | 25 | sensor0: 26 | name: realsense_front_left 27 | sensor1: 28 | name: digit_thumb 29 | sensor2: 30 | name: digit_index 31 | sensor3: 32 | name: digit_middle 33 | sensor4: 34 | name: digit_ring 35 | -------------------------------------------------------------------------------- /scripts/config/config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | defaults: 7 | - main: vitac 8 | - launcher: basic 9 | - _self_ 10 | 11 | user: suddhu 12 | profile: False 13 | seed: 1 14 | gpu_id: 0 15 | create_display: False 16 | expt_name: "${main.train.train_mode}" 17 | vox_size: "${main.train.gt_voxel_size}" 18 | 19 | sweep_dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} 20 | 21 | hydra: 22 | job_logging: 23 | root: 24 | handlers: [] 25 | job: 26 | chdir: true 27 | run: 28 | dir: ${sweep_dir}/${main.data.object}/${main.data.log}/${main.mode}/${expt_name}_${seed} 29 | sweep: 30 | dir: ${sweep_dir} 31 | subdir: ${main.data.object}/${main.data.log}/${main.mode}/${expt_name}_${hydra.job.num} # standard mode -------------------------------------------------------------------------------- /scripts/config/main/eval/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Eval parameters 7 | 8 | do_eval: 0 9 | eval_freq_s: 1 10 | sdf_eval: 1 11 | mesh_eval: 0 12 | 13 | save_period: 10 14 | save_slices: 0 15 | save_meshes: 0 16 | 17 | # if true saves intermediate reconstruction (mesh/point cloud) instead of just final one 18 | save_intermediate_recons: True 19 | # if true saves point clouds from SDF (at the same time mesh is saved) 20 | save_rendered_pcs: False 21 | # this is a maximum. The actual number might be less due to filtering rays not hitting a surface 22 | num_points_pcs: 2000 23 | num_points_f_score: 10000 24 | f_score_T: [2e-2, 1.8e-2, 1.6e-2, 1.4e-2, 1.2e-2, 1e-2, 9e-3, 8e-3, 7e-3, 6e-3, 5e-3, 4e-3, 3e-3, 2e-3, 1e-3] # range from 2cm to 1mm 25 | which_f_score: 10 # which one to display live, choose index of 5e-3 -------------------------------------------------------------------------------- /scripts/config/main/viz/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Visualizer settings 7 | 8 | meshes: 9 | mesh_rec: True 10 | mesh_rec_crop: False 11 | write_all_meshes: False 12 | save_rotate: True 13 | save_neural_field: True 14 | allegro: True 15 | has_gt_object: True # if dataset does not have gt object, set to False 16 | show_gt_object: False 17 | sensors_est: False 18 | transparent: False 19 | 20 | debug: 21 | rays: False 22 | frontend: False 23 | bbox: False 24 | origin: False 25 | 26 | layers: 27 | colormap: Color # Sensor, Normals, FScore, Color, n/a 28 | keyframes: None # None, Latest, All 29 | pointcloud: None # None, Both, Vision, Touch 30 | 31 | misc: 32 | record: False 33 | downsample_threshold: 50000 34 | rotate: False 35 | follow : False 36 | render_stream: False 37 | render_open3d: True 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/URDFTree/ExLink.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | 9 | class ExLink: 10 | def __init__(self, link): 11 | self.link = link 12 | self.parent = None 13 | self.children = [] 14 | self.joint = None 15 | 16 | def setParent(self, parent): 17 | self.parent = parent 18 | 19 | def addChild(self, child): 20 | self.children.append(child) 21 | 22 | def setJoint(self, joint): 23 | self.joint = joint 24 | 25 | def __repr__(self): 26 | output = {} 27 | output["link"] = self.link 28 | if self.parent != None: 29 | output["parent"] = self.parent.link.link_name 30 | else: 31 | output["parent"] = None 32 | output["children"] = [child.link.link_name for child in self.children] 33 | output["joint"] = self.joint 34 | return str(output) 35 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/MeshNode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | 9 | import copy 10 | import re 11 | 12 | import open3d as o3d 13 | 14 | 15 | class MeshNode: 16 | def __init__(self): 17 | self.mesh = None 18 | 19 | def addMesh(self, mesh): 20 | if self.mesh == None: 21 | self.mesh = mesh 22 | else: 23 | self.mesh += mesh 24 | 25 | def addMeshFile(self, mesh_file, color): 26 | # Read the mesh from obj file 27 | mesh_file = re.sub("allegro/allegro", "allegro", mesh_file) 28 | mesh = o3d.io.read_triangle_mesh(mesh_file) 29 | mesh.paint_uniform_color(color) 30 | self.addMesh(mesh) 31 | 32 | def getMesh(self, worldMatrix): 33 | if self.mesh == None: 34 | return None 35 | new_mesh = copy.deepcopy(self.mesh) 36 | new_mesh.transform(worldMatrix) 37 | return new_mesh 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) Meta Platforms, Inc. and affiliates. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /neuralfeels/datasets/mesh_to_sdf_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. 
and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | 7 | # Test script to visualize the SDF of a mesh, SDFViewer is taken from iSDF 8 | 9 | import os 10 | 11 | import git 12 | import numpy as np 13 | 14 | from neuralfeels.datasets import sdf_util 15 | from neuralfeels.datasets.sdf_util import load_gt_mesh 16 | from neuralfeels.viz import sdf_viewer 17 | 18 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 19 | 20 | 21 | def main(): 22 | mesh_path = os.path.join( 23 | root, "data/assets/gt_models/ycb/contactdb_rubber_duck.urdf" 24 | ) 25 | mesh, _ = load_gt_mesh(mesh_path) 26 | sdf, transform = sdf_util.sdf_from_mesh( 27 | mesh=mesh, voxel_size=5e-4, extend_factor=0.1, origin_voxel=np.zeros(3) 28 | ) 29 | sdf_viewer.SDFViewer( 30 | mesh=mesh, 31 | sdf_grid=sdf, 32 | sdf_range=None, 33 | grid2world=transform, 34 | surface_cutoff=0.001, 35 | colormap=True, 36 | ) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import setuptools 7 | 8 | with open("README.md", "r") as fh: 9 | long_description = fh.read() 10 | 11 | 12 | setuptools.setup( 13 | name="neuralfeels", 14 | version="0.0.1", 15 | author="Meta Research", 16 | description="Neural Feels.", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | url="https://github.com/facebookresearch/neuralfeels", 20 | packages=["neuralfeels"], 21 | classifiers=[ 22 | "Programming Language :: Python :: 3", 23 | "License :: OSI Approved :: MIT License", 24 | "Intended Audience :: Science/Research", 25 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 26 | ], 27 | python_requires=">=3.8", 28 | install_requires=[ 29 | "pyserial==3.5", 30 | "betterproto==2.0.0b5", 31 | "cobs==1.2.0", 32 | "google-api-python-client==2.97.0", 33 | "google-auth-httplib2==0.1.0", 34 | "google-auth-oauthlib==0.5.0", 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /neuralfeels/datasets/image_transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | import cv2 7 | import numpy as np 8 | 9 | 10 | class BGRtoRGB(object): 11 | """bgr format to rgb""" 12 | 13 | def __call__(self, image): 14 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 15 | return image 16 | 17 | 18 | class DepthTransform(object): 19 | """ 20 | Transforms tactile depth from the gel coordinate system to the camera coordinate system 21 | The camera is placed 0.022 m behind the gel surface 22 | """ 23 | 24 | def __init__(self, cam_dist): 25 | self.cam_dist = cam_dist 26 | 27 | def __call__(self, depth): 28 | depth = depth.astype(np.float32) 29 | depth += self.cam_dist 30 | depth[depth == self.cam_dist] = np.nan 31 | return depth.astype(np.float32) 32 | 33 | 34 | class DepthScale(object): 35 | """scale depth to meters""" 36 | 37 | def __init__(self, scale): 38 | self.scale = scale 39 | 40 | def __call__(self, depth): 41 | depth = depth.astype(np.float32) 42 | return depth * self.scale 43 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/custom_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | import numpy as np 9 | import torch 10 | 11 | 12 | class ToMask(object): 13 | """ 14 | Convert a 3 channel RGB image into a 1 channel segmentation mask 15 | """ 16 | 17 | def __init__(self, palette_dictionnary): 18 | self.nb_classes = len(palette_dictionnary) 19 | self.palette_dictionnary = palette_dictionnary 20 | 21 | def __call__(self, pil_image): 22 | # avoid taking the alpha channel 23 | image_array = np.array(pil_image) 24 | # get only one channel for the output 25 | output_array = np.zeros(image_array.shape, dtype="int") 26 | 27 | for label in self.palette_dictionnary.keys(): 28 | rgb_color = self.palette_dictionnary[label]["color"] 29 | mask = image_array == rgb_color 30 | output_array[mask] = int(label) 31 | 32 | output_array = torch.from_numpy(output_array).long() 33 | return output_array 34 | -------------------------------------------------------------------------------- /scripts/config/main/sensor/realsense.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Config for the realsense 7 | 8 | name: 9 | 10 | sampling: 11 | n_rays: 400 12 | n_strat_samples: 30 13 | n_surf_samples: 10 14 | depth_range: [0.3, 1.0] # [0.6, 6.0] for D455, [0.3, 3.0] for D435 15 | surface_samples_offset: 1e-3 # needs to be small to capture surface detail 16 | dist_behind_surf: 2e-2 # needs to be larger to carve out object extents 17 | loss_ratio : 1.0 18 | free_space_ratio: 0.7 # used for mapping, but not tracking 19 | 20 | kf_min_loss: 1e-2 21 | 22 | masks: sam_vit_l # read (gt from file), sam_vit_h, sam_vit_l, sam_vit_b 23 | sim_noise_iters: 5 24 | 25 | # (empirical) prefers this area of mask from multi-mask SAM output 26 | optimal_mask_size: 27 | realsense_front_left: 15000.0 28 | realsense_back_right: 5000.0 29 | realsense_top_down: 4000.0 30 | 31 | # (empirical) z-offset for pixel prompt wrt grasp center 32 | sam_offset: 33 | realsense_front_left: 0.0 34 | realsense_back_right: 0.01 35 | realsense_top_down: 0.0 36 | 37 | 38 | viz: 39 | reduce_factor: 1 40 | reduce_factor_up: 1 -------------------------------------------------------------------------------- /scripts/config/main/train/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Optimizer class 7 | 8 | optimizer: 9 | device: cuda 10 | checkpoint: 11 | incremental : True 12 | lr: 2e-4 13 | weight_decay: 1e-6 14 | num_iters: 1 15 | map_init_iters: 500 16 | 17 | model: 18 | do_active: 0 19 | scale_output: 1.0 20 | noise_std: 21 | feelsight : [2e-3, 2e-3] # [vision, touch] 22 | feelsight_real : [1e-3, 5e-3] # [vision, touch] 23 | window_size: 10 24 | num_layers: 3 # num_layers - 1 hidden layers 25 | hidden_feature_size: 64 26 | kf_time : 0.2 27 | milestones: [1, 2, 3] 28 | gamma: 0.5 29 | 30 | pos_encoding: 31 | n_levels: 19 # previous: 19 32 | n_features_per_level: 2 33 | log2_hashmap_size: 23 # previous: 23 34 | base_resolution: 4 35 | per_level_scale: 1.3 36 | 37 | loss: 38 | bounds_method: pc 39 | loss_type: L1 40 | trunc_weight: 10.0 41 | eik_weight: 0.0 42 | trunc_distance: 5e-3 43 | 44 | train_mode: slam # pose, map, slam 45 | gt_voxel_size: 5e-4 46 | load_checkpoint_model: False 47 | grasp: False 48 | 49 | batch: 50 | train_time_min: 0.5 51 | max_frames: 10 -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to neuralfeels 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. 
Please ensure your description is
23 | clear and has sufficient instructions to be able to reproduce the issue.
24 |
25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
26 | disclosure of security bugs. In those cases, please go through the process
27 | outlined on that page and do not file a public issue.
28 |
29 | ## License
30 | By contributing to neuralfeels, you agree that your contributions will be licensed
31 | under the LICENSE file in the root directory of this source tree.
--------------------------------------------------------------------------------
/neuralfeels/eval/occlusion_plot.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | """
7 | Plot graphs of pose error vs. occlusion for a sweep of neuralfeels experiments
8 | Usage: python neuralfeels/eval/occlusion_plot.py log_path= # e.g. multirun/2023-07-31/14-27-43
9 | """
10 |
11 | import os
12 |
13 | import git
14 | import hydra
15 | from omegaconf import DictConfig
16 |
17 | from neuralfeels.viz.plot_metrics import (
18 |     pose_error_vs_occlusion,
19 |     pose_errors_vs_camera_frustums,
20 | )
21 |
22 | root = git.Repo(".", search_parent_directories=True).working_tree_dir
23 |
24 | from pathlib import Path
25 |
26 |
27 | @hydra.main(version_base=None, config_path="config/", config_name="group_error")
28 | def main(cfg: DictConfig) -> None:
29 |     log_path = os.path.join(root, cfg.log_path)
30 |     if log_path[-1] == "/":
31 |         log_path = log_path[:-1]
32 |     all_expts = []
33 |     for path in Path(log_path).rglob("stats.pkl"):
34 |         expt_path = str(path.parent).replace(log_path + "/", "")
35 |         all_expts.append(expt_path)
36 |
37 |     pose_errors_vs_camera_frustums(all_expts, log_path)
38 |     pose_error_vs_occlusion(all_expts, log_path)
39 |     print(f"All outputs saved at {log_path}")
40 |
41 |
42 | if __name__ == "__main__":
43 |     main()
44 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/SceneGraph/Transform.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | import numpy as np
9 |
10 |
11 | # Borrowed idea from pySG repo: https://github.com/jmanek/pySG
12 | class Transform:
13 |     # Only support matrix currently
14 |     def __init__(self):
15 |         self._matrix = np.eye(4)
16 |         # Matrix for calculating the latest matrix, the order will always be scale -> rotate -> translate
17 |         self._transMat = np.eye(4)
18 |         self._rotMat = np.eye(4)
19 |         self._scaleMat = np.eye(4)
20 |
21 |     def getMatrix(self):
22 |         self.updateMatrix()
23 |         return self._matrix
24 |
25 |     def updateMatrix(self):
26 |         self._matrix = np.dot(self._transMat, np.dot(self._rotMat, self._scaleMat))
27 |
28 |     def translateMat(self, transMat):
29 |         # Apply the translation after previous _transMat
30 |         self._transMat = np.dot(transMat, self._transMat)
31 |
32 |     def rotateMat(self, rotMat):
33 |         # Apply the rotation after previous _rotMat
34 |         self._rotMat = np.dot(rotMat, self._rotMat)
35 |
36 |     def scaleMat(self, scaleMat):
37 |         # Apply the scale after previous _scaleMat
38 |         self._scaleMat = np.dot(scaleMat, self._scaleMat)
39 |
--------------------------------------------------------------------------------
/scripts/config/main/sensor/digit.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and its affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Config for DIGIT sensor
7 |
8 | name:
9 |
10 | tactile_depth:
11 |   mode: vit
12 |   use_real_data: True
13 |
14 | sampling:
15 |   n_rays: 5 # samples/area should be somewhat consistent with vision
16 |   n_strat_samples: 10
17 |   n_surf_samples: 10
18 |   depth_range: [-0.01, 0.05] # sampling from behind the digit camera to beyond the surface
19 |   surface_samples_offset: 1e-3 # needs to be small to capture surface detail
20 |   dist_behind_surf: 2e-2 # needs to be larger to carve out object extents
21 |   loss_ratio : 0.1 # slower lr for tactile because of less FoV
22 |   free_space_ratio: 0.0 # used for mapping, but not tracking
23 |
24 | kf_min_loss: 1e-2
25 |
26 | # taken from config_digit_shadow.yml from tacto
27 | gel:
28 |   origin: [0.022, 0, 0] # Center coordinate of the gel, in meters
29 |   width: 0.02 # Width of the gel, y-axis, in meters
30 |   height: 0.03 # Height of the gel, z-axis, in meters
31 |   curvature: True # Model the gel as a curve? True/False
32 |   curvatureMax: 0.004 # Deformation of the gel due to convexity
33 |   R: 0.1 # Radius of curved gel
34 |   countW: 100 # Number of samples in the horizontal direction; the higher, the finer the detail
35 |
36 | viz:
37 |   reduce_factor: 1
38 |   reduce_factor_up: 1
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/viz.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 6 | import os 7 | 8 | import git 9 | import numpy as np 10 | import open3d as o3d 11 | 12 | from neuralfeels.contrib.urdf import SceneGraph, URDFParser, URDFTree 13 | 14 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 15 | 16 | 17 | if __name__ == "__main__": 18 | URDF_file = os.path.join(root, "data/assets/allegro/allegro_digit_left_ball.urdf") 19 | # Parse the URDF file 20 | parser = URDFParser(URDF_file) 21 | parser.parse() 22 | # Construct the URDF tree 23 | links = parser.links 24 | joints = parser.joints 25 | tree = URDFTree(links, joints) 26 | # Construct the scene graph 27 | init_pose = np.array( 28 | [ 29 | 0.0627, 30 | 1.2923, 31 | 0.3383, 32 | 0.1088, 33 | 0.0724, 34 | 1.1983, 35 | 0.1551, 36 | 0.1499, 37 | 0.1343, 38 | 1.1736, 39 | 0.5355, 40 | 0.2164, 41 | 1.1202, 42 | 1.1374, 43 | 0.8535, 44 | -0.0852, 45 | ] 46 | ) 47 | 48 | init_pose = np.zeros(16) 49 | init_pose[12] += 1.4 50 | scene = SceneGraph(tree.root, init_pose) 51 | mesh = scene.getMesh() 52 | 53 | o3d.visualization.draw_geometries(mesh) 54 | -------------------------------------------------------------------------------- /scripts/config/main/pose/default.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Pose opt class 7 | 8 | timer: false 9 | grasp_threshold: 100 10 | show_samples: False 11 | method: second-order # first-order, second-order 12 | window_size: 3 # TODO: change this 13 | 14 | second_order: 15 | optimizer_cls: LevenbergMarquardt 16 | linear_solver_cls: CholeskyDenseSolver 17 | linearization_cls: DenseLinearization 18 | optimizer_kwargs: 19 | track_best_solution: True 20 | verbose: false 21 | __keep_final_step_size__: true 22 | adaptive_damping: true 23 | autograd_strategy: forward-mode # autograd_strategy: forward-mode for pose estimation 24 | tsdf_method: analytic # [analytic, numerical, autodiff] 25 | vectorize: true # true for pose estimation 26 | test_jacobians: false # debugging only 27 | empty_cuda_cache: false 28 | lm_iters: 20 29 | num_iters: 2 30 | step_size: 1.0 31 | tsdf_w: 1e-2 32 | regularize: true 33 | reg_w: 1e-2 34 | icp: true 35 | icp_w: 1e0 36 | icp_fitness: 0.5 37 | icp_inlier_rmse: 5e-3 38 | icp_thresh: [5, 0.01] # [rotation (deg), translation (m)] 39 | 40 | 41 | loss_type: L1 42 | n_rays_per_sensor_vision: 300 # total rays = n_rays_per_sensor * n_valid_sensors 43 | n_rays_per_sensor_tactile: 25 # total rays = n_rays_per_sensor * n_valid_sensors 44 | w_vision: 1.0 45 | w_tactile: 1.0 46 | -------------------------------------------------------------------------------- /neuralfeels/viz/plot_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | 9 | 10 | def smooth_data(y, N=5): 11 | # rolling avg. 
over N timesteps
12 |     df = pd.DataFrame()
13 |     df["y"] = y
14 |     df_smooth = df.rolling(N).mean()
15 |     df_smooth["y"][0 : N - 1] = y[0 : N - 1]  # first N readings are as-is
16 |     return df_smooth["y"]
17 |
18 |
19 | feelsight_sim_objects = [
20 |     "contactdb_rubber_duck",
21 |     "contactdb_elephant",
22 |     "077_rubiks_cube",
23 |     "large_dice",
24 |     "016_pear",
25 |     "015_peach",
26 |     "010_potted_meat_can",
27 |     "073-f_lego_duplo",
28 | ]
29 |
30 | feelsight_sim_mesh_diag = {
31 |     "contactdb_rubber_duck": 0.14833374114812853,
32 |     "contactdb_elephant": 0.1850651169858869,
33 |     "077_rubiks_cube": 0.12201651401757059,
34 |     "large_dice": 0.08720458052763055,
35 |     "016_pear": 0.13722709752814855,
36 |     "015_peach": 0.10593046598594759,
37 |     "010_potted_meat_can": 0.1449591345276316,
38 |     "073-f_lego_duplo": 0.06760945759285457,
39 | }
40 |
41 | feelsight_real_objects = [
42 |     "bell_pepper",
43 |     "large_dice",
44 |     "peach",
45 |     "pear",
46 |     "pepper_grinder",
47 |     "rubiks_cube_small",
48 | ]
49 |
50 | feelsight_real_mesh_diag = {
51 |     "bell_pepper": 0.14895704905777368,
52 |     "large_dice": 0.08720458052763055,
53 |     "peach": 0.10578790231401698,
54 |     "pear": 0.13838421462002087,
55 |     "pepper_grinder": 0.14848234731441984,
56 |     "rubiks_cube_small": 0.09042267417523107,
57 | }
58 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/URDF/Parser/Joint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | import numpy as np
9 |
10 |
11 | class Joint:
12 |     def __init__(self, joint_name, joint_type, child_name, parent_name):
13 |         self.joint_name = joint_name
14 |         self.joint_type = joint_type
15 |         self.child_name = child_name
16 |         self.parent_name = parent_name
17 |         # Naming rule: concatenate the tag name as the variable name, and the attribute name as the key
18 |         # If the tag has just one attribute, skip the dictionary
19 |         self.origin = {"xyz": np.array([0, 0, 0]), "rpy": np.array([0, 0, 0])}
20 |         self.axis = np.array([1, 0, 0])
21 |         self.limit = {"lower": 0, "upper": 0}
22 |
23 |     def setOriginXyz(self, xyz):
24 |         self.origin["xyz"] = np.array(xyz)
25 |
26 |     def setOriginRpy(self, rpy):
27 |         self.origin["rpy"] = np.array(rpy)
28 |
29 |     def setAxis(self, axis):
30 |         self.axis = np.array(axis)
31 |
32 |     def setLimitLower(self, lower):
33 |         self.limit["lower"] = lower
34 |
35 |     def setLimitUpper(self, upper):
36 |         self.limit["upper"] = upper
37 |
38 |     def __repr__(self):
39 |         output = {}
40 |         output["name"] = self.joint_name
41 |         output["type"] = self.joint_type
42 |         output["child_name"] = self.child_name
43 |         output["parent_name"] = self.parent_name
44 |         output["origin"] = self.origin
45 |         output["axis"] = self.axis
46 |         output["limit"] = self.limit
47 |
48 |         return str(output)
49 |
--------------------------------------------------------------------------------
/neuralfeels/modules/misc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 6 | # Miscellaneous utility functions 7 | 8 | import gc 9 | import os 10 | import shutil 11 | from typing import Dict 12 | 13 | import numpy as np 14 | import torch 15 | from scipy.spatial.transform import Rotation as R 16 | from termcolor import cprint 17 | 18 | 19 | def print_once(string, bucket=[]): 20 | """ 21 | Print statement only once: https://stackoverflow.com/a/75484543 22 | """ 23 | if string not in bucket: 24 | print(string) 25 | bucket.append(string) 26 | if len(bucket) > 50: 27 | del bucket[:-1] 28 | 29 | 30 | def gpu_usage_check(): 31 | available, total = torch.cuda.mem_get_info("cuda:0") 32 | availableGb = available / (1024**3) 33 | ratioGb = available / total 34 | if ratioGb < 0.1: 35 | cprint(f"WARNING: {availableGb}GB available on GPU", color="red") 36 | gc.collect() 37 | torch.cuda.empty_cache() 38 | 39 | 40 | def remove_and_mkdir(results_path: str) -> None: 41 | """ 42 | Remove directory (if exists) and create 43 | """ 44 | if os.path.exists(results_path): 45 | shutil.rmtree(results_path) 46 | os.makedirs(results_path) 47 | 48 | 49 | def pose_from_config(cfg: Dict): 50 | T = np.eye(4) 51 | T[:3, :3] = R.from_quat( 52 | [ 53 | cfg["rotation"]["x"], 54 | cfg["rotation"]["y"], 55 | cfg["rotation"]["z"], 56 | cfg["rotation"]["w"], 57 | ] 58 | ).as_matrix() 59 | T[:3, 3] = np.array( 60 | [cfg["translation"]["x"], cfg["translation"]["y"], cfg["translation"]["z"]] 61 | ) 62 | return T 63 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/fusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Fusion module for tactile transformer 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | class ResidualConvUnit(nn.Module): 15 | def __init__(self, features): 16 | super().__init__() 17 | 18 | self.conv1 = nn.Conv2d( 19 | features, features, kernel_size=3, stride=1, padding=1, bias=True 20 | ) 21 | self.conv2 = nn.Conv2d( 22 | features, features, kernel_size=3, stride=1, padding=1, bias=True 23 | ) 24 | self.relu = nn.ReLU(inplace=True) 25 | 26 | def forward(self, x): 27 | """Forward pass. 
28 |         Args:
29 |             x (tensor): input
30 |         Returns:
31 |             tensor: output
32 |         """
33 |         out = self.relu(x)
34 |         out = self.conv1(out)
35 |         out = self.relu(out)
36 |         out = self.conv2(out)
37 |         return out + x
38 |
39 |
40 | class Fusion(nn.Module):
41 |     def __init__(self, resample_dim):
42 |         super(Fusion, self).__init__()
43 |         self.res_conv1 = ResidualConvUnit(resample_dim)
44 |         self.res_conv2 = ResidualConvUnit(resample_dim)
45 |
46 |     def forward(self, x, previous_stage=None):
47 |         if previous_stage == None:
48 |             previous_stage = torch.zeros_like(x)
49 |         output_stage1 = self.res_conv1(x)
50 |         output_stage1 += previous_stage
51 |         output_stage2 = self.res_conv2(output_stage1)
52 |         output_stage2 = nn.functional.interpolate(
53 |             output_stage2, scale_factor=2, mode="bilinear", align_corners=True
54 |         )
55 |         return output_stage2
56 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: _neuralfeels
2 | channels:
3 |   - defaults
4 | dependencies:
5 |   - _libgcc_mutex
6 |   - _openmp_mutex
7 |   - brotlipy
8 |   - ca-certificates
9 |   - certifi
10 |   - cffi
11 |   - chardet
12 |   - conda-package-handling
13 |   - idna
14 |   - ld_impl_linux-64
15 |   - libffi
16 |   - libgcc-ng
17 |   - libgomp
18 |   - libspatialindex
19 |   - libstdcxx-ng
20 |   - openssl
21 |   - pip
22 |   - pycosat
23 |   - pycparser
24 |   - pyopenssl
25 |   - python=3.9.15
26 |   - readline
27 |   - requests
28 |   - ruamel_yaml
29 |   - six
30 |   - sqlite
31 |   - tk
32 |   - tqdm
33 |   - urllib3
34 |   - wheel
35 |   - xz
36 |   - yaml
37 |   - zlib
38 |   - pip:
39 |       - cython==3.0.0
40 |       - datasets==3.1.0
41 |       - dill==0.3.7
42 |       - einops==0.6.1
43 |       - ffmpeg-python==0.2.0
44 |       - gdown==5.2.0
45 |       - gitdb==4.0.10
46 |       - gitpython==3.1.32
47 |       - gputil==1.4.0
48 |       - h5py==3.9.0
49 |       - hydra-core==1.3.2
50 |       - hydra-joblib-launcher
51 |       - hydra-submitit-launcher==1.2.0
52 |       - imageio==2.31.1
53 |       - imageio-ffmpeg==0.4.8
54 |       - imgviz==1.7.4
55 |       - ipdb==0.13.13
56 |       - ipykernel==6.25.1
57 |       - ipython==8.14.0
58 |       - ipython-genutils==0.2.0
59 |       - json5==0.9.14
60 |       - matplotlib==3.7.2
61 |       - matplotlib-inline==0.1.6
62 |       - networkx==3.1
63 |       - ninja==1.11.1 # for quicker tcnn build
64 |       - numpy==1.22.4
65 |       - numba==0.60.0
66 |       - onnx==1.14.0
67 |       - onnxruntime==1.15.1
68 |       - open3d==0.16.0
69 |       - opencv-python
70 |       - pandas==2.0.3
71 |       - pre-commit==4.0.1
72 |       - pycocotools==2.0.7
73 |       - pyglet==1.5.27
74 |       - pyopengl==3.1.0
75 |       - pyvirtualdisplay==3.0
76 |       - pyvista==0.41.1
77 |       - pyyaml==6.0.1
78 |       - rtree==1.0.1
79 |       - scikit-image==0.21.0
80 |       - scikit-learn==1.3.0
81 |       - scipy==1.11.2
82 |       - seaborn==0.12.2
83 |       - shapely==2.0.1
84 |       - snakeviz==2.2.0
85 |       - termcolor==2.3.0
86 |       - timm==0.9.5
87 |       - trimesh==3.23.3
88 |       - urdf-parser-py==0.0.4
89 |       - wandb==0.15.8
90 |       - yappi==1.4.0
91 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/URDF/Parser/Link.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | import numpy as np
9 |
10 |
11 | class Link:
12 |     def __init__(self, link_name):
13 |         self.link_name = link_name
14 |         self.color = [0.0, 0.0, 0.0]
15 |         # Naming rule: concatenate the tag name as the variable name, and the attribute name as the key
16 |         self.visuals = []
17 |
18 |     def hasVisual(self):
19 |         if len(self.visuals) == 0:
20 |             return False
21 |         return True
22 |
23 |     def addVisual(self, visual_name=None):
24 |         self.visuals.append(Visual(visual_name))
25 |
26 |     def setVisualMeshScale(self, scale):
27 |         current_visual = len(self.visuals) - 1
28 |         self.visuals[current_visual].geometry_mesh["scale"] = np.array(scale)
29 |
30 |     def setVisualOriginXyz(self, xyz):
31 |         current_visual = len(self.visuals) - 1
32 |         self.visuals[current_visual].origin["xyz"] = np.array(xyz)
33 |
34 |     def setVisualOriginRpy(self, rpy):
35 |         current_visual = len(self.visuals) - 1
36 |         self.visuals[current_visual].origin["rpy"] = np.array(rpy)
37 |
38 |     def setVisualGeometryMeshFilename(self, filename):
39 |         current_visual = len(self.visuals) - 1
40 |         self.visuals[current_visual].geometry_mesh["filename"] = filename
41 |
42 |     def __repr__(self):
43 |         output = {}
44 |         output["name"] = self.link_name
45 |         output["visual"] = self.visuals
46 |         return str(output)
47 |
48 |
49 | class Visual:
50 |     def __init__(self, visual_name=None):
51 |         self.visual_name = visual_name
52 |         self.origin = {"xyz": np.array([0, 0, 0]), "rpy": np.array([0, 0, 0])}
53 |         self.geometry_mesh = {"filename": None, "scale": np.array([1.0, 1.0, 1.0])}
54 |
55 |     def __repr__(self):
56 |         output = {}
57 |         output["origin"] = self.origin
58 |         output["mesh"] = self.geometry_mesh["filename"]
59 |         return str(output)
60 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/urdf/URDF/URDFTree/URDFTree.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/Jianghanxiao/Helper3D
7 |
8 | from .ExLink import ExLink
9 |
10 |
11 | class URDFTree:
12 |     # Construct the URDF tree based on the parser
13 |     def __init__(self, links, joints):
14 |         self.links = links
15 |         self.joints = joints
16 |         # Init ExLinks (extended links: include joint info in the child part; parent and child info)
17 |         self.exLinks = {}
18 |         self.initExlinks()
19 |         # Build the tree and find the root (If not strictly a tree, construct a virtual root)
20 |         self.buildTree()
21 |         self.root = None
22 |         self.findRoot()
23 |
24 |     def initExlinks(self):
25 |         # Create extended links list
26 |         for link_name in self.links:
27 |             exLink = ExLink(self.links[link_name])
28 |             self.exLinks[link_name] = exLink
29 |
30 |     def buildTree(self):
31 |         for joint_name in self.joints:
32 |             joint = self.joints[joint_name]
33 |             # Connect child and parent through parent and children in exLink
34 |             child_name = joint.child_name
35 |             parent_name = joint.parent_name
36 |             child = self.exLinks[child_name]
37 |             parent = self.exLinks[parent_name]
38 |             child.setJoint(joint)
39 |             child.setParent(parent)
40 |             parent.addChild(child)
41 |
42 |     def findRoot(self):
43 |         roots = []
44 |         for link_name in self.exLinks:
45 |             link = self.exLinks[link_name]
46 |             if link.parent == None:
47 |                 roots.append(link)
48 |         if len(roots) == 0:
49 |             raise RuntimeError("Invalid: No root nodes for the URDF")
50 |         elif len(roots) == 1:
51 |             self.root = roots[0]
52 |         else:
53 |             # Construct a virtual root to connect all nodes without a parent
54 |             self.root = ExLink(None)
55 |             for child in roots:
56 |                 self.root.addChild(child)
57 |                 child.setParent(self.root)
58 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/tactile_transformer/head.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT
7 |
8 | # Depth and segmentation head for tactile transformer
9 |
10 | import torch.nn as nn
11 |
12 |
13 | class Interpolate(nn.Module):
14 |     def __init__(self, scale_factor, mode, align_corners=False):
15 |         super(Interpolate, self).__init__()
16 |         self.interp = nn.functional.interpolate
17 |         self.scale_factor = scale_factor
18 |         self.mode = mode
19 |         self.align_corners = align_corners
20 |
21 |     def forward(self, x):
22 |         x = self.interp(
23 |             x,
24 |             scale_factor=self.scale_factor,
25 |             mode=self.mode,
26 |             align_corners=self.align_corners,
27 |         )
28 |         return x
29 |
30 |
31 | class HeadDepth(nn.Module):
32 |     def __init__(self, features):
33 |         super(HeadDepth, self).__init__()
34 |         self.head = nn.Sequential(
35 |             nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
36 |             Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
37 |             nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
38 |             nn.ReLU(),
39 |             nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
40 |             # nn.ReLU()
41 |             nn.Sigmoid(),
42 |         )
43 |
44 |     def forward(self, x):
45 |         x = self.head(x)
46 |         # x = (x - x.min())/(x.max()-x.min() + 1e-15)
47 |         return x
48 |
49 |
50 | class HeadSeg(nn.Module):
51 |     def __init__(self, features, nclasses=2):
52 |         super(HeadSeg, self).__init__()
53 |         self.head = nn.Sequential(
54 |             nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
55 |             Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
56 |             nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
57 |             nn.ReLU(),
58 |             nn.Conv2d(32, nclasses, kernel_size=1, stride=1, padding=0),
59 |         )
60 |
61 |     def forward(self, x):
62 |         x = self.head(x)
63 |         return x
64 |
--------------------------------------------------------------------------------
/neuralfeels/contrib/tactile_transformer/README.md:
--------------------------------------------------------------------------------
1 | ## Tactile transformer: vision-based touch to depth
2 |
3 | https://github.com/user-attachments/assets/148ec457-d9d1-415f-887f-73ebed8a568b
4 |
5 | While vision-based touch sensors interpret contact geometry as images, they remain out-of-distribution from natural images. The embedded camera directly perceives the illuminated gelpad, and contact depth is obtained either via photometric stereo or via supervised learning. Existing touch-to-depth relies on convolution; however, recent work has shown the benefit of a ViT for dense depth prediction in natural images. We present a tactile transformer for predicting contact depth from vision-based touch, trained entirely in simulation to generalize across multiple real-world DIGIT sensors. For use, refer to the [NeuralFeels](https://github.com/facebookresearch/neuralfeels) repository.
6 |
7 | Our code is based on [FocusOnDepth](https://github.com/antocad/FocusOnDepth), a re-implementation of the popular [DPT](https://github.com/isl-org/DPT) vision transformer. We make the necessary modifications to work for tactile images, and share the weights online. The models `data/tactile_transformer/dpt_real.p` and `data/tactile_transformer/dpt_sim.p` are trained on TACTO data from simulated interaction with YCB objects. The models differ slightly in the augmentations used during data generation.
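For orientation, inference with these weights is a standard image-to-depth forward pass. The sketch below is illustrative only: the load path, call convention, and `(depth, segmentation)` output pair are assumptions rather than the actual TouchVIT API, and the example script in the next section remains the authoritative entry point.

```python
# Hedged sketch: names, load path, and output convention are assumptions,
# not the actual TouchVIT API. See touch_vit.py below for real usage.
import torch


def predict_depth(model: torch.nn.Module, tactile_rgb: torch.Tensor) -> torch.Tensor:
    """Run a DPT-style touch-to-depth model on a (B, 3, H, W) batch in [0, 1]."""
    model.eval()
    with torch.no_grad():
        out = model(tactile_rgb)
        # FocusOnDepth-style models return a (depth, segmentation) pair.
        depth = out[0] if isinstance(out, tuple) else out
    return depth.squeeze(1)  # (B, H, W) depth in the model's normalized units


# model = torch.load("data/tactile_transformer/dpt_real.p")  # assumed weight format
# depth = predict_depth(model, torch.rand(1, 3, 224, 224))
```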
8 | 9 | ## Example script 10 | 11 | 12 | 13 | *Tactile transformer depth outputs (colormapped) for simulated data from interaction with YCB objects.* 14 | 15 | First, download tactile data for `YCB` objects: 16 | ```bash 17 | cd data 18 | gdown https://drive.google.com/drive/folders/1a-8vfMCkW52BpWOPfqk5WM5zsSjBfhN1?usp=sharing --folder 19 | mv sim tacto_data 20 | cd tacto_data && unzip -q '*.zip' && rm *.zip 21 | cd ../.. 22 | ``` 23 | 24 | Run the test script: 25 | ```bash 26 | python neuralfeels/contrib/tactile_transformer/touch_vit.py 27 | ``` 28 | 29 | ## Citation 30 | 31 | If you find NeuralFeels useful in your research, please consider citing our paper: 32 | 33 | ```bibtex 34 | @article{suresh2024neuralfeels, 35 | title={{N}eural feels with neural fields: {V}isuo-tactile perception for in-hand manipulation}, 36 | author={Suresh, Sudharshan and Qi, Haozhi and Wu, Tingfan and Fan, Taosha and Pineda, Luis and Lambeta, Mike and Malik, Jitendra and Kalakrishnan, Mrinal and Calandra, Roberto and Kaess, Michael and Ortiz, Joseph and Mukadam, Mustafa}, 37 | journal={Science Robotics}, 38 | pages={adl0628}, 39 | year={2024}, 40 | publisher={American Association for the Advancement of Science} 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | results/* 3 | 4 | # mesh files 5 | *.ply 6 | *.obj 7 | *core 8 | 9 | # save files 10 | *.pkl 11 | 12 | data/* 13 | !data/README.md 14 | .vscode 15 | .hydra 16 | /tools/* 17 | outputs/* 18 | multirun/* 19 | *.code-workspace 20 | 21 | # Byte-compiled / optimized / DLL files 22 | __pycache__/ 23 | *.py[cod] 24 | *$py.class 25 | 26 | # C extensions 27 | *.so 28 | *.p 29 | *.tar 30 | 31 | # Distribution / packaging 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib/ 40 | lib64/ 41 | parts/ 42 | sdist/ 43 | var/ 44 | wheels/ 45 | pip-wheel-metadata/ 46 | share/python-wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | MANIFEST 51 | 52 | # PyInstaller 53 | # Usually these files are written by a python script from a template 54 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 55 | *.manifest 56 | *.spec 57 | 58 | # Installer logs 59 | pip-log.txt 60 | pip-delete-this-directory.txt 61 | 62 | # Unit test / coverage reports 63 | htmlcov/ 64 | .tox/ 65 | .nox/ 66 | .coverage 67 | .coverage.* 68 | .cache 69 | nosetests.xml 70 | coverage.xml 71 | *.cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | 84 | # Flask stuff: 85 | instance/ 86 | .webassets-cache 87 | 88 | # Scrapy stuff: 89 | .scrapy 90 | 91 | # Sphinx documentation 92 | docs/_build/ 93 | 94 | # PyBuilder 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies.
112 | #Pipfile.lock 113 | 114 | # celery beat schedule file 115 | celerybeat-schedule 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | 147 | __MACOSX 148 | 149 | # Misc 150 | .nfs* 151 | -------------------------------------------------------------------------------- /neuralfeels/modules/render.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # SDF depth rendering, based on iSDF: https://github.com/facebookresearch/iSDF 7 | 8 | import torch 9 | 10 | from neuralfeels.geometry import transform 11 | from neuralfeels.modules.model import gradient 12 | 13 | 14 | def sdf_render_depth(z_vals, sdf, t): 15 | """ 16 | Basic method for rendering depth from SDF using samples along a ray. 17 | Assumes z_vals are ordered small -> large. 18 | Assumes sdf are ordered from expected small -> expected large 19 | """ 20 | # assert (z_vals[0].sort()[1].cpu() == torch.arange(len(z_vals[0]))).all() 21 | 22 | # z_vals are sorted from gel to camera 23 | # sdfs sorted negative to positive (inside to outside) 24 | n = sdf.size(1) # n_sample per ray 25 | 26 | inside = sdf < 0 # sdf indices inside object 27 | ixs = torch.arange(0, n, 1, device=sdf.device) # ascending order [0, n) 28 | mul = inside * ixs # keep only inside points 29 | max_ix = mul.argmax(dim=1) # smallest -ve value before intersection 30 | 31 | arange = torch.arange(z_vals.size(0), device=sdf.device) # [0 - n_pixels] 32 | depths = ( 33 | z_vals[arange, max_ix] + sdf[arange, max_ix] * t 34 | ) # sdf will always be +ve, z_vals always -ve 35 | 36 | # if no zero crossing found 37 | depths[max_ix == 0] = torch.nan 38 | # print(torch.sum(~torch.isnan(depths)) / len(depths.view(-1))) 39 | return depths 40 | 41 | 42 | # Compute surface normals in the camera frame 43 | def render_normals(T_WC, render_depth, sdf_map, dirs_C): 44 | origins, dirs_W = transform.origin_dirs_W(T_WC, dirs_C) 45 | origins = origins.view(-1, 3) 46 | dirs_W = dirs_W.view(-1, 3) 47 | 48 | pc = origins + (dirs_W * (render_depth.flatten()[:, None])) 49 | pc.requires_grad_() 50 | sdf = sdf_map(pc.unsqueeze(0)) 51 | sdf_grad = gradient(pc, sdf) 52 | 53 | surface_normals_W = -sdf_grad / (sdf_grad.norm(dim=1, keepdim=True) + 1e-6) 54 | R_CW = T_WC[:, :3, :3].inverse() 55 | surface_normals_C = (R_CW * surface_normals_W[..., None, :]).sum(dim=-1) 56 | 57 | surface_normals_C = surface_normals_C.view( 58 | render_depth.shape[0], render_depth.shape[1], 3 59 | ) 60 | return surface_normals_C 61 | 62 | 63 | def render_weighted(weights, vals, dim=-1, normalise=False): 64 | """ 65 | General rendering function using weighted sum.
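Args:
    weights: per-sample weights along each ray.
    vals: per-sample values to composite (e.g., depths or colors).
    dim: dimension along which the weighted sum is taken.
    normalise: if True, divide the weighted sum by the number of samples along `dim`.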
66 | """ 67 | weighted_vals = weights * vals 68 | render = weighted_vals.sum(dim=dim) 69 | if normalise: 70 | n_samples = weights.size(dim) 71 | render = render / n_samples 72 | 73 | return render 74 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Copyright (c) Meta Platforms, Inc. and affiliates. 4 | 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | usage="$(basename "$0") [-h] [-e ENV_NAME] [-f INSTALL_FAIROTAG] -- 9 | Install the neuralfeels environment 10 | where: 11 | -h show this help text 12 | -e name of the environment, default=_neuralfeels 13 | " 14 | 15 | options=':he:' 16 | while getopts $options option; do 17 | case "$option" in 18 | h) 19 | echo "$usage" 20 | exit 21 | ;; 22 | e) ENV_NAME=$OPTARG ;; 23 | :) 24 | printf "missing argument for -%s\n" "$OPTARG" >&2 25 | echo "$usage" >&2 26 | exit 1 27 | ;; 28 | \?) 29 | printf "illegal option: -%s\n" "$OPTARG" >&2 30 | echo "$usage" >&2 31 | exit 1 32 | ;; 33 | esac 34 | done 35 | 36 | # if ENV_NAME is not set, then set it to _neuralfeels 37 | if [ -z "$ENV_NAME" ]; then 38 | ENV_NAME=_neuralfeels 39 | fi 40 | 41 | echo "Environment Name: $ENV_NAME" 42 | 43 | unset PYTHONPATH LD_LIBRARY_PATH 44 | 45 | # # remove any exisiting env 46 | micromamba remove -y -n $ENV_NAME --all 47 | micromamba env create -y --name $ENV_NAME --file environment.yml 48 | micromamba activate $ENV_NAME 49 | 50 | # Following the instructions from https://docs.nerf.studio/quickstart/installation.html for the right combination of cuda / torch / tinycudann 51 | python -m pip install --upgrade pip 52 | pip uninstall torch torchvision functorch tinycudann -y 53 | pip install torch==2.1.2+cu118 torchvision==0.16.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 -y 54 | micromamba install -c "nvidia/label/cuda-11.8.0" cuda-toolkit 55 | 56 | # Check if the install is successful 57 | python -c "import torch; assert torch.cuda.is_available()" 58 | if nvcc --version &>/dev/null; then 59 | echo "nvcc is installed and working." 60 | else 61 | echo "nvcc is not installed or not in PATH." 62 | exit 1 63 | fi 64 | 65 | # Install tinycudann for instant-ngp backbone. Common issues: 66 | # - Setup with gcc/g++ 9 if it throws errors (see issue: https://github.com/NVlabs/tiny-cuda-nn/issues/284) 67 | # - Differing compute capabilities: https://github.com/NVlabs/tiny-cuda-nn/issues/341#issuecomment-1651814335 68 | pip install ninja \ 69 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch \ 70 | git+https://github.com/facebookresearch/segment-anything.git \ 71 | git+https://github.com/suddhu/tacto.git@master 72 | 73 | # Install github.com/facebookresearch/theseus 74 | micromamba install -y suitesparse # required for theseus 75 | pip install theseus-ai 76 | 77 | # Install neuralfeels package 78 | pip install -e . 79 | 80 | # Make entrypoint executable 81 | chmod +x scripts/run 82 | -------------------------------------------------------------------------------- /neuralfeels/contrib/sam/test_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Test multi-mask SAM model with point prompts on sample data from neuralfeels 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from PIL import Image 11 | from segment_anything import SamPredictor, sam_model_registry 12 | 13 | 14 | # detect fingers in 2D and compute mask from that 15 | def show_mask(mask, ax, random_color=False): 16 | if random_color: 17 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 18 | else: 19 | color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) 20 | h, w = mask.shape[-2:] 21 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 22 | ax.imshow(mask_image) 23 | 24 | 25 | def show_points(coords, labels, ax, marker_size=375): 26 | pos_points = coords[labels == 1] 27 | neg_points = coords[labels == 0] 28 | ax.scatter( 29 | pos_points[:, 0], 30 | pos_points[:, 1], 31 | color="green", 32 | marker="*", 33 | s=marker_size, 34 | edgecolor="white", 35 | linewidth=1.25, 36 | ) 37 | ax.scatter( 38 | neg_points[:, 0], 39 | neg_points[:, 1], 40 | color="red", 41 | marker="*", 42 | s=marker_size, 43 | edgecolor="white", 44 | linewidth=1.25, 45 | ) 46 | 47 | 48 | def show_box(box, ax): 49 | x0, y0 = box[0], box[1] 50 | w, h = box[2] - box[0], box[3] - box[1] 51 | ax.add_patch( 52 | plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2) 53 | ) 54 | 55 | 56 | def main(): 57 | sam_checkpoint = "data/segment-anything/sam_vit_h_4b8939.pth" 58 | device = "cuda" 59 | model_type = "default" 60 | 61 | sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) 62 | sam.to(device=device) 63 | predictor = SamPredictor(sam) 64 | 65 | image_path = ( 66 | "data/feelsight_real/rubiks_cube_small/00/realsense/front-left/image/0.jpg" 67 | ) 68 | 69 | with Image.open(image_path) as im: 70 | image = np.asarray(im) 71 | 72 | predictor.set_image(image) 73 | 74 | input_point = np.array([[390, 205]]) 75 | input_label = np.array([1]) 76 | 77 | masks, scores, logits = predictor.predict( 78 | point_coords=input_point, 79 | point_labels=input_label, 80 | box=None, 81 | multimask_output=True, 82 | ) 83 | 84 | for i, (mask, score) in enumerate(zip(masks, scores)): 85 | plt.figure(figsize=(10, 10)) 86 | plt.imshow(image) 87 | show_mask(mask, plt.gca()) 88 | show_points(input_point, input_label, plt.gca()) 89 | plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) 90 | plt.axis("off") 91 | plt.show() 92 | 93 | 94 | if __name__ == "__main__": 95 | main() 96 | -------------------------------------------------------------------------------- /neuralfeels/geometry/align_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
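# A minimal, hypothetical usage sketch for the `register` helper defined below,
# aligning two Open3D point clouds that differ by a small translation:
#   import numpy as np
#   import open3d as o3d
#   pts = np.random.rand(500, 3)
#   src = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pts))
#   tgt = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pts + [0.005, 0.0, 0.0]))
#   T_reg, (fitness, rmse, corres) = register(src, tgt)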
5 | 6 | # Adapted from https://github.com/psodhi/tactile-in-hand 7 | 8 | import copy 9 | 10 | import numpy as np 11 | import open3d as o3d 12 | 13 | 14 | def visualize_registration(source, target, transformation, vis3d=None, colors=None): 15 | """Open3D visualizer for registration""" 16 | source_copy = copy.deepcopy(source) 17 | target_copy = copy.deepcopy(target) 18 | 19 | source_copy.transform(transformation) 20 | 21 | clouds = [source, target_copy, source_copy] 22 | 23 | if colors is not None: 24 | clouds[0].paint_uniform_color(colors[0]) # black, source 25 | clouds[1].paint_uniform_color(colors[1]) # green, target 26 | clouds[2].paint_uniform_color(colors[2]) # red, transformed 27 | 28 | vis3d.add_geometry(clouds[0]) 29 | vis3d.add_geometry(clouds[1]) 30 | vis3d.add_geometry(clouds[2]) 31 | 32 | vis3d.run() 33 | vis3d.destroy_window() 34 | 35 | 36 | def icp( 37 | source: o3d.geometry.PointCloud, 38 | target: o3d.geometry.PointCloud, 39 | T_init=np.eye(4), 40 | mcd=0.01, 41 | max_iter=15, 42 | ): 43 | """Point to point ICP registration 44 | 45 | Args: 46 | source: source point cloud 47 | target: target point cloud 48 | T_init : Defaults to np.eye(4). 49 | mcd : Defaults to 0.01. 50 | max_iter : Defaults to 15. 51 | """ 52 | result = o3d.pipelines.registration.registration_icp( 53 | source=source, 54 | target=target, 55 | max_correspondence_distance=mcd, 56 | init=T_init, 57 | estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(), 58 | criteria=o3d.pipelines.registration.ICPConvergenceCriteria( 59 | max_iteration=max_iter 60 | ), 61 | ) 62 | transformation = result.transformation 63 | metrics = [result.fitness, result.inlier_rmse, result.correspondence_set] 64 | return transformation, metrics 65 | 66 | 67 | def register( 68 | points3d_1, 69 | points3d_2, 70 | T_init=np.eye(4), 71 | debug_vis=False, 72 | ): 73 | """Register two point clouds using ICP and returns the 6DoF transformation""" 74 | 75 | cloud_1, cloud_2 = (points3d_1, points3d_2) 76 | 77 | T_reg, metrics_reg = icp(source=cloud_1, target=cloud_2, T_init=T_init) 78 | 79 | # print("T_reg: ", T_reg) 80 | if debug_vis: 81 | colors = [ 82 | [0, 0, 0], 83 | [0, 1, 0], 84 | [1, 0, 0], 85 | ] # black, green, red 86 | 87 | vis3d = o3d.visualization.Visualizer() 88 | vis3d.create_window() 89 | visualize_registration( 90 | source=cloud_1, 91 | target=cloud_2, 92 | transformation=T_reg, 93 | vis3d=vis3d, 94 | colors=colors, 95 | ) 96 | 97 | return T_reg, metrics_reg 98 | -------------------------------------------------------------------------------- /neuralfeels/modules/object.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Dataloader for object data in neuralfeels 7 | 8 | import os 9 | 10 | import dill as pickle 11 | import numpy as np 12 | import theseus as th 13 | import torch 14 | 15 | 16 | class Object: 17 | def __init__( 18 | self, 19 | map_mode: bool, 20 | dataset_path: str = None, 21 | device: str = "cuda", 22 | ): 23 | """Dataloader for object data in neuralfeels""" 24 | super(Object, self).__init__() 25 | 26 | self.data_path = None 27 | if dataset_path is not None: 28 | self.data_path = os.path.join(dataset_path, "data.pkl") 29 | with open(self.data_path, "rb") as p: 30 | self.data = pickle.load(p) 31 | self.object_pose_gt = torch.tensor( 32 | np.array(self.data["object"]["pose"]), 33 | device=device, 34 | dtype=torch.float32, 35 | ) 36 | else: 37 | self.object_pose_gt = torch.eye(4, device=device).unsqueeze(0) 38 | # tensor large enough for 180 seconds of data at 30 fps 39 | self.object_pose_gt = self.object_pose_gt.repeat(180 * 30, 1, 1) 40 | 41 | # current_pose_offset: the difference between the current and ground-truth pose at every iteration. Needed to isolate 42 | # pose errors from map errors in F-score computation. (only applicable for mode=SLAM) 43 | self.current_pose_offset = np.eye(4) 44 | 45 | if map_mode: 46 | # if mapping, initialize the tracking problem with ground-truth 47 | self.object_pose_track = self.object_pose_gt.clone() 48 | else: 49 | # if slam/pure pose, initialize the tracking problem with identity 50 | self.object_pose_track = torch.zeros_like(self.object_pose_gt) 51 | self.object_pose_track[0] = torch.eye(4, device=device) 52 | 53 | def add_noise_to_poses(self, poses, noise_cfg): 54 | """ 55 | Corrupt poses with noise 56 | """ 57 | 58 | N = poses.shape[0] 59 | pose_noise = th.SE3.exp_map( 60 | torch.cat( 61 | [ 62 | noise_cfg.translation 63 | * ( 64 | 2.0 * torch.rand((N, 3), device=poses.device) - 1 65 | ), # scale translation noise n_t * [-1, 1] 66 | noise_cfg.rotation 67 | * ( 68 | 2 * torch.rand((N, 3), device=poses.device) - 1 69 | ), # scale rotation noise n_r * [-1, 1] 70 | ], 71 | dim=1, 72 | ) 73 | ).to_matrix() 74 | 75 | return poses @ pose_noise 76 | 77 | def save_baseline(self): 78 | # save pickle file with added baseline 79 | self.data["object"]["pose"] = list(self.object_pose_track.clone().cpu().numpy()) 80 | with open(self.data_path, "wb") as p: 81 | pickle.dump(self.data, p) 82 | print("Saved baseline poses to: ", self.data_path) 83 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # *Feelsight* : A visuo-tactile robot manipulation dataset 2 | 3 | 4 | 5 |
6 | 9 |
10 | 11 |
12 | 13 | FeelSight is a dataset of vision, touch, and proprioception data collected during in-hand rotation of objects via an RL policy. It consists of 70 experiments in total, 30 in the real world and 40 in simulation, each lasting 30 seconds. For training neural field models with FeelSight, refer to the [NeuralFeels](https://github.com/facebookresearch/neuralfeels) repository. 14 | 15 | ## Simulation data 16 | 17 | [Our simulated data](https://suddhu.github.io/neural-feels/video/feelsight_sim_rubber_duck.mp4) is collected in IsaacGym with TACTO touch simulation in the loop. 18 | 19 | ## Real-world data 20 | 21 | [Here's](https://suddhu.github.io/neural-feels/video/feelsight_real_bell_pepper.mp4) an example of real-world data from our three-camera setup and the DIGIT-Allegro hand. 22 | 23 | ## Robot setup 24 | 25 | The Allegro hand is mounted on the Franka Emika Panda robot. The hand is sensorized with DIGIT tactile sensors and surrounded by three Intel RealSense cameras. 26 | 27 | 28 | 29 | ## Dataset structure 30 | 31 | For dataloaders, refer to the [NeuralFeels](https://github.com/facebookresearch/neuralfeels) repository. 32 | 33 | ```bash 34 | feelsight/ # root directory, either feelsight or feelsight_real 35 | ├── object_1/ # e.g. 077_rubiks_cube 36 | │ ├── 00/ # log directory 37 | │ │ ├── allegro/ # tactile sensor data 38 | │ │ │ ├── index/ # finger id 39 | │ │ │ │ ├── depth # only in sim, ground-truth 40 | │ │ │ │ │ └── ..jpg 41 | │ │ │ │ ├── image # RGB tactile images 42 | │ │ │ │ │ └── ..jpg 43 | │ │ │ │ └── mask # only in sim, ground-truth 44 | │ │ │ │ └── ..jpg 45 | │ │ │ └── .. 46 | │ │ ├── realsense/ # RGB-D data 47 | │ │ │ ├── front-left/ # camera id 48 | │ │ │ │ ├── image # RGB images 49 | │ │ │ │ │ └── ..jpg 50 | │ │ │ │ ├── seg # only in sim, ground-truth 51 | │ │ │ │ │ └── ..jpg 52 | │ │ │ │ └── depth.npz # depth images 53 | │ │ ├── object_1.mp4 # video of sensor stream 54 | │ │ └── data.pkl # proprioception data 55 | │ └── .. 56 | ├── object_2/ 57 | │ └── .. 58 | └── .. 59 | ``` 60 | 61 | ## Citation 62 | 63 | If you find NeuralFeels useful in your research, please consider citing our paper: 64 | 65 | ```bibtex 66 | @article{suresh2024neuralfeels, 67 | title={{N}eural feels with neural fields: {V}isuo-tactile perception for in-hand manipulation}, 68 | author={Suresh, Sudharshan and Qi, Haozhi and Wu, Tingfan and Fan, Taosha and Pineda, Luis and Lambeta, Mike and Malik, Jitendra and Kalakrishnan, Mrinal and Calandra, Roberto and Kaess, Michael and Ortiz, Joseph and Mukadam, Mustafa}, 69 | journal={Science Robotics}, 70 | pages={adl0628}, 71 | year={2024}, 72 | publisher={American Association for the Advancement of Science} 73 | } 74 | ``` 75 | -------------------------------------------------------------------------------- /neuralfeels/datasets/redwood_depth_noise_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
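# A rough usage sketch (hypothetical): given a float32 ground-truth depth map in
# meters and the Redwood distortion table `model` (an (80, 80, 5) array, inferred
# from the indexing in _undistort below), a noisy depth image is produced with:
#   noisy = _simulate(gt_depth, model, noise_multiplier=1.0)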
5 | 6 | # Modified from Habitat-Sim (https://aihabitat.org/docs/habitat-sim/habitat_sim.sensors.noise_models.RedwoodDepthNoiseModel.html) and based on the 7 | # Redwood Depth Noise Model (http://redwood-data.org/indoor/data/simdepth.py) from 8 | # choi2015robust (https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Choi_Robust_Reconstruction_of_2015_CVPR_paper.pdf) 9 | 10 | import numba 11 | import numpy as np 12 | 13 | try: 14 | import torch 15 | from torch import Tensor 16 | except ImportError: 17 | torch = None 18 | 19 | 20 | # Read about the noise model here: http://www.alexteichman.com/octo/clams/ 21 | # Original source code: http://redwood-data.org/indoor/data/simdepth.py 22 | @numba.jit(nopython=True, fastmath=True) 23 | def _undistort(x, y, z, model): 24 | i2 = int((z + 1) / 2) 25 | i1 = int(i2 - 1) 26 | a = (z - (i1 * 2.0 + 1.0)) / 2.0 27 | x = x // 8 28 | y = y // 6 29 | f = (1.0 - a) * model[y, x, min(max(i1, 0), 4)] + a * model[y, x, min(i2, 4)] 30 | 31 | if f < 1e-5: 32 | return 0.0 33 | 34 | return z / f 35 | 36 | 37 | @numba.jit(nopython=True, parallel=True, fastmath=True) 38 | def _simulate(gt_depth, model, noise_multiplier): 39 | noisy_depth = np.empty_like(gt_depth) 40 | 41 | H, W = gt_depth.shape 42 | ymax, xmax = H - 1.0, W - 1.0 43 | 44 | rand_nums = np.random.randn(H, W, 3).astype(np.float32) 45 | 46 | # Parallelize just the outer loop. This doesn't change the speed 47 | # noticeably but reduces CPU usage compared to two parallel loops 48 | for j in numba.prange(H): 49 | for i in range(W): 50 | y = int( 51 | min(max(j + rand_nums[j, i, 0] * 0.25 * noise_multiplier, 0.0), ymax) 52 | + 0.5 53 | ) 54 | x = int( 55 | min(max(i + rand_nums[j, i, 1] * 0.25 * noise_multiplier, 0.0), xmax) 56 | + 0.5 57 | ) 58 | 59 | # Downsample 60 | d = gt_depth[y - y % 2, x - x % 2] 61 | # If the depth is greater than 10, the sensor will just return 0 62 | if d >= 10.0: 63 | noisy_depth[j, i] = 0.0 64 | else: 65 | # Distort 66 | # The noise model was originally made for a 640x480 sensor, 67 | # so re-map our arbitrarily sized sensor to that size! 68 | undistorted_d = _undistort( 69 | int(x / xmax * 639.0 + 0.5), int(y / ymax * 479.0 + 0.5), d, model 70 | ) 71 | 72 | if undistorted_d == 0.0: 73 | noisy_depth[j, i] = 0.0 74 | else: 75 | denom = round( 76 | ( 77 | 35.130 / undistorted_d 78 | + rand_nums[j, i, 2] * 0.027778 * noise_multiplier 79 | ) 80 | * 8.0 81 | ) 82 | if denom <= 1e-5: 83 | noisy_depth[j, i] = 0.0 84 | else: 85 | noisy_depth[j, i] = 35.130 * 8.0 / denom 86 | 87 | return noisy_depth 88 | -------------------------------------------------------------------------------- /scripts/config/main/touch_depth/vit.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and its affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | mode : vit 7 | weights: dpt_real 8 | 9 | settings: 10 | real: 11 | blend_sz : 0 12 | border : 0 13 | ratio : 1.2 # multiplier to decide the contact threshold. Set higher to reduce false positives 14 | clip : 10 # sets all heightmap outputs < clip pix to zero. Set higher to reduce noisy predictions 15 | batch_size : 1 16 | bg_id: null 17 | 18 | sim: 19 | blend_sz : 0 20 | border : 0 21 | ratio : 0 # multiplier to decide the contact threshold. Set higher to reduce false positives 22 | clip : 5 # sets all heightmap outputs < clip pix to zero.
Set higher to reduce noisy predictions 23 | batch_size : 1 24 | bg_id: 10 25 | 26 | General: 27 | device: cuda 28 | type : depth 29 | model_timm : vit_small_patch16_224.dino 30 | emb_dim : 384 31 | hooks : [2, 5, 8, 11] 32 | read : projection 33 | resample_dim : 128 34 | optim : adam 35 | lr_backbone : 1e-5 36 | lr_scratch : 1e-4 37 | loss_depth : mse 38 | loss_segmentation : ce 39 | momentum : 0.9 40 | epochs : 500 41 | batch_size : 50 42 | path_model : data/tactile_transformer 43 | path_input_images : data/tacto_data/004_sugar_box/00/tactile_images 44 | path_predicted_images : output 45 | seed : 0 46 | patch_size : 16 47 | 48 | Dataset: 49 | paths: 50 | path_dataset : input 51 | list_datasets: [ 52 | "002_master_chef_can", 53 | "003_cracker_box", 54 | "007_tuna_fish_can", 55 | "008_pudding_box", 56 | "009_gelatin_box", 57 | "010_potted_meat_can", 58 | "011_banana", 59 | "012_strawberry", 60 | "013_apple", 61 | "014_lemon", 62 | "015_peach", 63 | "016_pear", 64 | "017_orange", 65 | "018_plum", 66 | "019_pitcher_base", 67 | "024_bowl", 68 | "026_sponge", 69 | "029_plate", 70 | "030_fork", 71 | "031_spoon", 72 | "032_knife", 73 | "033_spatula", 74 | "036_wood_block", 75 | "040_large_marker", 76 | "044_flat_screwdriver", 77 | "050_medium_clamp", 78 | "051_large_clamp", 79 | "052_extra_large_clamp", 80 | "053_mini_soccer_ball", 81 | "054_softball", 82 | "056_tennis_ball", 83 | "057_racquetball", 84 | "058_golf_ball", 85 | "061_foam_brick", 86 | "062_dice", 87 | "065-a_cups", 88 | "065-b_cups", 89 | "070-a_colored_wood_blocks", 90 | "072-a_toy_airplane", 91 | "077_rubiks_cube"] 92 | path_images : tactile_images 93 | path_segmentations : gt_contactmasks 94 | path_depths : gt_heightmaps 95 | extensions : 96 | ext_images : .jpg 97 | ext_segmentations : .jpg 98 | ext_depths : .jpg 99 | splits: 100 | split_train : 0.6 101 | split_val : 0.2 102 | split_test : 0.2 103 | transforms: 104 | resize : [224, 224] 105 | p_flip : 0.0 106 | p_crop : 0.0 107 | p_rot : 0.0 108 | classes: 109 | "1": 110 | name: contact 111 | color: 255 112 | wandb : 113 | enable : true 114 | username : suddhu 115 | images_to_show : 5 116 | im_h : 640 117 | im_w : 480 118 | -------------------------------------------------------------------------------- /neuralfeels/eval/group_plot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ 7 | Plot group statistics for a sweep of neuralfeels experiments 8 | Usage: python neuralfeels/eval/group_plot.py log_path= # e.g. 
multirun/2023-07-31/14-27-43 9 | """ 10 | 11 | import os 12 | 13 | import git 14 | import hydra 15 | from omegaconf import DictConfig 16 | from tqdm import tqdm 17 | 18 | from neuralfeels.viz.plot_metrics import ( 19 | avg_map_error_over_time, 20 | avg_map_error_per_experiment, 21 | avg_map_error_per_modality, 22 | avg_map_error_per_object, 23 | avg_pose_error_over_time, 24 | avg_pose_error_per_camera_placement, 25 | avg_pose_error_per_experiment, 26 | avg_pose_error_per_modality, 27 | avg_pose_error_per_object, 28 | avg_pose_error_per_optimizer, 29 | avg_pose_error_per_shape_res, 30 | avg_precision_over_time, 31 | avg_recall_over_time, 32 | avg_timing_per_modality, 33 | avg_timing_per_optimizer, 34 | draw_map_error, 35 | draw_pose_error, 36 | get_dataframe, 37 | map_error_vs_thresh, 38 | success_failure_stats, 39 | ) 40 | 41 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 42 | 43 | from pathlib import Path 44 | 45 | 46 | @hydra.main(version_base=None, config_path="config/", config_name="group_error") 47 | def main(cfg: DictConfig) -> None: 48 | log_path = os.path.join(root, cfg.log_path) 49 | if log_path[-1] == "/": 50 | log_path = log_path[:-1] 51 | all_expts = [] 52 | for path in Path(log_path).rglob("stats.pkl"): 53 | expt_path = str(path.parent).replace(log_path + "/", "") 54 | all_expts.append(expt_path) 55 | which_f_score = cfg.which_f_score 56 | print(f"Found {len(all_expts)} experiments in {log_path}: {all_expts}") 57 | df_combined = get_dataframe(all_expts, log_path, which_f_score) 58 | 59 | # assert len(df_combined["slam_mode"].unique()) == 1 # only one slam_mode per plot 60 | slam_mode = df_combined["slam_mode"].unique()[0] 61 | 62 | avg_timing_per_optimizer(df_combined, log_path) 63 | avg_timing_per_modality(df_combined, log_path) 64 | 65 | if slam_mode in ["pose", "slam"]: 66 | avg_pose_error_over_time(df_combined, log_path) 67 | avg_pose_error_per_modality(df_combined, log_path) 68 | avg_pose_error_per_optimizer(df_combined, log_path) 69 | avg_pose_error_per_object(df_combined, log_path) 70 | avg_pose_error_per_camera_placement(df_combined, log_path) 71 | success_failure_stats(df_combined) 72 | if slam_mode in ["map", "slam"]: 73 | avg_map_error_over_time(df_combined, log_path) 74 | avg_precision_over_time(df_combined, log_path) 75 | avg_recall_over_time(df_combined, log_path) 76 | avg_map_error_per_modality(df_combined, log_path) 77 | avg_map_error_per_object(df_combined, log_path) 78 | map_error_vs_thresh(all_expts, log_path) 79 | 80 | if slam_mode in ["pose", "slam"]: 81 | avg_pose_error_per_experiment(df_combined, log_path) 82 | avg_pose_error_per_shape_res(df_combined, log_path) 83 | if slam_mode in ["map", "slam"]: 84 | avg_map_error_per_experiment(df_combined, log_path) 85 | if cfg.individual: 86 | print("Drawing individual plots") 87 | for expt_path in tqdm(all_expts): 88 | if "map" in expt_path or "slam" in expt_path: 89 | draw_map_error(expt_path=expt_path) 90 | if "pose" in expt_path or "slam" in expt_path: 91 | draw_pose_error(expt_path=expt_path, slam_mode=slam_mode) 92 | print(f"All outputs saved at {log_path}") 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our 
project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team at . All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 
71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/dpt_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Tactile transformer model 9 | 10 | import numpy as np 11 | import timm 12 | import torch.nn as nn 13 | 14 | from neuralfeels.contrib.tactile_transformer.fusion import Fusion 15 | from neuralfeels.contrib.tactile_transformer.head import HeadDepth, HeadSeg 16 | from neuralfeels.contrib.tactile_transformer.reassemble import Reassemble 17 | 18 | 19 | class DPTModel(nn.Module): 20 | def __init__( 21 | self, 22 | image_size=(3, 384, 384), 23 | patch_size=16, 24 | emb_dim=1024, 25 | resample_dim=256, 26 | read="projection", 27 | num_layers_encoder=24, 28 | hooks=[5, 11, 17, 23], 29 | reassemble_s=[4, 8, 16, 32], 30 | transformer_dropout=0, 31 | nclasses=2, 32 | type="full", 33 | model_timm="vit_large_patch16_384", 34 | pretrained=False, 35 | ): 36 | """ 37 | type : {"full", "depth", "segmentation"} 38 | image_size : (c, h, w) 39 | patch_size : *a square* 40 | emb_dim <=> D (in the paper) 41 | resample_dim <=> ^D (in the paper) 42 | read : {"ignore", "add", "projection"} 43 | """ 44 | super().__init__() 45 | 46 | self.transformer_encoders = timm.create_model(model_timm, pretrained=pretrained) 47 | self.type_ = type 48 | 49 | # Register hooks 50 | self.activation = {} 51 | self.hooks = hooks 52 | self._get_layers_from_hooks(self.hooks) 53 | 54 | # Reassembles Fusion 55 | self.reassembles = [] 56 | self.fusions = [] 57 | for s in reassemble_s: 58 | self.reassembles.append( 59 | Reassemble(image_size, read, patch_size, s, emb_dim, resample_dim) 60 | ) 61 | self.fusions.append(Fusion(resample_dim)) 62 | self.reassembles = nn.ModuleList(self.reassembles) 63 | self.fusions = nn.ModuleList(self.fusions) 64 | 65 | # Head 66 | if type == "full": 67 | self.head_depth = HeadDepth(resample_dim) 68 | self.head_segmentation = HeadSeg(resample_dim, nclasses=nclasses) 69 | elif type == "depth": 70 | self.head_depth = HeadDepth(resample_dim) 71 | self.head_segmentation = None 72 | else: 73 | self.head_depth = None 74 | self.head_segmentation = HeadSeg(resample_dim, nclasses=nclasses) 75 | 76 | def forward(self, img): 77 | 78 | t = self.transformer_encoders(img) 79 | previous_stage = None 80 | for i in np.arange(len(self.fusions) - 1, -1, -1, dtype=int): 81 | hook_to_take = "t" + str(self.hooks[int(i)]) 82 | activation_result = self.activation[hook_to_take] 83 | reassemble_result = self.reassembles[i](activation_result) 84 | fusion_result = self.fusions[i](reassemble_result, previous_stage) 85 | previous_stage = fusion_result 86 | out_depth = None 87 | out_segmentation = None 88 | if self.head_depth != None: 89 | out_depth = self.head_depth(previous_stage) 90 | if self.head_segmentation != None: 91 | out_segmentation = 
self.head_segmentation(previous_stage) 92 | return out_depth, out_segmentation 93 | 94 | def _get_layers_from_hooks(self, hooks: list): 95 | def get_activation(name): 96 | def hook(model, input, output): 97 | self.activation[name] = output 98 | 99 | return hook 100 | 101 | for h in hooks: 102 | self.transformer_encoders.blocks[h].register_forward_hook( 103 | get_activation("t" + str(h)) 104 | ) 105 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/SceneNode.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | import numpy as np 9 | import open3d as o3d 10 | 11 | from .MeshNode import MeshNode 12 | from .Transform import Transform 13 | 14 | 15 | class SceneNode: 16 | def __init__(self, parent=None): 17 | self.parent = parent 18 | self.children = [] 19 | self.name = None 20 | # Store the local transform and world transform 21 | self.localTransform = Transform() 22 | self.worldMatrix = np.eye(4) 23 | self._transformHelper = o3d.geometry.TriangleMesh.create_coordinate_frame() 24 | # Store the mesh and deal with functions that draw the mesh based on the transform 25 | self.meshNode = MeshNode() 26 | self.joint = None 27 | 28 | def setParent(self, parent): 29 | if parent is None: 30 | raise RuntimeError("Invalid Parent: parent is None") 31 | self.parent = parent 32 | self.worldMatrix = np.dot( 33 | self.parent.worldMatrix, self.localTransform.getMatrix() 34 | ) 35 | 36 | def update(self): 37 | # Update the worldMatrix of current scene node 38 | if self.parent is not None: 39 | self.worldMatrix = np.dot( 40 | self.parent.worldMatrix, self.localTransform.getMatrix() 41 | ) 42 | else: 43 | self.worldMatrix = self.localTransform.getMatrix() 44 | # Update the worldMatrix for all its children 45 | for child in self.children: 46 | child.update() 47 | 48 | def addChild(self, child): 49 | # child should also be SceneNode 50 | self.children.append(child) 51 | 52 | def addMesh(self, mesh): 53 | # mesh should be in open3d form 54 | self.meshNode.addMesh(mesh) 55 | 56 | def addMeshFile(self, mesh_file, color): 57 | self.meshNode.addMeshFile(mesh_file, color) 58 | 59 | def getMesh(self): 60 | # Get the new mesh based on the world Matrix (Assume that the matrix has been updated) 61 | new_mesh = self.meshNode.getMesh(self.worldMatrix) 62 | if new_mesh is not None: 63 | new_mesh = [new_mesh] 64 | else: 65 | new_mesh = [] 66 | # add mesh from all children 67 | for child in self.children: 68 | new_mesh += child.getMesh() 69 | return new_mesh 70 | 71 | def resetlocalTransform(self): 72 | self.localTransform = Transform() 73 | 74 | def translate(self, translation): 75 | # translation should be in the array form np.array([float, float, float]) 76 | transMat = np.array( 77 | [ 78 | [1, 0, 0, translation[0]], 79 | [0, 1, 0, translation[1]], 80 | [0, 0, 1, translation[2]], 81 | [0, 0, 0, 1], 82 | ] 83 | ) 84 | self.localTransform.translateMat(transMat) 85 | 86 | def scale(self, scale): 87 | s = np.eye(4) 88 | s[:3, :3] = np.diag(scale) 89 | self.localTransform.scaleMat(s) 90 | 91 | def rotate(self, axis, angle): 92 | # Convert axis into 3*1 array 93 | axis = axis / np.linalg.norm(axis) 94 | axisAngle = axis * angle 95 | # matrix here is 3*3 96 |
matrix = self._transformHelper.get_rotation_matrix_from_axis_angle(axisAngle) 97 | rotMat = np.eye(4) 98 | rotMat[0:3, 0:3] = matrix 99 | self.localTransform.rotateMat(rotMat) 100 | 101 | def rotateXYZ(self, angle): 102 | # angle should be in array form [float, float, float] in radians 103 | matrix = self._transformHelper.get_rotation_matrix_from_xyz(angle) 104 | rotMat = np.eye(4) 105 | rotMat[0:3, 0:3] = matrix 106 | self.localTransform.rotateMat(rotMat) 107 | -------------------------------------------------------------------------------- /neuralfeels/eval/feelsight_init.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
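# Each pose string below is a row-major 4x4 homogeneous transform flattened into
# 16 floats; a minimal (hypothetical) way to recover the matrix:
#   import numpy as np
#   T_init = np.array(pose_str.split(), dtype=float).reshape(4, 4)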
0.116068 0 0 0 0 1", 41 | "04": "0.438803 -0.86858 -0.230265 0 -0.0173117 -0.264377 0.964264 0 -0.898417 -0.419135 -0.131045 0 0 0 0 1", 42 | }, 43 | "pepper_grinder": { 44 | "00": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 45 | "01": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 46 | "02": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 47 | "03": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 48 | "04": "0.317113 -0.944011 0.0910105 0 0.647693 0.285669 0.70632 0 -0.692772 -0.165037 0.702018 -0.02 0 0 0 1", 49 | }, 50 | "rubiks_cube_small": { 51 | "00": "0.904895 -0.033284 -0.424332 0 -0.418351 -0.253193 -0.872282 0 -0.078405 0.966843 -0.243038 0 0 0 0 1", 52 | "01": "0.440733 0.0711927 -0.894811 0 0.885631 0.128017 0.446396 0 0.14633 -0.989213 -0.00662905 0 0 0 0 1", 53 | "02": "0.862521 0.503581 -0.0496423 0 -0.502013 0.863885 0.0410602 0 0.0635623 -0.0104942 0.997923 0 0 0 0 1", 54 | "03": "0.117835 -0.397397 0.91005 0 0.357761 -0.837905 -0.412216 0 0.926349 0.374153 0.0434396 0 0 0 0 1", 55 | "04": "0.437746 -0.104018 0.893062 0 0.855152 -0.258576 -0.449281 0 0.277658 0.960374 -0.0242391 0 0 0 0 1", 56 | }, 57 | } 58 | -------------------------------------------------------------------------------- /neuralfeels/eval/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Compute metrics for neuralfeels evaluation 7 | 8 | import time 9 | 10 | import numpy as np 11 | import torch 12 | import trimesh 13 | from scipy.spatial import cKDTree as KDTree 14 | 15 | np.set_printoptions(precision=2, suppress=True) 16 | 17 | 18 | def start_timing(): 19 | if torch.cuda.is_available(): 20 | torch.cuda.synchronize() 21 | start = torch.cuda.Event(enable_timing=True) 22 | end = torch.cuda.Event(enable_timing=True) 23 | start.record() 24 | else: 25 | start = time.perf_counter() 26 | end = None 27 | return start, end 28 | 29 | 30 | def end_timing(start, end): 31 | if torch.cuda.is_available(): 32 | torch.cuda.synchronize() 33 | end.record() 34 | # Waits for everything to finish running 35 | torch.cuda.synchronize() 36 | elapsed_time = start.elapsed_time(end) 37 | else: 38 | end = time.perf_counter() 39 | elapsed_time = end - start 40 | # Convert to milliseconds to have the same units 41 | # as torch.cuda.Event.elapsed_time 42 | elapsed_time = elapsed_time * 1000 43 | return elapsed_time 44 | 45 | 46 | def average_3d_error(point_cloud1, point_cloud2): 47 | # point_cloud1, point_cloud2: numpy arrays of shape (N, 3) 48 | # ADD-S: symmetric average 3D error pose metric (https://arxiv.org/pdf/1711.00199.pdf) 49 | # find nearest neighbors for each point in point_cloud1 50 | tree = KDTree(point_cloud2) 51 | distances, _ = tree.query(point_cloud1) # returns euclidean distance 52 | return np.mean(distances) 53 | 54 | 55 | def sample_trimesh_points(mesh, num_samples): 56 | """ 57 | Sample points on trimesh surface 58 | """ 59 | sampled_points = trimesh.sample.sample_surface(mesh, num_samples)[0] 60 | return sampled_points 61 | 62 | 63 | def compute_f_score( 64 | gt_points_np, recon_mesh, num_mesh_samples=30000, T=[2e-2, 1e-2, 5e-3, 1e-3] 65 | ): 
66 | """ 67 | https://openaccess.thecvf.com/content_ICCV_2017/papers/Park_Colored_Point_Cloud_ICCV_2017_paper.pdf 68 | Compute F-score between a ground truth point cloud and a reconstructed mesh. 69 | gt_points_np: trimesh.points.PointCloud of just poins, sampled from the surface (see 70 | compute_metrics.ply for more documentation) 71 | 72 | recon_mesh: trimesh.base.Trimesh of output mesh from whichever autoencoding reconstruction 73 | method (see compute_metrics.py for more) 74 | 75 | """ 76 | 77 | gen_points_sampled = sample_trimesh_points(recon_mesh, num_mesh_samples) 78 | # print(f"ptp gen_points_sampled: {np.ptp(gen_points_sampled, axis=0)*1000}, gt_points_np: {np.ptp(gt_points_np, axis=0)*1000}") 79 | 80 | # one_distances is distance from each gen_points_sampled to its nearest neighbor in gt_points_np 81 | gt_points_kd_tree = KDTree(gt_points_np) 82 | one_distances, _ = gt_points_kd_tree.query(gen_points_sampled, p=2) 83 | 84 | # two_distances is distance from each gt point to its nearest neighbor in gen_points_sampled 85 | gen_points_kd_tree = KDTree( 86 | gen_points_sampled 87 | ) # build a KD tree for the generated points 88 | two_distances, _ = gen_points_kd_tree.query( 89 | gt_points_np, p=2 90 | ) # find nearest neighbors for all gt_points_np from gen_points_sampled 91 | 92 | f_scores, precisions, recalls = [], [], [] 93 | for t in T: 94 | precision = (one_distances < t).sum() / len( 95 | gen_points_sampled 96 | ) # precision = percentage of gen_points_sampled that have a gt point within T mm 97 | recall = (two_distances < t).sum() / len( 98 | gt_points_np 99 | ) # recall = percentage of gt_points_np that have a gen_points_sampled within T mm 100 | # compupte F-score = 2 * (precision * recall) / (precision + recall) where 101 | # precision = percentage of gen_points_sampled that have a gt point within T mm 102 | f_score = 2 * (precision * recall) / (precision + recall) 103 | precisions.append(precision) 104 | recalls.append(recall) 105 | f_scores.append(f_score) 106 | 107 | _, vertex_ids = gen_points_kd_tree.query(np.array(recon_mesh.vertices)) 108 | return (f_scores, precisions, recalls, one_distances, vertex_ids) 109 | -------------------------------------------------------------------------------- /neuralfeels/viz/show_object_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Viser visualization script for objects in the FeelSight dataset. 
7 | # pip install viser before running this script 8 | 9 | import os 10 | import time 11 | from pathlib import Path 12 | 13 | import git 14 | import numpy as np 15 | import trimesh 16 | import viser 17 | 18 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 19 | 20 | 21 | def main( 22 | model_path: Path = os.path.join(root, "data", "assets", "gt_models", "ycb") 23 | ) -> None: 24 | # get list of folders in model_path 25 | object_names = os.listdir(model_path) 26 | # remove urdf files 27 | 28 | if "ycb" in model_path: 29 | object_names = [ 30 | "contactdb_rubber_duck", 31 | "contactdb_elephant", 32 | "077_rubiks_cube", 33 | "large_dice", 34 | "016_pear", 35 | "015_peach", 36 | "010_potted_meat_can", 37 | "073-f_lego_duplo", 38 | ] 39 | else: 40 | object_names = [ 41 | x for x in object_names if not x.endswith(".urdf") and x != ".DS_Store" 42 | ] 43 | 44 | server = viser.ViserServer() 45 | 46 | def add_selectable_mesh( 47 | name: str, mesh: trimesh.Trimesh, x: float, y: float 48 | ) -> None: 49 | def add_mesh() -> None: 50 | handle = server.add_mesh_trimesh( 51 | "/" + name, 52 | mesh=mesh, 53 | # vertices=mesh.vertices, 54 | # faces=mesh.faces, 55 | position=(y, 0.0, x), 56 | # color=colorsys.hls_to_rgb( 57 | # np.random.default_rng( 58 | # np.frombuffer( 59 | # hashlib.md5(name.encode("utf-8")).digest(), 60 | # dtype="uint32", 61 | # ) 62 | # + 5 63 | # ).uniform(), 64 | # 0.6, 65 | # 0.9, 66 | # ), 67 | ) 68 | 69 | # Requires the cmk/add_click branch of viser. 70 | # handle.clickable = True 71 | # @handle.on_click 72 | def _(_) -> None: 73 | add_mesh() 74 | 75 | add_mesh() 76 | 77 | nominal_column_width = len(object_names) 78 | rows_indices = np.array_split( 79 | np.arange(len(object_names)), np.rint(len(object_names) / nominal_column_width) 80 | ) 81 | mesh_diags = [] 82 | for row, row_indices in enumerate(rows_indices): 83 | for col, mesh_index in enumerate(row_indices): 84 | x = row * 0.12 85 | y = col * nominal_column_width * 0.12 / len(row_indices) 86 | mesh_path = os.path.join( 87 | model_path, object_names[mesh_index], "textured.obj" 88 | ) 89 | # check if mesh_path exists 90 | if not os.path.exists(mesh_path): 91 | mesh_path = os.path.join( 92 | model_path, object_names[mesh_index], "google_16k", "textured.obj" 93 | ) 94 | if not os.path.exists(mesh_path): 95 | mesh_path = os.path.join( 96 | model_path, 97 | object_names[mesh_index], 98 | f"{object_names[mesh_index]}.obj", 99 | ) 100 | mesh = trimesh.load( 101 | mesh_path, 102 | force="mesh", 103 | ) 104 | if isinstance(mesh.visual, trimesh.visual.texture.TextureVisuals): 105 | # TextureVisuals are not supported by viser yet 106 | mesh.visual = mesh.visual.to_color() 107 | 108 | # append mesh diagonal 109 | mesh_diags.append(mesh.scale) 110 | print(f"Added {object_names[mesh_index]} at ({x}, {y})") 111 | print(f"Object: {object_names[mesh_index]}, mesh diagonal: {mesh.scale}") 112 | add_selectable_mesh(object_names[mesh_index], mesh, x=x, y=y) 113 | 114 | # print min and max mesh diagonal 115 | mesh_diags = np.array(mesh_diags) 116 | print(f"Min mesh diagonal: {np.min(mesh_diags)}") 117 | print(f"Max mesh diagonal: {np.max(mesh_diags)}") 118 | while True: 119 | time.sleep(10.0) 120 | 121 | 122 | if __name__ == "__main__": 123 | # main() 124 | main( 125 | model_path=os.path.join(root, "data", "assets", "gt_models", "ycb") 126 | ) # sim dataset 127 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/reassemble.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Resampling code for tactile transformer 9 | 10 | import torch 11 | import torch.nn as nn 12 | from einops.layers.torch import Rearrange 13 | 14 | 15 | class Read_ignore(nn.Module): 16 | def __init__(self, start_index=1): 17 | super(Read_ignore, self).__init__() 18 | self.start_index = start_index 19 | 20 | def forward(self, x): 21 | return x[:, self.start_index :] 22 | 23 | 24 | class Read_add(nn.Module): 25 | def __init__(self, start_index=1): 26 | super(Read_add, self).__init__() 27 | self.start_index = start_index 28 | 29 | def forward(self, x): 30 | if self.start_index == 2: 31 | readout = (x[:, 0] + x[:, 1]) / 2 32 | else: 33 | readout = x[:, 0] 34 | return x[:, self.start_index :] + readout.unsqueeze(1) 35 | 36 | 37 | class Read_projection(nn.Module): 38 | def __init__(self, in_features, start_index=1): 39 | super(Read_projection, self).__init__() 40 | self.start_index = start_index 41 | self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) 42 | 43 | def forward(self, x): 44 | readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :]) 45 | features = torch.cat((x[:, self.start_index :], readout), -1) 46 | return self.project(features) 47 | 48 | 49 | class MyConvTranspose2d(nn.Module): 50 | def __init__(self, conv, output_size): 51 | super(MyConvTranspose2d, self).__init__() 52 | self.output_size = output_size 53 | self.conv = conv 54 | 55 | def forward(self, x): 56 | x = self.conv(x, output_size=self.output_size) 57 | return x 58 | 59 | 60 | class Resample(nn.Module): 61 | def __init__(self, p, s, h, emb_dim, resample_dim): 62 | super(Resample, self).__init__() 63 | assert s in [4, 8, 16, 32], "s must be in [4, 8, 16, 32]" 64 | self.conv1 = nn.Conv2d( 65 | emb_dim, resample_dim, kernel_size=1, stride=1, padding=0 66 | ) 67 | if s == 4: 68 | self.conv2 = nn.ConvTranspose2d( 69 | resample_dim, 70 | resample_dim, 71 | kernel_size=4, 72 | stride=4, 73 | padding=0, 74 | bias=True, 75 | dilation=1, 76 | groups=1, 77 | ) 78 | elif s == 8: 79 | self.conv2 = nn.ConvTranspose2d( 80 | resample_dim, 81 | resample_dim, 82 | kernel_size=2, 83 | stride=2, 84 | padding=0, 85 | bias=True, 86 | dilation=1, 87 | groups=1, 88 | ) 89 | elif s == 16: 90 | self.conv2 = nn.Identity() 91 | else: 92 | self.conv2 = nn.Conv2d( 93 | resample_dim, 94 | resample_dim, 95 | kernel_size=2, 96 | stride=2, 97 | padding=0, 98 | bias=True, 99 | ) 100 | 101 | def forward(self, x): 102 | x = self.conv1(x) 103 | x = self.conv2(x) 104 | return x 105 | 106 | 107 | class Reassemble(nn.Module): 108 | def __init__(self, image_size, read, p, s, emb_dim, resample_dim): 109 | """ 110 | p = patch size 111 | s = coefficient resample 112 | emb_dim <=> D (in the paper) 113 | resample_dim <=> ^D (in the paper) 114 | read : {"ignore", "add", "projection"} 115 | """ 116 | super(Reassemble, self).__init__() 117 | channels, image_height, image_width = image_size 118 | 119 | # Read 120 | self.read = Read_ignore() 121 | if read == "add": 122 | self.read = Read_add() 123 | elif read == "projection": 124 | self.read = Read_projection(emb_dim) 125 | 126 | # Concat after read 127 | self.concat = Rearrange( 128 | "b (h w) c -> b c h
w", 129 | c=emb_dim, 130 | h=(image_height // p), 131 | w=(image_width // p), 132 | ) 133 | 134 | # Projection + Resample 135 | self.resample = Resample(p, s, image_height, emb_dim, resample_dim) 136 | 137 | def forward(self, x): 138 | x = self.read(x) 139 | x = self.concat(x) 140 | x = self.resample(x) 141 | return x 142 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/SceneGraph/SceneGraph.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | import re 9 | 10 | import numpy as np 11 | import open3d as o3d 12 | 13 | from .SceneNode import SceneNode 14 | 15 | # TODO : very slow, can we cache the SceneNodes? 16 | 17 | 18 | class SceneGraph: 19 | def __init__(self, rootLink, joint_angles=None): 20 | self.root = SceneNode() 21 | self.joint_angles = joint_angles 22 | self.constructNode(self.root, rootLink) 23 | 24 | def update(self): 25 | self.root.update() 26 | 27 | def getMesh(self): 28 | self.update() 29 | meshes = self.root.getMesh() 30 | new_meshes = [] 31 | for mesh in meshes: 32 | new_meshes.append(mesh) 33 | return new_meshes 34 | 35 | def updateJointAngles(self, joint_angles): 36 | self.joint_angles = joint_angles.cpu().numpy() 37 | return 38 | 39 | def rotateNode(self, node, joint_rpy): 40 | updates = np.nonzero(joint_rpy)[0] 41 | if len(updates) > 1: 42 | for i in [0, 1, 2]: 43 | _joint_rpy = np.zeros(3) 44 | _joint_rpy[i] += joint_rpy[i] 45 | node.rotateXYZ(_joint_rpy) 46 | else: 47 | node.rotateXYZ(joint_rpy) 48 | 49 | def getRPY(self, node): 50 | joint_axis = node.joint.axis 51 | axis_of_rotation = np.nonzero(joint_axis)[0].squeeze().item() 52 | rotate_rpy = np.zeros(3) 53 | joint_rpy = node.joint.origin["rpy"].astype(np.float64) 54 | 55 | if "tip" not in node.joint.joint_name: 56 | joint_id = re.findall("\d+\.\d+", node.joint.joint_name)[0] 57 | joint_id = int(float(joint_id)) 58 | rotate_rpy[axis_of_rotation] += self.joint_angles[joint_id] * ( 59 | -1.0 if joint_id == 13 else 1.0 60 | ) 61 | # rotate_rpy[axis_of_rotation] += -3.14159 if joint_id == 12 else 0.0 62 | joint_rpy += rotate_rpy 63 | return joint_rpy 64 | 65 | def updateState(self, node=None): 66 | if node == None: 67 | node = self.root 68 | 69 | if "base_link" not in node.name: 70 | node.resetlocalTransform() 71 | 72 | if node.joint != None: 73 | # Construct the joint node firstly; Deal with xyz and rpy of the node 74 | joint_xyz = node.joint.origin["xyz"] 75 | joint_rpy = self.getRPY(node) 76 | 77 | # TODO: fix the 78 | # if node.name == "link_12.0": 79 | # print("update state joint_rpy", joint_rpy) 80 | 81 | self.rotateNode(node, joint_rpy) 82 | node.translate(joint_xyz) 83 | 84 | for child_node in node.children: 85 | self.updateState(child_node) 86 | 87 | def constructNode(self, node, link): 88 | node.name = link.link.link_name 89 | 90 | node.joint = link.joint 91 | if node.joint != None: 92 | # Construct the joint node firstly; Deal with xyz and rpy of the node 93 | 94 | joint_xyz = node.joint.origin["xyz"] 95 | joint_rpy = self.getRPY(node) 96 | 97 | # if node.name == "link_12.0": 98 | # print("construct state joint_rpy", joint_rpy) 99 | 100 | self.rotateNode(node, joint_rpy) 101 | node.translate(joint_xyz) 102 | 103 | # Construct the mesh nodes for multiple 
visuals in link 104 | visuals = link.link.visuals 105 | color = link.link.color 106 | for visual in visuals: 107 | visual_node = SceneNode(node) 108 | node.addChild(visual_node) 109 | visual_node.name = node.name + "_mesh:" + str(visual.visual_name) 110 | if visual.geometry_mesh["filename"] is None: 111 | raise RuntimeError("Invalid file path") 112 | visual_node.addMeshFile(visual.geometry_mesh["filename"], color) 113 | # Deal with xyz and rpy of the visual node 114 | visual_xyz = visual.origin["xyz"] 115 | visual_rpy = visual.origin["rpy"] 116 | visual_scale = visual.geometry_mesh["scale"] 117 | visual_node.rotateXYZ(visual_rpy) 118 | visual_node.translate(visual_xyz) 119 | visual_node.scale(visual_scale) 120 | 121 | # Construct node for the children 122 | for child in link.children: 123 | child_node = SceneNode(node) 124 | node.addChild(child_node) 125 | self.constructNode(child_node, child) 126 | -------------------------------------------------------------------------------- /neuralfeels/viz/debug.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Debug utilities for visualizing neuralfeels outputs 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import torch 11 | from scipy.spatial.transform import Rotation as R 12 | 13 | 14 | def viz_dirs_C(dirs_C: torch.Tensor, poses: torch.Tensor, skip: int = 100): 15 | """ 16 | Visualize the vector field for a single image from camera origin 17 | """ 18 | poses = poses.cpu().numpy() 19 | 20 | dirs_C = dirs_C.squeeze().cpu().numpy() 21 | dirs_C = dirs_C.reshape(-1, 3) 22 | 23 | dirs_C = dirs_C[::skip, :] 24 | mags = np.linalg.norm(dirs_C[:, :2], axis=1) 25 | 26 | fig = plt.figure() 27 | ax = fig.add_subplot(projection="3d") 28 | 29 | x, y, z = poses[:, 0, 3], poses[:, 1, 3], poses[:, 2, 3] 30 | 31 | u = dirs_C[:, 0] 32 | v = dirs_C[:, 1] 33 | w = dirs_C[:, 2] 34 | 35 | ax.set_box_aspect((1, 1, 1)) 36 | ax.quiver(x, y, z, u, v, w, length=0.01, colors=plt.cm.plasma(mags)) 37 | ax.view_init(azim=-90, elev=90) # x-y plane 38 | ax.set_xlabel("x") 39 | ax.set_ylabel("y") 40 | ax.set_zlabel("z") 41 | plotCameras(poses, ax) 42 | # ax.set_xlim(-1.5, 1.5) 43 | # ax.set_ylim(-1.5, 1.5) 44 | # ax.set_zlim(-1.5, 1.5) 45 | ax.set_box_aspect([1, 1, 1]) 46 | plt.show() 47 | return 48 | 49 | 50 | def viz_dirs_W(origins: torch.Tensor, dirs_W: torch.Tensor, skip: int = 10): 51 | """ 52 | Visualize the vector field in world coordinates for a batch of images 53 | """ 54 | fig = plt.figure() 55 | ax = fig.add_subplot(projection="3d") 56 | 57 | origins = origins.cpu().numpy() 58 | dirs_W = dirs_W.squeeze().cpu().numpy() 59 | 60 | origins = origins[::skip, :] 61 | dirs_W = dirs_W[::skip, :] 62 | 63 | x, y, z = origins[:, 0], origins[:, 1], origins[:, 2] 64 | 65 | u, v, w = dirs_W[:, 0], dirs_W[:, 1], dirs_W[:, 2] 66 | 67 | ax.quiver(x, y, z, u, v, w, length=0.001, color="black") 68 | # ax.view_init(azim=-90, elev=90) # x-y plane 69 | ax.set_xlabel("x") 70 | ax.set_ylabel("y") 71 | ax.set_zlabel("z") 72 | ax.set_box_aspect((1, 1, 1)) 73 | 74 | plt.show() 75 | return 76 | 77 | 78 | def pose2axes(rotm: np.ndarray): 79 | """ 80 | Convert rotation matrix to x, y, z axes 81 | """ 82 | x, y, z = np.array([1, 0, 0]), np.array([0, 1, 0]), np.array([0, 0, 1]) 83 | r = R.from_matrix(rotm) # (N, 3, 3) rotation matrices 84 | quivers_u = r.apply(x) 85 | 
quivers_v = r.apply(y) 86 | quivers_w = r.apply(z) 87 | return quivers_u, quivers_v, quivers_w 88 | 89 | 90 | def plotCameras(poses: np.ndarray, ax: plt.Axes): 91 | """ 92 | Plot camera matrices (XYZ -> RGB) 93 | """ 94 | if not isinstance(poses, np.ndarray): 95 | poses = poses.cpu().numpy() 96 | 97 | axes_sz = 2e-2 98 | x, y, z = poses[:, 0, 3], poses[:, 1, 3], poses[:, 2, 3] 99 | ax.scatter(x, y, z, color="k", s=1) 100 | u, v, w = pose2axes(poses[:, :3, :3]) 101 | ax.quiver( 102 | x, 103 | y, 104 | z, 105 | u[:, 0], 106 | u[:, 1], 107 | u[:, 2], 108 | length=axes_sz, 109 | color="r", 110 | linewidths=0.5, 111 | alpha=0.5, 112 | normalize=True, 113 | ) 114 | ax.quiver( 115 | x, 116 | y, 117 | z, 118 | v[:, 0], 119 | v[:, 1], 120 | v[:, 2], 121 | length=axes_sz, 122 | color="g", 123 | linewidths=0.5, 124 | alpha=0.5, 125 | normalize=True, 126 | ) 127 | ax.quiver( 128 | x, 129 | y, 130 | z, 131 | w[:, 0], 132 | w[:, 1], 133 | w[:, 2], 134 | length=axes_sz, 135 | color="b", 136 | linewidths=0.5, 137 | alpha=0.5, 138 | normalize=True, 139 | ) 140 | 141 | ax.legend() 142 | ax.set_xlabel("X") 143 | ax.set_ylabel("Y") 144 | ax.set_zlabel("Z") 145 | return 146 | 147 | 148 | def viz_ray_samples(pc: torch.Tensor, poses: torch.Tensor, skip: int = 1): 149 | """ 150 | Visualize sampled points along rays, together with the camera poses 151 | """ 152 | 153 | fig = plt.figure() 154 | ax = fig.add_subplot(projection="3d") 155 | 156 | pc = pc.cpu().numpy() 157 | pc = pc[::skip, :] 158 | 159 | x, y, z = pc[:, 0], pc[:, 1], pc[:, 2] 160 | 161 | ax.scatter(x, y, z) 162 | # ax.view_init(azim=-90, elev=90) # x-y plane 163 | ax.set_xlabel("x") 164 | ax.set_ylabel("y") 165 | ax.set_zlabel("z") 166 | ax.set_box_aspect((1, 1, 1)) 167 | 168 | plotCameras(poses, ax) 169 | plt.show() 170 | return 171 | -------------------------------------------------------------------------------- /neuralfeels/contrib/urdf/URDF/Parser/URDFParser.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
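# Illustrative note (assumption, not in the original source): URDFParser below
# consumes standard URDF. A minimal file exercising every tag the parser reads
# (origin, geometry/mesh, material/color, axis, limit) could look like:
#
#   <robot name="demo">
#     <link name="base_link">
#       <visual name="base_visual">
#         <origin xyz="0 0 0" rpy="0 0 0"/>
#         <geometry><mesh filename="meshes/base.obj" scale="1 1 1"/></geometry>
#         <material name="grey"><color rgba="0.5 0.5 0.5 1"/></material>
#       </visual>
#     </link>
#     <link name="arm"/>
#     <joint name="joint_0.0" type="revolute">
#       <parent link="base_link"/>
#       <child link="arm"/>
#       <origin xyz="0 0 0.1" rpy="0 0 0"/>
#       <axis xyz="0 0 1"/>
#       <limit lower="-1.57" upper="1.57"/>
#     </joint>
#   </robot>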
5 | 6 | # Adapted from https://github.com/Jianghanxiao/Helper3D 7 | 8 | import os 9 | import xml.etree.ElementTree as ET 10 | 11 | import numpy as np 12 | 13 | from .Joint import Joint 14 | from .Link import Link 15 | 16 | 17 | def parseThreeNumber(string): 18 | strings = string.split(" ") 19 | numbers = np.array(list(map(float, strings))) 20 | return numbers 21 | 22 | 23 | class URDFParser: 24 | def __init__(self, file_name): 25 | self.file_name = file_name 26 | self._root_path = os.path.dirname(file_name) + "/" 27 | self.links = {} 28 | self.joints = {} 29 | 30 | # Parse the URDF (XML) file into a tree structure 31 | def parse(self): 32 | # Get the XML tree 33 | root_xml = ET.parse(self.file_name).getroot() 34 | self.links_xml = root_xml.findall("link") 35 | self.joints_xml = root_xml.findall("joint") 36 | # Parse links before parsing joints 37 | self.parseLinks() 38 | self.parseJoints() 39 | 40 | def parseLinks(self): 41 | for link_xml in self.links_xml: 42 | link_name = link_xml.attrib["name"] 43 | link = Link(link_name) 44 | # Deal with multiple visuals 45 | visuals_xml = link_xml.findall("visual") 46 | for visual_xml in visuals_xml: 47 | # Add new visual in link 48 | if "name" in visual_xml.attrib: 49 | visual_name = visual_xml.attrib["name"] 50 | else: 51 | visual_name = None 52 | link.addVisual(visual_name) 53 | # Get origin 54 | origin_xml = visual_xml.find("origin") 55 | if origin_xml is not None: 56 | if "xyz" in origin_xml.attrib: 57 | xyz = parseThreeNumber(origin_xml.attrib["xyz"]) 58 | link.setVisualOriginXyz(xyz) 59 | if "rpy" in origin_xml.attrib: 60 | rpy = parseThreeNumber(origin_xml.attrib["rpy"]) 61 | link.setVisualOriginRpy(rpy) 62 | # Get geometry 63 | geometry_xml = visual_xml.find("geometry") 64 | if geometry_xml is not None: 65 | mesh_xml = geometry_xml.find("mesh") 66 | if mesh_xml is not None: 67 | filename = mesh_xml.attrib["filename"] 68 | link.setVisualGeometryMeshFilename(self._root_path + filename) 69 | if "scale" in mesh_xml.attrib: 70 | scale = parseThreeNumber(mesh_xml.attrib["scale"]) 71 | link.setVisualMeshScale(scale) 72 | 73 | if visual_xml.find("material") is not None: 74 | color_xml = visual_xml.find("material").find("color") 75 | if color_xml is not None: 76 | link.color = np.fromstring( 77 | color_xml.attrib["rgba"], dtype=float, sep=" " 78 | )[:3] 79 | 80 | self.links[link_name] = link 81 | 82 | def parseJoints(self): 83 | for joint_xml in self.joints_xml: 84 | joint_name = joint_xml.attrib["name"] 85 | joint_type = joint_xml.attrib["type"] 86 | child_name = joint_xml.find("child").attrib["link"] 87 | parent_name = joint_xml.find("parent").attrib["link"] 88 | joint = Joint(joint_name, joint_type, child_name, parent_name) 89 | # Get origin 90 | origin_xml = joint_xml.find("origin") 91 | if origin_xml is not None: 92 | if "xyz" in origin_xml.attrib: 93 | xyz = parseThreeNumber(origin_xml.attrib["xyz"]) 94 | joint.setOriginXyz(xyz) 95 | if "rpy" in origin_xml.attrib: 96 | rpy = parseThreeNumber(origin_xml.attrib["rpy"]) 97 | joint.setOriginRpy(rpy) 98 | # Get Axis 99 | axis_xml = joint_xml.find("axis") 100 | if axis_xml is not None: 101 | axis = parseThreeNumber(axis_xml.attrib["xyz"]) 102 | joint.setAxis(axis) 103 | # Get Limit 104 | limit_xml = joint_xml.find("limit") 105 | if limit_xml is not None: 106 | lower = float(limit_xml.attrib["lower"]) 107 | upper = float(limit_xml.attrib["upper"]) 108 | joint.setLimitLower(lower) 109 | joint.setLimitUpper(upper) 110 | self.joints[joint_name] = joint 111 | 112 | 113 | # Test the parser 114 | if __name__ == "__main__": 115 | file_name = 
"../../../data/43074/mobility.urdf" 116 | parser = URDFParser(file_name) 117 | parser.parse() 118 | 119 | print(parser.links) 120 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/touch_vit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | import os 9 | from glob import glob 10 | 11 | import git 12 | import hydra 13 | import numpy as np 14 | import torch 15 | from hydra.utils import to_absolute_path 16 | from omegaconf import DictConfig 17 | from PIL import Image 18 | from torchvision import transforms 19 | 20 | from neuralfeels.contrib.tactile_transformer.dpt_model import DPTModel 21 | from neuralfeels.contrib.tactile_transformer.utils import ( 22 | apply_jet_colormap, 23 | concat_images, 24 | create_dir, 25 | ) 26 | 27 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 28 | 29 | 30 | class TouchVIT: 31 | """ 32 | Image to 3D model for DIGIT 33 | """ 34 | 35 | def __init__(self, cfg: DictConfig): 36 | super(TouchVIT, self).__init__() 37 | 38 | self.config = cfg 39 | input_dir = to_absolute_path(self.config["General"]["path_input_images"]) 40 | self.input_images = glob(f"{input_dir}/*.jpg") + glob(f"{input_dir}/*.png") 41 | 42 | self.type = self.config["General"]["type"] 43 | 44 | self.device = torch.device( 45 | self.config["General"]["device"] if torch.cuda.is_available() else "cpu" 46 | ) 47 | # print("device: %s" % self.device) 48 | resize = self.config["Dataset"]["transforms"]["resize"] 49 | self.model = DPTModel( 50 | image_size=(3, resize[0], resize[1]), 51 | emb_dim=self.config["General"]["emb_dim"], 52 | resample_dim=self.config["General"]["resample_dim"], 53 | read=self.config["General"]["read"], 54 | nclasses=len(self.config["Dataset"]["classes"]), 55 | hooks=self.config["General"]["hooks"], 56 | model_timm=self.config["General"]["model_timm"], 57 | type=self.type, 58 | patch_size=self.config["General"]["patch_size"], 59 | ) 60 | path_model = to_absolute_path( 61 | os.path.join( 62 | self.config["General"]["path_model"], 63 | f"{self.config['weights']}.p", 64 | ) 65 | ) 66 | 67 | # print(f"TouchVIT path: {path_model}") 68 | self.model.load_state_dict( 69 | torch.load(path_model, map_location=self.device)["model_state_dict"] 70 | ) 71 | self.model.eval() 72 | self.model.to(self.device) 73 | self.transform_image = transforms.Compose( 74 | [ 75 | transforms.Resize((resize[0], resize[1])), 76 | transforms.ToTensor(), 77 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 78 | ] 79 | ) 80 | self.output_dir = self.config["General"]["path_predicted_images"] 81 | 82 | def image2heightmap(self, image): 83 | image = Image.fromarray(image) 84 | original_size = image.size 85 | image = self.transform_image(image).unsqueeze(0) 86 | image = image.to(self.device).float() 87 | 88 | output_depth, _ = self.model(image) # [0 - 1] output 89 | 90 | output_depth = transforms.ToPILImage()(output_depth.squeeze(0).float()).resize( 91 | original_size, resample=Image.BICUBIC 92 | ) # [0 - 255] output 93 | return transforms.PILToTensor()(output_depth).squeeze() 94 | 95 | def run(self): 96 | path_dir_depths = os.path.join(self.output_dir, "depths") 97 | 
create_dir(self.output_dir) 98 | create_dir(path_dir_depths) 99 | 100 | output_depths, input_images = [], [] 101 | for images in self.input_images[:10]: 102 | pil_im = Image.open(images) 103 | im = np.array(pil_im) 104 | with torch.no_grad(): 105 | output_depth = self.image2heightmap(im) 106 | output_depths.append(output_depth) 107 | input_images.append(pil_im) 108 | 109 | # Convert list of tensors to image collage 110 | output_depths = [transforms.ToPILImage()(depth) for depth in output_depths] 111 | # Concatenate all 10 PIL images 112 | collage_depth = concat_images(output_depths, direction="horizontal") 113 | collage_depth = apply_jet_colormap(collage_depth) 114 | collage_images = concat_images(input_images, direction="horizontal") 115 | collage = concat_images([collage_images, collage_depth], direction="vertical") 116 | # add jet colormap to the collage 117 | collage.show() 118 | 119 | 120 | @hydra.main( 121 | version_base=None, 122 | config_path=os.path.join(root, "scripts/config/main/touch_depth"), 123 | config_name="vit", 124 | ) 125 | def main(cfg: DictConfig): 126 | cfg.weights = "dpt_sim" 127 | t = TouchVIT(cfg) 128 | t.run() 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /neuralfeels/modules/allegro.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Class for Allegro hand joint state and forward kinematics 7 | 8 | import os 9 | from typing import Dict 10 | 11 | import dill as pickle 12 | import git 13 | import numpy as np 14 | import theseus as th 15 | import torch 16 | from torchkin import Robot, get_forward_kinematics_fns 17 | 18 | from neuralfeels.modules.misc import pose_from_config 19 | 20 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 21 | 22 | 23 | class Allegro: 24 | def __init__( 25 | self, 26 | dataset_path: str = None, 27 | base_pose: Dict = None, 28 | device: str = "cuda", 29 | ): 30 | """Allegro hand dataloader for neuralfeels data""" 31 | super(Allegro, self).__init__() 32 | assert (dataset_path is None) != (base_pose is None) 33 | self.device = device 34 | 35 | urdf_path = os.path.join( 36 | root, "data/assets/allegro/allegro_digit_left_ball.urdf" 37 | ) # Allegro hand URDF file 38 | self.robot, self.fkin, self.links, self.joint_map = load_robot( 39 | urdf_file=urdf_path, num_dofs=16, device=device 40 | ) 41 | 42 | if dataset_path is not None: 43 | # Load base pose and jointstate vectors 44 | data_path = os.path.join(dataset_path, "data.pkl") 45 | with open(data_path, "rb") as p: 46 | self.data = pickle.load(p) 47 | self.allegro_pose = self.data["allegro"]["base_pose"] 48 | self.joint_states = torch.tensor( 49 | self.data["allegro"]["joint_state"], device=device, dtype=torch.float32 50 | ) 51 | else: 52 | self.allegro_pose = pose_from_config(base_pose) 53 | 54 | def _hora_to_neural(self, finger_poses): 55 | """ 56 | Convert the DIGIT urdf reference frame (bottom of the sensor) to neural SLAM frame 57 | """ 58 | finger_poses = finger_poses @ np.linalg.inv( 59 | np.array( 60 | [ 61 | [0.000000, -1.000000, 0.000000, 0.000021], 62 | [0.000000, 0.000000, 1.000000, -0.017545], 63 | [-1.000000, 0.000000, 0.000000, -0.002132], 64 | [0.000000, 0.000000, 0.000000, 1.000000], 65 | ] 66 | ) 67 | ) 68 | return finger_poses 
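# Shape sketch (illustrative, not part of the source): the offset above is a
# single fixed 4x4 homogeneous transform, so _hora_to_neural broadcasts over a
# stack of fingertip poses via batched matmul:
#
#   poses = np.tile(np.eye(4), (4, 1, 1))   # (4, 4, 4) dummy fingertip poses
#   out = self._hora_to_neural(poses)       # (4, 4, 4) @ (4, 4) -> (4, 4, 4)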
69 | 70 | def get_fk(self, idx=None, joint_state=None): 71 | """Forward kinematics using theseus torchkin""" 72 | 73 | assert idx is None or joint_state is None 74 | if joint_state is not None: 75 | joint_states = torch.tensor(joint_state, device=self.device) 76 | else: 77 | if idx >= len(self.joint_states): 78 | return None 79 | joint_states = self.joint_states[idx].clone() 80 | 81 | # joint states are saved as [index, middle, ring, thumb] 82 | self.current_joint_state = joint_states # for viz 83 | 84 | # Swap index and ring for the left hand; theseus FK requires this 85 | joint_states_theseus = joint_states.clone() 86 | joint_states_theseus[[0, 1, 2, 3]], joint_states_theseus[[8, 9, 10, 11]] = ( 87 | joint_states_theseus[[8, 9, 10, 11]], 88 | joint_states_theseus[[0, 1, 2, 3]], 89 | ) 90 | 91 | # Change to breadth-first order; theseus needs this too 92 | joint_states_theseus = joint_states_theseus[self.joint_map] 93 | j = th.Vector( 94 | tensor=joint_states_theseus.unsqueeze(0), 95 | name="joint_states", 96 | ) 97 | link_poses = self.fkin(j.tensor) 98 | digit_poses = torch.vstack(link_poses).to(self.robot.device) 99 | digit_poses = th.SE3(tensor=digit_poses).to_matrix().cpu().numpy() 100 | 101 | base_tf = np.repeat( 102 | self.allegro_pose[np.newaxis, :, :], digit_poses.shape[0], axis=0 103 | ) 104 | digit_poses = base_tf @ digit_poses 105 | digit_poses = self._hora_to_neural(digit_poses) 106 | return {k: v for k, v in zip(list(self.links.keys()), list(digit_poses))} 107 | 108 | def get_base_pose(self): 109 | return self.allegro_pose 110 | 111 | 112 | def load_robot(urdf_file: str, num_dofs: int, device): 113 | """Load robot from URDF file and cache FK functions""" 114 | robot = Robot.from_urdf_file(urdf_file, device=device) 115 | links = { 116 | "digit_index": "link_3.0_tip", 117 | "digit_middle": "link_7.0_tip", 118 | "digit_ring": "link_11.0_tip", 119 | "digit_thumb": "link_15.0_tip", 120 | } 121 | 122 | # FK function is applied breadth-first, so swap the indices from the allegro convention 123 | joint_map = torch.tensor( 124 | [joint.id for joint in robot.joint_map.values() if joint.id < num_dofs], 125 | device=device, 126 | ) 127 | # base, index, middle, ring, thumb 128 | fkin, *_ = get_forward_kinematics_fns(robot, list(links.values())) 129 | return (robot, fkin, links, joint_map) 130 | -------------------------------------------------------------------------------- /neuralfeels/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
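# Directory layout assumed by the loaders in this file (reconstructed from the
# code below; names are illustrative):
#   <root_dir>/depth.npz       # arrays: "depth" (N x H x W) and "depth_scale"
#   <root_dir>/image/<i>.jpg   # RGB frames, integer-indexed from 0
#   <root_dir>/seg/<i>.jpg     # GT segmentation masks (VisionDataset)
#   <root_dir>/depth/<i>.jpg   # GT tactile depth (TactileDataset)
#   <root_dir>/mask/<i>.jpg    # contact masks (TactileDataset)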
5 | 6 | import os 7 | 8 | import cv2 9 | import git 10 | import numpy as np 11 | import torch 12 | from torch.utils.data import Dataset 13 | from tqdm import tqdm 14 | 15 | from neuralfeels.datasets import redwood_depth_noise_model as noise_model 16 | 17 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 18 | 19 | 20 | class VisionDataset(torch.utils.data.Dataset): 21 | """Realsense data loader for neuralfeels dataset""" 22 | 23 | def __init__( 24 | self, 25 | root_dir: str, 26 | gt_seg: bool, 27 | sim_noise_iters: float, 28 | col_ext: str = ".jpg", 29 | ): 30 | # pre-load depth data 31 | depth_file = os.path.join(root_dir, "depth.npz") 32 | depth_loaded = np.load(depth_file, fix_imports=True, encoding="latin1") 33 | self.depth_data = depth_loaded["depth"] 34 | self.depth_scale = depth_loaded["depth_scale"] 35 | self.depth_data = self.depth_data.astype(np.float32) 36 | self.depth_data = self.depth_data * self.depth_scale 37 | 38 | if sim_noise_iters > 0: 39 | # add noise to the clean simulation depth data 40 | # At 1 meter distance, the accuracy is 2.5 mm to 5 mm (https://github.com/IntelRealSense/librealsense/issues/7806). 41 | # We operate at roughly 0.5 meter, so we empirically pick 2mm as the noise std. 42 | # Adding the noise here allows us to ablate the effect of depth noise on the performance of the system. 43 | self.dist_model = np.load( 44 | os.path.join(root, "data", "feelsight", "redwood-depth-dist-model.npy") 45 | ) 46 | self.dist_model = self.dist_model.reshape(80, 80, 5) 47 | for i, depth in enumerate(tqdm(self.depth_data)): 48 | depth = noise_model._simulate(-depth, self.dist_model, sim_noise_iters) 49 | self.depth_data[i, :, :] = -depth 50 | 51 | self.rgb_dir = os.path.join(root_dir, "image") 52 | self.seg_dir = os.path.join(root_dir, "seg") 53 | self.col_ext = col_ext 54 | self.gt_seg = gt_seg 55 | 56 | def __len__(self): 57 | return len(os.listdir(self.rgb_dir)) 58 | 59 | def __getitem__(self, idx): 60 | if torch.is_tensor(idx): 61 | idx = idx.tolist() 62 | 63 | rgb_file = os.path.join(self.rgb_dir, f"{idx}" + self.col_ext) 64 | image = cv2.imread(rgb_file) 65 | depth = self.depth_data[idx] 66 | 67 | if self.gt_seg: 68 | mask = self.get_gt_seg(idx) 69 | depth = depth * mask # mask depth with gt segmentation 70 | 71 | return image, depth 72 | 73 | def get_avg_seg_area(self): 74 | """ 75 | Returns the average segmentation area of the dataset 76 | """ 77 | seg_area = 0.0 78 | for i in range(len(self)): 79 | mask = self.get_gt_seg(i) 80 | seg_area += mask.sum() / mask.size 81 | seg_area /= len(self) 82 | return seg_area 83 | 84 | def get_gt_seg(self, idx: int): 85 | """ 86 | Returns a binary mask of the segmentation ground truth 87 | """ 88 | seg_file = os.path.join(self.seg_dir, f"{idx}" + self.col_ext) 89 | mask = cv2.imread(seg_file, 0).astype(np.int64) 90 | # round every pixel to either 0, 255/2, 255 91 | mask = np.round(mask / 127.5) * 127.5 92 | # check that three classes exist; if not, return an empty mask 93 | if np.unique(mask).size != 3: 94 | mask = np.zeros_like(mask) 95 | else: 96 | mask = mask == 255 97 | return mask 98 | 99 | 100 | class TactileDataset(Dataset): 101 | def __init__( 102 | self, 103 | root_dir: str, 104 | gt_depth: bool, 105 | col_ext: str = ".jpg", 106 | ): 107 | """DIGIT dataset loader for neuralfeels dataset""" 108 | self.rgb_dir = os.path.join(root_dir, "image") 109 | self.depth_dir = os.path.join(root_dir, "depth") 110 | self.mask_dir = os.path.join(root_dir, "mask") 111 | self.col_ext = col_ext 112 | self.gt_depth = gt_depth
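# Usage sketch (hypothetical dataset path, for illustration only):
#   ds = TactileDataset("data/feelsight/077_rubiks_cube/00/digit_0", gt_depth=True)
#   image, depth = ds[0]   # BGR image; depth is masked by the contact mask
#   n_frames = len(ds)     # counts files in <root_dir>/image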
113 | 114 | def __len__(self): 115 | return len(os.listdir(self.rgb_dir)) 116 | 117 | def __getitem__(self, idx): 118 | if torch.is_tensor(idx): 119 | idx = idx.tolist() 120 | rgb_file = os.path.join(self.rgb_dir, f"{idx}" + self.col_ext) 121 | image = cv2.imread(rgb_file) 122 | 123 | depth = None 124 | if self.gt_depth: 125 | depth_file = os.path.join(self.depth_dir, f"{idx}" + self.col_ext) 126 | mask_file = os.path.join(self.mask_dir, f"{idx}" + self.col_ext) 127 | depth = cv2.imread(depth_file, 0).astype(np.int64) 128 | 129 | depth[depth < 0] = 0 130 | 131 | mask = cv2.imread(mask_file, 0).astype(np.int64) 132 | mask = mask > 255 / 2 133 | if mask.sum() / mask.size < 0.01: 134 | # tiny mask, ignore 135 | mask *= False 136 | 137 | depth = depth * mask # apply contact mask 138 | 139 | return image, depth 140 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Loss functions for tactile transformer 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | def compute_scale_and_shift(prediction, target, mask): 15 | # system matrix: A = [[a_00, a_01], [a_10, a_11]] 16 | a_00 = torch.sum(mask * prediction * prediction, (1, 2)) 17 | a_01 = torch.sum(mask * prediction, (1, 2)) 18 | a_11 = torch.sum(mask, (1, 2)) 19 | 20 | # right hand side: b = [b_0, b_1] 21 | b_0 = torch.sum(mask * prediction * target, (1, 2)) 22 | b_1 = torch.sum(mask * target, (1, 2)) 23 | 24 | # solution: x = A^-1 . b = [[a_11, -a_01], [-a_10, a_00]] / (a_00 * a_11 - a_01 * a_10) . 
b 25 | x_0 = torch.zeros_like(b_0) 26 | x_1 = torch.zeros_like(b_1) 27 | 28 | det = a_00 * a_11 - a_01 * a_01 29 | valid = det.nonzero() 30 | 31 | x_0[valid] = (a_11[valid] * b_0[valid] - a_01[valid] * b_1[valid]) / det[valid] 32 | x_1[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid] 33 | 34 | return x_0, x_1 35 | 36 | 37 | def reduction_batch_based(image_loss, M): 38 | # average of all valid pixels of the batch 39 | # avoid division by 0 (if sum(M) = sum(sum(mask)) = 0: sum(image_loss) = 0) 40 | divisor = torch.sum(M) 41 | 42 | if divisor == 0: 43 | return 0 44 | else: 45 | return torch.sum(image_loss) / divisor 46 | 47 | 48 | def reduction_image_based(image_loss, M): 49 | # mean of average of valid pixels of an image 50 | 51 | # avoid division by 0 (if M = sum(mask) = 0: image_loss = 0) 52 | valid = M.nonzero() 53 | 54 | image_loss[valid] = image_loss[valid] / M[valid] 55 | 56 | return torch.mean(image_loss) 57 | 58 | 59 | def mse_loss(prediction, target, mask, reduction=reduction_batch_based): 60 | M = torch.sum(mask, (1, 2)) 61 | res = prediction - target 62 | image_loss = torch.sum(mask * res * res, (1, 2)) 63 | 64 | return reduction(image_loss, 2 * M) 65 | 66 | 67 | def gradient_loss(prediction, target, mask, reduction=reduction_batch_based): 68 | M = torch.sum(mask, (1, 2)) 69 | 70 | diff = prediction - target 71 | diff = torch.mul(mask, diff) 72 | 73 | grad_x = torch.abs(diff[:, :, 1:] - diff[:, :, :-1]) 74 | mask_x = torch.mul(mask[:, :, 1:], mask[:, :, :-1]) 75 | grad_x = torch.mul(mask_x, grad_x) 76 | 77 | grad_y = torch.abs(diff[:, 1:, :] - diff[:, :-1, :]) 78 | mask_y = torch.mul(mask[:, 1:, :], mask[:, :-1, :]) 79 | grad_y = torch.mul(mask_y, grad_y) 80 | 81 | image_loss = torch.sum(grad_x, (1, 2)) + torch.sum(grad_y, (1, 2)) 82 | 83 | return reduction(image_loss, M) 84 | 85 | 86 | class MSELoss(nn.Module): 87 | def __init__(self, reduction="batch-based"): 88 | super().__init__() 89 | 90 | if reduction == "batch-based": 91 | self.__reduction = reduction_batch_based 92 | else: 93 | self.__reduction = reduction_image_based 94 | 95 | def forward(self, prediction, target, mask): 96 | return mse_loss(prediction, target, mask, reduction=self.__reduction) 97 | 98 | 99 | class GradientLoss(nn.Module): 100 | def __init__(self, scales=4, reduction="batch-based"): 101 | super().__init__() 102 | 103 | if reduction == "batch-based": 104 | self.__reduction = reduction_batch_based 105 | else: 106 | self.__reduction = reduction_image_based 107 | 108 | self.__scales = scales 109 | 110 | def forward(self, prediction, target, mask): 111 | total = 0 112 | 113 | for scale in range(self.__scales): 114 | step = pow(2, scale) 115 | 116 | total += gradient_loss( 117 | prediction[:, ::step, ::step], 118 | target[:, ::step, ::step], 119 | mask[:, ::step, ::step], 120 | reduction=self.__reduction, 121 | ) 122 | 123 | return total 124 | 125 | 126 | class ScaleAndShiftInvariantLoss(nn.Module): 127 | def __init__(self, alpha=0.5, scales=4, reduction="batch-based"): 128 | super().__init__() 129 | 130 | self.__data_loss = MSELoss(reduction=reduction) 131 | self.__regularization_loss = GradientLoss(scales=scales, reduction=reduction) 132 | self.__alpha = alpha 133 | 134 | self.__prediction_ssi = None 135 | 136 | def forward(self, prediction, target): 137 | # preprocessing 138 | mask = target > 0 139 | 140 | # compute the per-image scale and shift 141 | scale, shift = compute_scale_and_shift(prediction, target, mask) 142 | # print(scale, shift) 143 | self.__prediction_ssi = scale.view(-1, 1, 1) * prediction + 
shift.view(-1, 1, 1) 144 | 145 | total = self.__data_loss(self.__prediction_ssi, target, mask) 146 | if self.__alpha > 0: 147 | total += self.__alpha * self.__regularization_loss( 148 | self.__prediction_ssi, target, mask 149 | ) 150 | 151 | return total 152 | 153 | def __get_prediction_ssi(self): 154 | return self.__prediction_ssi 155 | 156 | prediction_ssi = property(__get_prediction_ssi) 157 | -------------------------------------------------------------------------------- /neuralfeels/geometry/frustum.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from iSDF: https://github.com/facebookresearch/iSDF 7 | 8 | import numpy as np 9 | import torch 10 | import trimesh 11 | 12 | from neuralfeels import viz 13 | from neuralfeels.geometry import transform 14 | from neuralfeels.modules import sample 15 | 16 | 17 | def get_frustum_normals(R_WC, H, W, fx, fy, cx, cy): 18 | c = np.array([0, W, W, 0]) 19 | r = np.array([0, 0, H, H]) 20 | x = (c - cx) / fx 21 | y = (r - cy) / fy 22 | corner_dirs_C = np.vstack((x, y, np.ones(4))).T 23 | corner_dirs_W = (R_WC * corner_dirs_C[..., None, :]).sum(axis=-1) 24 | 25 | frustum_normals = np.empty((4, 3)) 26 | frustum_normals[0] = np.cross(corner_dirs_W[0], corner_dirs_W[1]) 27 | frustum_normals[1] = np.cross(corner_dirs_W[1], corner_dirs_W[2]) 28 | frustum_normals[2] = np.cross(corner_dirs_W[2], corner_dirs_W[3]) 29 | frustum_normals[3] = np.cross(corner_dirs_W[3], corner_dirs_W[0]) 30 | frustum_normals = frustum_normals / np.linalg.norm(frustum_normals, axis=1)[:, None] 31 | 32 | return frustum_normals 33 | 34 | 35 | def check_inside_frustum(points, cam_center, frustum_normals): 36 | """For a point to be within the frustum, the projection on each normal 37 | vector must be positive. 38 | params: points (N, 3), cam_center (3,), frustum_normals (4, 3) 39 | """ 40 | pts = points - cam_center 41 | dots = np.dot(pts, frustum_normals.T) 42 | return (dots >= 0).all(axis=1) 43 | 44 | 45 | def is_visible( 46 | points, T_WC, depth, H, W, fx, fy, cx, cy, trunc=0.2, use_projection=True 47 | ): 48 | """Are points visible in this frame. 49 | Up to trunc metres behind the surface counts in visible region. 
50 | """ 51 | # forward project points 52 | K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) 53 | ones = np.ones([len(points), 1]) 54 | homog_points = np.concatenate((points, ones), axis=-1) 55 | points_C = (np.linalg.inv(T_WC) @ homog_points.T)[:3] 56 | uv = K @ points_C 57 | z = uv[2] 58 | uv = uv[:2] / z 59 | uv = uv.T 60 | 61 | if use_projection: 62 | x_valid = np.logical_and(uv[:, 0] > 0, uv[:, 0] < W) 63 | y_valid = np.logical_and(uv[:, 1] > 0, uv[:, 1] < H) 64 | xy_valid = np.logical_and(x_valid, y_valid) 65 | else: # use frustrum 66 | R_WC = T_WC[:3, :3] 67 | cam_center = T_WC[:3, 3] 68 | 69 | frustum_normals = get_frustum_normals(R_WC, H, W, fx, fy, cx, cy) 70 | 71 | xy_valid = check_inside_frustum(points, cam_center, frustum_normals) 72 | 73 | uv = uv.astype(int) 74 | depth_vals = depth[uv[xy_valid, 1], uv[xy_valid, 0]] 75 | max_depths = np.full(len(uv), -np.inf) 76 | max_depths[xy_valid] = depth_vals + trunc 77 | z_valid = np.logical_and(z > 0, z < max_depths) 78 | 79 | inside = np.logical_and(xy_valid, z_valid) 80 | 81 | return inside 82 | 83 | 84 | def test_inside_frustum(T_WC, depth): 85 | fx, fy = 600.0, 600.0 86 | cx, cy = 600.0, 340.0 87 | H, W = 680, 1200.0 88 | 89 | # show camera 90 | scene = trimesh.Scene() 91 | viz.draw.draw_cams(1, T_WC, scene) 92 | 93 | # show random point cloud 94 | points = np.random.normal(0.0, 2.0, [1000, 3]) 95 | visible = is_visible(points, T_WC, depth, H, W, fx, fy, cx, cy) 96 | cols = np.full(points.shape, [255, 0, 0]) 97 | cols[visible] = [0, 255, 0] 98 | pc = trimesh.PointCloud(points, cols) 99 | scene.add_geometry(pc) 100 | 101 | # show rays 102 | sparse = 20 103 | dirs_C = transform.ray_dirs_C( 104 | 1, 105 | int(H / sparse), 106 | int(W / sparse), 107 | fx / sparse, 108 | fy / sparse, 109 | cx / sparse, 110 | cy / sparse, 111 | "cpu", 112 | depth_type="z", 113 | ) 114 | dirs_C = dirs_C.view(1, -1, 3) 115 | dirs_C = dirs_C.cpu().numpy() 116 | dirs_W = (T_WC[:3, :3] * dirs_C[..., None, :]).sum(axis=-1) 117 | n_rays = dirs_W.shape[1] 118 | sparse_depth = depth[::sparse, ::sparse] 119 | max_depth = torch.from_numpy(sparse_depth + 0.9).flatten() 120 | z_vals = sample.stratified_sample(0.2, max_depth, n_rays, "cpu", n_bins=12) 121 | dirs_W = torch.from_numpy(dirs_W) 122 | dirs_W = dirs_W.squeeze() 123 | origins = torch.from_numpy(T_WC[:3, 3]) 124 | origins = origins[None, :].repeat(n_rays, 1) 125 | rays_pc = origins[:, None, :] + (dirs_W[:, None, :] * z_vals[:, :, None]) 126 | rays_pc = rays_pc.reshape(-1, 3).numpy() 127 | visible_rays = is_visible(rays_pc, T_WC, depth, H, W, fx, fy, cx, cy) 128 | ray_col = np.full(rays_pc.shape, [255, 0, 0]) 129 | ray_col[visible_rays] = [0, 255, 0] 130 | rays_tmpc = trimesh.PointCloud(rays_pc, ray_col) 131 | scene.add_geometry(rays_tmpc) 132 | 133 | # show frustum normals 134 | starts = T_WC[:3, 3][None, :].repeat(4, 0) 135 | frustum_normals = get_frustum_normals(T_WC[:3, :3], H, W, fx, fy, cx, cy) 136 | normal_ends = T_WC[:3, 3] + frustum_normals * 4 137 | normal_lines = np.concatenate((starts[:, None, :], normal_ends[:, None, :]), axis=1) 138 | normal_paths = trimesh.load_path(normal_lines) 139 | normal_paths.colors = [[255, 255, 0, 255]] * 3 140 | scene.add_geometry(normal_paths) 141 | 142 | # show rays in corners of frame 143 | # ends = C + corner_dirs_W * 3 144 | # lines = np.concatenate((starts[:, None, :], ends[:, None, :]), axis=1) 145 | # paths = trimesh.load_path(lines) 146 | # paths.colors = [[0, 255, 0, 255]] * len(lines) 147 | # scene.add_geometry(paths) 148 | 149 | scene.show() 150 | 
-------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/tactile_depth.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Wrapper for tactile depth estimation model 9 | 10 | import collections 11 | import os.path as osp 12 | 13 | import cv2 14 | import numpy as np 15 | import tacto 16 | import torch 17 | from hydra import compose 18 | 19 | from neuralfeels.contrib.tactile_transformer.touch_vit import TouchVIT 20 | 21 | dname = osp.dirname(osp.abspath(__file__)) 22 | 23 | 24 | class TactileDepth: 25 | def __init__(self, depth_mode, real=False, device="cuda"): 26 | super(TactileDepth, self).__init__() 27 | 28 | cfg = compose(config_name=f"main/touch_depth/{depth_mode}").main.touch_depth 29 | 30 | cfg.weights = "dpt_real" if real else "dpt_sim" 31 | 32 | if depth_mode == "gt": 33 | self.model = None 34 | return 35 | if depth_mode == "vit": 36 | # print("Loading ViT depth model----") 37 | self.model = TouchVIT(cfg=cfg) 38 | else: 39 | raise NotImplementedError(f"Mode not implemented: {cfg.mode}") 40 | # print("done") 41 | self.device = device 42 | 43 | settings_config = cfg.settings.real if real else cfg.settings.sim 44 | self.b, self.r, self.clip = ( 45 | settings_config.border, 46 | settings_config.ratio, 47 | settings_config.clip, 48 | ) 49 | 50 | self.bg_id = settings_config.bg_id 51 | self.blend_sz = settings_config.blend_sz 52 | self.heightmap_window = collections.deque([]) 53 | 54 | # background templates for heightmap2mask 55 | self.bg_template = {} 56 | 57 | def image2heightmap(self, image: np.ndarray, sensor_name: str = "digit_0"): 58 | if sensor_name not in self.bg_template: 59 | if self.bg_id is None: 60 | print( 61 | f"{sensor_name} not in background images, generating new background template using first frame" 62 | ) 63 | self.bg_template[sensor_name] = self.model.image2heightmap(image) 64 | else: 65 | print( 66 | f"{sensor_name} not in background images, generating new background template from bg_id {self.bg_id}" 67 | ) 68 | self.bg_template[sensor_name] = self.model.image2heightmap( 69 | cv2.imread(tacto.get_background_image_path(self.bg_id)) 70 | ) 71 | self.bg_template[sensor_name] = self.bg_template[sensor_name].to( 72 | dtype=float, device=self.device 73 | ) 74 | heightmap = self.model.image2heightmap(image) 75 | return self.blend_heightmaps(heightmap) 76 | 77 | def heightmap2mask( 78 | self, heightmap: torch.tensor, sensor_name: str = "digit_0" 79 | ) -> torch.Tensor: 80 | """Thresholds heightmap to return binary contact mask 81 | 82 | Args: 83 | heightmap: single tactile image 84 | 85 | Returns: 86 | padded_contact_mask: contact mask [True: is_contact, False: no_contact] 87 | 88 | """ 89 | 90 | heightmap = heightmap.squeeze().to(self.device) 91 | bg_template = self.bg_template[sensor_name] 92 | # scale bg_template to match heightmap if different size 93 | if bg_template.shape != heightmap.shape: 94 | bg_template = torch.nn.functional.interpolate( 95 | bg_template[None, None, :, :], heightmap.shape[-2:], mode="bilinear" 96 | ).squeeze() 97 | 98 | init_height = bg_template 99 | if self.b: 100 | heightmap = heightmap[self.b : -self.b, self.b : -self.b] 101 | init_height = 
init_height[self.b : -self.b, self.b : -self.b] 102 | diff_heights = heightmap - init_height 103 | diff_heights[diff_heights < self.clip] = 0 104 | threshold = torch.quantile(diff_heights, 0.9) * self.r 105 | contact_mask = diff_heights > threshold 106 | padded_contact_mask = torch.zeros_like(bg_template, dtype=bool) 107 | 108 | if self.b: 109 | padded_contact_mask[self.b : -self.b, self.b : -self.b] = contact_mask 110 | else: 111 | padded_contact_mask = contact_mask 112 | return padded_contact_mask 113 | 114 | def blend_heightmaps(self, heightmap: torch.Tensor) -> torch.Tensor: 115 | """Exponentially weighted heightmap blending. 116 | 117 | Args: 118 | heightmap: input heightmap 119 | 120 | Returns: 121 | blended_heightmap: output heightmap blended over self.heightmap_window 122 | 123 | """ 124 | 125 | if not self.blend_sz: 126 | return heightmap 127 | 128 | if len(self.heightmap_window) >= self.blend_sz: 129 | self.heightmap_window.popleft() 130 | 131 | self.heightmap_window.append(heightmap) 132 | n = len(self.heightmap_window) 133 | 134 | weights = torch.tensor( 135 | [x / n for x in range(1, n + 1)], device=heightmap.device 136 | ) # exponentially weighted time series costs 137 | 138 | weights = torch.exp(weights) / torch.sum(torch.exp(weights)) 139 | 140 | all_heightmaps = torch.stack(list(self.heightmap_window)) 141 | blended_heightmap = torch.sum( 142 | (all_heightmaps * weights[:, None, None]) / weights.sum(), dim=0 143 | ) # weighted average 144 | 145 | # view_subplots([heightmap, blended_heightmap], [["heightmap", "blended_heightmap"]]) 146 | return blended_heightmap 147 | -------------------------------------------------------------------------------- /scripts/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Entrypoint bash script for neuralfeels 3 | # Usage: ./scripts/run DATASET SLAM_MODE MODALITY OBJECT LOG FPS RECORD OPEN3D 4 | # 5 | # Arguments: 6 | if [[ $1 == "--help" ]]; then 7 | echo "Usage: ./scripts/run DATASET SLAM_MODE MODALITY OBJECT LOG FPS RECORD OPEN3D" 8 | echo "" 9 | echo "Arguments:" 10 | echo " DATASET: string # The dataset to be used, options are 'feelsight', 'feelsight_real'" 11 | echo " SLAM_MODE: string # The mode to be used, options are 'slam', 'pose', 'map'" 12 | echo " MODALITY: string # The modality to be used, options are 'vitac', 'vi', 'tac'" 13 | echo " OBJECT: string # The object to be used, e.g., '077_rubiks_cube'" 14 | echo " LOG: string # The log identifier, e.g., '00', '01', '02'" 15 | echo " FPS: integer # The frames per second, e.g., '1', '5'" 16 | echo " RECORD: integer # Whether to record the session, options are '1' (yes) or '0' (no)" 17 | echo " OPEN3D: integer # Whether to use Open3D, options are '1' (yes) or '0' (no)" 18 | echo "" 19 | echo "Presets:" 20 | echo " --slam-sim # Run neural SLAM in simulation with rubber duck" 21 | echo " --pose-sim # Run neural tracking in simulation with Rubik's cube" 22 | echo " --slam-real # Run neural SLAM in real-world with bell pepper" 23 | echo " --pose-real # Run neural tracking in real-world with large dice" 24 | echo " --three-cam # Three camera pose tracking in real-world with large dice" 25 | echo " --occlusion-sim # Run neural tracking in simulation with occlusion logs" 26 | exit 0 27 | elif [[ $1 = "--slam-sim" ]]; then 28 | # Example of neural SLAM in simulation with rubber duck 29 | DATASET="feelsight" 30 | SLAM_MODE="slam" 31 | MODALITY="vitac" 32 | OBJECT="contactdb_rubber_duck" 33 | LOG="00" 34 | FPS=1 35 | RECORD=1 36 | 
OPEN3D=1 37 | elif [[ $1 = "--pose-sim" ]]; then 38 | # Example of neural tracking in simulation with Rubik's cube 39 | DATASET="feelsight" 40 | SLAM_MODE="pose" 41 | MODALITY="vitac" 42 | OBJECT="077_rubiks_cube" 43 | LOG="00" 44 | FPS=1 45 | RECORD=1 46 | OPEN3D=1 47 | elif [[ $1 = "--slam-real" ]]; then 48 | # Example of neural SLAM in real-world with bell pepper 49 | DATASET="feelsight_real" 50 | SLAM_MODE="slam" 51 | MODALITY="vitac" 52 | OBJECT="bell_pepper" 53 | LOG="00" 54 | FPS=1 55 | RECORD=1 56 | OPEN3D=1 57 | elif [[ $1 = "--pose-real" ]]; then 58 | # Example of neural tracking in real-world with large dice 59 | DATASET="feelsight_real" 60 | SLAM_MODE="pose" 61 | MODALITY="vitac" 62 | OBJECT="large_dice" 63 | LOG="00" 64 | FPS=1 65 | RECORD=1 66 | OPEN3D=1 67 | elif [[ $1 = "--three-cam" ]]; then 68 | # Three camera pose tracking in real-world with large dice 69 | DATASET="feelsight_real" 70 | SLAM_MODE="pose" 71 | MODALITY="vi" 72 | OBJECT="large_dice" 73 | LOG="00" 74 | FPS=1 75 | RECORD=1 76 | OPEN3D=1 77 | EXTRA_ARGS="main=baseline" 78 | elif [[ $1 = "--occlusion-sim" ]]; then 79 | # Example of neural tracking in simulation with occlusion logs 80 | DATASET="feelsight" 81 | SLAM_MODE="pose" 82 | MODALITY="vitac" 83 | OBJECT="077_rubiks_cube" 84 | LOG="00" 85 | FPS=1 86 | RECORD=1 87 | OPEN3D=1 88 | EXTRA_ARGS="main.sensor0.masks=read main.occlusion=True" 89 | else 90 | if [ $# -lt 8 ]; then 91 | echo "Error: Missing arguments." 92 | echo "Usage: $0 DATASET SLAM_MODE MODALITY OBJECT LOG FPS RECORD OPEN3D" 93 | echo "You provided $# arguments, but at least 8 are required." 94 | exit 1 95 | fi 96 | DATASET=$1 97 | SLAM_MODE=$2 98 | MODALITY=$3 99 | OBJECT=$4 100 | LOG=$5 101 | FPS=$6 102 | RECORD=$7 103 | OPEN3D=$8 104 | array=($@) 105 | len=${#array[@]} 106 | EXTRA_ARGS=${array[@]:8:$len} 107 | fi 108 | 109 | if [[ ${DATASET} != "feelsight" && ${DATASET} != "feelsight_real" ]]; then 110 | echo "Error: Invalid DATASET. Options are 'feelsight', 'feelsight_real'." 111 | exit 1 112 | elif [[ ${SLAM_MODE} != "slam" && ${SLAM_MODE} != "pose" && ${SLAM_MODE} != "map" ]]; then 113 | echo "Error: Invalid SLAM_MODE. Options are 'slam', 'pose', 'map'." 114 | exit 1 115 | elif [[ ${MODALITY} != "vitac" && ${MODALITY} != "vi" && ${MODALITY} != "tac" ]]; then 116 | echo "Error: Invalid MODALITY. Options are 'vitac', 'vi', 'tac'." 117 | exit 1 118 | elif [[ ! ${FPS} =~ ^[0-9]+$ ]]; then 119 | echo "Error: Invalid FPS. Must be a positive integer." 120 | exit 1 121 | elif [[ ${RECORD} != 0 && ${RECORD} != 1 ]]; then 122 | echo "Error: Invalid RECORD. Options are '0' (no) or '1' (yes)." 123 | exit 1 124 | elif [[ ${OPEN3D} != 0 && ${OPEN3D} != 1 ]]; then 125 | echo "Error: Invalid OPEN3D. Options are '0' (no) or '1' (yes)." 
126 | exit 1 127 | fi 128 | 129 | echo "Extra arguments: ${EXTRA_ARGS}" 130 | 131 | if [ $RECORD -eq 1 ]; then 132 | record_string="main.viz.misc.record=True main.viz.misc.render_stream=True" 133 | else 134 | record_string="" 135 | fi 136 | 137 | if [ $OPEN3D -eq 1 ]; then 138 | open3d_string="main.viz.misc.render_open3d=True" 139 | else 140 | open3d_string="main.viz.misc.render_open3d=False" 141 | fi 142 | 143 | dir=$(date +"outputs/%Y-%m-%d/%H-%M-%S") 144 | argstring="main=${MODALITY} \ 145 | main.data.object=${OBJECT} \ 146 | main.data.log=${LOG} \ 147 | main.data.dataset=${DATASET} \ 148 | main.train.train_mode=${SLAM_MODE} \ 149 | main.viz.meshes.mesh_rec_crop=False \ 150 | main.viz.debug.origin=True \ 151 | main.viz.meshes.show_gt_object=False \ 152 | main.viz.meshes.transparent=False \ 153 | main.data.train_fps=${FPS} \ 154 | main.viz.layers.pointcloud=None \ 155 | ${record_string} ${open3d_string} \ 156 | sweep_dir=${dir} \ 157 | ${EXTRA_ARGS}" 158 | 159 | echo -e "python scripts/run.py ${argstring}" 160 | python scripts/run.py ${argstring} 161 | -------------------------------------------------------------------------------- /neuralfeels/datasets/data_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Class to store image and depth data for each frame of the optimization 7 | 8 | import copy 9 | 10 | import numpy as np 11 | import torch 12 | 13 | 14 | class FrameData: 15 | def __init__( 16 | self, 17 | frame_id=None, 18 | im_batch=None, 19 | im_batch_np=None, 20 | depth_batch=None, 21 | depth_batch_np=None, 22 | T_WC_batch=None, 23 | T_WC_batch_np=None, 24 | normal_batch=None, 25 | seg_pixels=None, 26 | frame_avg_losses=None, 27 | format=None, 28 | ): 29 | super(FrameData, self).__init__() 30 | 31 | self.frame_id = frame_id 32 | self.im_batch = im_batch 33 | self.im_batch_np = im_batch_np 34 | self.depth_batch = depth_batch 35 | self.depth_batch_np = depth_batch_np 36 | self.T_WC_batch = T_WC_batch 37 | self.T_WC_batch_np = T_WC_batch_np 38 | 39 | self.normal_batch = normal_batch 40 | self.seg_pixels = seg_pixels 41 | self.frame_avg_losses = frame_avg_losses 42 | 43 | self.format = format 44 | 45 | self.frame_count = 0 if frame_id is None else len(frame_id) 46 | 47 | def add_frame_data(self, data, replace): 48 | """ 49 | Add new FrameData to existing FrameData. 
50 | """ 51 | self.frame_count += len(data.frame_id) 52 | self.frame_id = expand_data(self.frame_id, data.frame_id, replace) 53 | 54 | self.im_batch = expand_data(self.im_batch, data.im_batch, replace) 55 | self.im_batch_np = expand_data(self.im_batch_np, data.im_batch_np, replace) 56 | 57 | self.depth_batch = expand_data(self.depth_batch, data.depth_batch, replace) 58 | self.depth_batch_np = expand_data( 59 | self.depth_batch_np, data.depth_batch_np, replace 60 | ) 61 | 62 | self.T_WC_batch = expand_data(self.T_WC_batch, data.T_WC_batch, replace) 63 | self.T_WC_batch_np = expand_data( 64 | self.T_WC_batch_np, data.T_WC_batch_np, replace 65 | ) 66 | 67 | self.normal_batch = expand_data(self.normal_batch, data.normal_batch, replace) 68 | 69 | self.seg_pixels = expand_data(self.seg_pixels, data.seg_pixels, replace) 70 | device = data.im_batch.device 71 | empty_dist = torch.zeros([data.im_batch.shape[0]], device=device) 72 | self.frame_avg_losses = expand_data(self.frame_avg_losses, empty_dist, replace) 73 | 74 | if type(data.format) is not list: 75 | data.format = [data.format] 76 | if self.format is None: 77 | self.format = data.format 78 | else: 79 | self.format += data.format 80 | 81 | def delete_frame_data(self, indices): 82 | """ 83 | Delete FrameData at given indices. 84 | """ 85 | self.frame_count -= len(indices) 86 | self.frame_id = np.delete(self.frame_id, indices) 87 | 88 | self.im_batch = torch.cat( 89 | [self.im_batch[: indices[0]], self.im_batch[indices[-1] + 1 :]] 90 | ) 91 | self.im_batch_np = np.delete(self.im_batch_np, indices, axis=0) 92 | 93 | self.depth_batch = torch.cat( 94 | [self.depth_batch[: indices[0]], self.depth_batch[indices[-1] + 1 :]] 95 | ) 96 | self.depth_batch_np = np.delete(self.depth_batch_np, indices, axis=0) 97 | 98 | self.T_WC_batch = torch.cat( 99 | [self.T_WC_batch[: indices[0]], self.T_WC_batch[indices[-1] + 1 :]] 100 | ) 101 | self.T_WC_batch_np = np.delete(self.T_WC_batch_np, indices, axis=0) 102 | 103 | if self.normal_batch is not None: 104 | self.normal_batch = torch.cat( 105 | [self.normal_batch[: indices[0]], self.normal_batch[indices[-1] + 1 :]] 106 | ) 107 | 108 | if self.seg_pixels is not None: 109 | self.seg_pixels = torch.cat( 110 | [self.seg_pixels[: indices[0]], self.seg_pixels[indices[-1] + 1 :]] 111 | ) 112 | self.frame_avg_losses = torch.cat( 113 | [ 114 | self.frame_avg_losses[: indices[0]], 115 | self.frame_avg_losses[indices[-1] + 1 :], 116 | ] 117 | ) 118 | 119 | def __len__(self): 120 | if self.T_WC_batch is None: 121 | return 0 122 | else: 123 | return self.T_WC_batch.shape[0] 124 | 125 | def __getitem__(self, index): 126 | return FrameData( 127 | frame_id=np.array(self.frame_id[index]), 128 | im_batch=self.im_batch[index], 129 | im_batch_np=self.im_batch_np[index], 130 | depth_batch=self.depth_batch[index], 131 | depth_batch_np=self.depth_batch_np[index], 132 | T_WC_batch=self.T_WC_batch[index], 133 | T_WC_batch_np=self.T_WC_batch_np[index], 134 | normal_batch=( 135 | None if self.normal_batch is None else self.normal_batch[index] 136 | ), 137 | seg_pixels=None if self.seg_pixels is None else self.seg_pixels[index], 138 | frame_avg_losses=self.frame_avg_losses[index], 139 | format=self.format[index], 140 | ) 141 | 142 | 143 | def expand_data(batch, data, replace=False): 144 | """ 145 | Add new FrameData attribute to exisiting FrameData attribute. 146 | Either concatenate or replace last row in batch. 
147 | """ 148 | cat_fn = np.concatenate 149 | if torch.is_tensor(data): 150 | cat_fn = torch.cat 151 | 152 | if batch is None: 153 | batch = copy.deepcopy(data) 154 | 155 | else: 156 | if replace is False: 157 | batch = cat_fn((batch, data)) 158 | else: 159 | batch[-1] = data[0] 160 | 161 | return batch 162 | -------------------------------------------------------------------------------- /neuralfeels/viz/rotate_object_video.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ 7 | Script to rotate a sequence of meshes and save as a video. 8 | """ 9 | 10 | import os 11 | import pathlib 12 | import time 13 | 14 | import cv2 15 | import ffmpeg 16 | import git 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import open3d as o3d 20 | import open3d.visualization.rendering as rendering 21 | 22 | root = git.Repo(".", search_parent_directories=True).working_tree_dir 23 | 24 | 25 | def draw_rotating_geometry(mesh_dir, mesh_file): 26 | # create folder to save images 27 | image_path = os.path.join(mesh_dir, "images") 28 | 29 | def get_orbit(final_mesh, timesteps=400, num_orbits=1): 30 | diag = np.linalg.norm( 31 | np.asarray(final_mesh.get_max_bound()) 32 | - np.asarray(final_mesh.get_min_bound()) 33 | ) 34 | radius = diag * 1.5 35 | # initialize camera at 45 degrees of circle 36 | orbit_size = timesteps // num_orbits 37 | theta = np.linspace(0, 2 * np.pi, orbit_size) 38 | z = np.zeros(orbit_size) + 0.33 * radius 39 | x = radius * np.cos(theta) 40 | y = radius * np.sin(theta) 41 | traj = np.vstack((x, y, z)).transpose() 42 | center = final_mesh.get_center() 43 | offset_traj = traj + center 44 | 45 | offset_traj = np.tile(offset_traj, (num_orbits, 1)) 46 | return offset_traj, center 47 | 48 | final_mesh = o3d.io.read_triangle_mesh(mesh_file) 49 | 50 | if not os.path.exists(image_path): 51 | os.makedirs(image_path) 52 | 53 | # delete existing images 54 | for file in os.listdir(image_path): 55 | os.remove(os.path.join(image_path, file)) 56 | 57 | # 500 frames at 20 fps gives ~25 seconds of video 58 | num_iters = 500 59 | orbit_path, center = get_orbit(final_mesh, timesteps=num_iters, num_orbits=1) 60 | 61 | render = rendering.OffscreenRenderer(1000, 1000) 62 | render.setup_camera(60.0, [0, 0, 0], [0, 10, 0], [0, 0, 1]) 63 | render.scene.set_background([1, 1, 1, 1]) 64 | # render.scene.scene.set_sun_light([0.707, 0.0, -.707], [1.0, 1.0, 1.0], 65 | # 75000) 66 | # render.scene.scene.enable_sun_light(True) 67 | 68 | # Address the white background issue: https://github.com/isl-org/Open3D/issues/6020 69 | cg_settings = rendering.ColorGrading( 70 | rendering.ColorGrading.Quality.ULTRA, 71 | rendering.ColorGrading.ToneMapping.LINEAR, 72 | ) 73 | 74 | obj_mat = rendering.MaterialRecord() 75 | mat_properties = { 76 | "metallic": 0.5, 77 | "roughness": 0.6, 78 | "reflectance": 0.2, 79 | "clearcoat": 0.0, 80 | "clearcoat_roughness": 0.0, 81 | "anisotropy": 0.3, 82 | } 83 | obj_mat.base_color = [0.9, 0.9, 0.9, 1.0] 84 | obj_mat.shader = "defaultLit" 85 | for key, val in mat_properties.items(): 86 | setattr(obj_mat, "base_" + key, val) 87 | 88 | for i in range(num_iters): 89 | render.scene.set_lighting( 90 | rendering.Open3DScene.LightingProfile.SOFT_SHADOWS, 91 | -np.array(orbit_path[i, :] + [0.0, 0.0, 0.01]), 92 | ) 93 | 94 | if i == 0: 95 | pcd = 
o3d.io.read_triangle_mesh(mesh_file, True) 96 | render.scene.add_geometry("pcd", pcd, obj_mat) 97 | render.setup_camera(60.0, center, orbit_path[i, :], [0, 0, 1]) 98 | render.scene.view.set_color_grading(cg_settings) 99 | 100 | # capture the rendered frame 101 | img = render.render_to_image() 102 | time_label = i 103 | o3d.io.write_image(os.path.join(image_path, f"{time_label:03d}.jpg"), img, 99) 104 | 105 | save_path = os.path.join(mesh_dir, "mesh_viz.mp4") 106 | create_video(image_path, save_path, 30, 20) 107 | 108 | 109 | def get_int(file: str) -> int: 110 | """ 111 | Extract numeric value from file name 112 | """ 113 | return int(file.split(".")[0]) 114 | 115 | 116 | def create_video(path, save_path, length=30, fps=20): 117 | images = os.listdir(path) 118 | images = [im for im in images if im.endswith(".jpg")] 119 | 120 | images = sorted(images, key=get_int) 121 | 122 | interval = 1000.0 / fps 123 | 124 | # Execute FFmpeg sub-process, reading PNG-encoded frames from the stdin pipe 125 | process = ( 126 | ffmpeg.input("pipe:", r=str(fps)) 127 | .output(save_path, pix_fmt="yuv420p") 128 | .overwrite_output() 129 | .global_args("-loglevel", "warning") 130 | .global_args("-qscale", "0") 131 | .global_args("-y") 132 | .run_async(pipe_stdin=True) 133 | ) 134 | 135 | for image in images: 136 | image_path = os.path.join(path, image) 137 | im = cv2.imread(image_path) 138 | success, encoded_image = cv2.imencode(".png", im) 139 | process.stdin.write( 140 | encoded_image.tobytes() 141 | ) # If broken pipe error, try mamba update ffmpeg 142 | 143 | # Close the stdin pipe so FFmpeg finishes encoding the output file. 144 | process.stdin.close() 145 | process.wait() 146 | 147 | 148 | def get_last_folders(root_dir): 149 | """ 150 | Recursively traverse down all directories until we reach the last folders, and store those in a list. 151 | """ 152 | last_folders = [] 153 | for path in root_dir.iterdir(): 154 | if path.is_dir(): 155 | # if the folder contains exactly one .obj file, we have reached a leaf folder 156 | if len(list(path.glob("*.obj"))) == 1: 157 | last_folders.append(path) 158 | else: 159 | last_folders.extend(get_last_folders(path)) 160 | 161 | if len(last_folders) == 0: 162 | last_folders = [root_dir] 163 | return last_folders 164 | 165 | 166 | # entry point: render a rotating video for every leaf mesh folder 167 | if __name__ == "__main__": 168 | mesh_dir = pathlib.Path(root) / "data/results/mesh_trials/sim" 169 | all_mesh_dirs = get_last_folders(mesh_dir) 170 | for mesh_dir in all_mesh_dirs: 171 | # look for the final reconstructed mesh in this folder 172 | print(f"Processing {mesh_dir}") 173 | # get all .obj files in mesh_dir 174 | mesh_files = list(mesh_dir.glob("*.obj")) 175 | final_mesh_path = None 176 | # check if final mesh exists 177 | if len( 178 | [ 179 | x.name 180 | for x in mesh_files 181 | if (("final" in x.name) or (x.name == "textured.obj")) 182 | ] 183 | ): 184 | final_mesh_path = [ 185 | x.name 186 | for x in mesh_files 187 | if (("final" in x.name) or (x.name == "textured.obj")) 188 | ][0] 189 | final_mesh_path = str(mesh_dir / final_mesh_path) 190 | 191 | if final_mesh_path is not None: 192 | draw_rotating_geometry(mesh_dir, final_mesh_path) 193 | -------------------------------------------------------------------------------- /scripts/run.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
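# Example launch (a sketch; Hydra loads config.yaml from scripts/config, and overrides follow standard Hydra syntax): python scripts/run.py gpu_id=0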
5 | 6 | # Entrypoint python script for neuralfeels 7 | 8 | import gc 9 | import os 10 | import sys 11 | import traceback 12 | from typing import TYPE_CHECKING 13 | 14 | import cv2 15 | import hydra 16 | import numpy as np 17 | import torch 18 | from omegaconf import DictConfig 19 | from pyvirtualdisplay import Display 20 | from termcolor import cprint 21 | 22 | if TYPE_CHECKING: 23 | from neuralfeels.modules.trainer import Trainer 24 | 25 | 26 | class OptionalDisplay: 27 | def __init__(self, size=(1900, 1084), use_xauth=True, active=False): 28 | self.display = None 29 | if active: 30 | self.display = Display(size=size, use_xauth=use_xauth) 31 | 32 | def __enter__(self): 33 | if self.display is not None: 34 | self.display.__enter__() 35 | print(f"Display created at :{self.display.display}.") 36 | 37 | def __exit__(self, *args, **kwargs): 38 | if self.display is not None: 39 | self.display.__exit__() 40 | 41 | 42 | def _load_frames_incremental(trainer: "Trainer", t): 43 | # lazy imports for tinycudann compatibility issues in cluster 44 | from neuralfeels.modules.misc import print_once 45 | 46 | kf_set = {sensor: None for sensor in trainer.sensor_list} 47 | 48 | trainer.update_current_time() 49 | add_new_frame = True if t == 0 else trainer.check_keyframe_latest() 50 | 51 | end_all = False 52 | if add_new_frame: 53 | new_frame_id = trainer.get_latest_frame_id() 54 | 55 | digit_poses = trainer.allegro.get_fk(idx=new_frame_id) 56 | end_all = trainer.check_end(new_frame_id) 57 | 58 | if end_all: 59 | if not os.path.exists(f"./visualizer/{trainer.cfg_data.object}.mp4"): 60 | print_once("******End of sensor stream******") 61 | return kf_set, end_all 62 | 63 | trainer.update_scene_properties(new_frame_id) 64 | 65 | if t == 0: 66 | trainer.init_first_pose(digit_poses) 67 | 68 | added_frame = False 69 | for sensor_name in trainer.sensor_list: 70 | n_keyframes_start = trainer.n_keyframes[sensor_name] 71 | 72 | if "digit" in sensor_name: 73 | frame_data = trainer.sensor[sensor_name].get_frame_data( 74 | new_frame_id, 75 | digit_poses[sensor_name], 76 | msg_data=None, 77 | ) 78 | else: 79 | frame_data = trainer.sensor[sensor_name].get_frame_data( 80 | new_frame_id, 81 | digit_poses, 82 | trainer.latest_render_depth[sensor_name], 83 | msg_data=None, 84 | ) 85 | 86 | added_frame = trainer.add_frame(frame_data) 87 | if t == 0: 88 | trainer.prev_kf_time = trainer.tot_step_time 89 | 90 | # kf_set thumbnails for visualizer 91 | if trainer.n_keyframes[sensor_name] - n_keyframes_start: 92 | new_kf = trainer.frames[sensor_name].im_batch_np[-1] 93 | h = int(new_kf.shape[0] / 6) 94 | w = int(new_kf.shape[1] / 6) 95 | try: 96 | kf_set[sensor_name] = cv2.resize(new_kf, (w, h)) 97 | except Exception: 98 | # fall back to the full-size keyframe if resizing fails 99 | kf_set[sensor_name] = new_kf 100 | 101 | if add_new_frame and added_frame: 102 | trainer.last_is_keyframe = False 103 | 104 | return kf_set, end_all 105 | 106 | 107 | def optim_iter(trainer: "Trainer", t, start_optimize=True): 108 | # lazy imports for tinycudann compatibility issues in cluster 109 | from neuralfeels.modules.misc import gpu_usage_check 110 | 111 | if trainer.incremental: 112 | kf_set, end_all = _load_frames_incremental(trainer, t) 113 | else: 114 | kf_set = {sensor: None for sensor in trainer.sensor_list} 115 | end_all = False 116 | 117 | status = "" 118 | # optimization step--------------------------------------------- 119 | if start_optimize: 120 | # Run map and pose optimization sequentially 121 | pose_loss = trainer.step_pose() 122 | map_loss = 
trainer.step_map() 123 | 124 | # Store losses 125 | map_loss, pose_loss = float(map_loss or 0.0), float(pose_loss or 0.0) 126 | pose_stats, map_stats = trainer.save_stats["pose"], trainer.save_stats["map"] 127 | pose_error_dict, map_error_dict = pose_stats["errors"], map_stats["errors"] 128 | pose_time_dict, map_time_dict = pose_stats["timing"], map_stats["timing"] 129 | pose_time, pose_errors = 0.0, 0.0 130 | map_time, map_errors, f_score_T = 0.0, 0.0, 0 131 | if len(map_error_dict) > 0: 132 | map_time, map_errors, f_score_T = ( 133 | map_time_dict[-1], 134 | map_error_dict[-1]["f_score"][trainer.which_f_score], 135 | map_error_dict[-1]["f_score_T"][trainer.which_f_score], 136 | ) 137 | if len(pose_error_dict) > 0: 138 | pose_time, pose_errors = ( 139 | pose_time_dict[-1], 140 | pose_error_dict[-1]["avg_3d_error"], 141 | ) 142 | 143 | # retrieve the next frame based on optimization time 144 | trainer.tot_step_time += (map_time + pose_time) * (t > 0) 145 | 146 | # Print useful information 147 | status = f"Map time: {map_time:.2f} s, Pose time: {pose_time:.2f} s, Total: {trainer.tot_step_time:.2f} s, Dataset: {trainer.current_time:.2f} s\n" 148 | status += ( 149 | f"Pose err [{pose_errors*1000:.2f} mm] Map err (< {f_score_T*1000:.2f} mm): [{map_errors:.2f}]" 150 | ) 151 | else: 152 | print("Waiting for visualizer..") 153 | 154 | trainer.get_latest_depth_renders() 155 | gpu_usage_check() 156 | return status, kf_set, end_all 157 | 158 | 159 | @hydra.main(version_base=None, config_path="config", config_name="config") 160 | def main(cfg: DictConfig): 161 | """Main function to run neuralfeels 162 | 163 | Args: 164 | cfg (DictConfig): Hydra configuration 165 | """ 166 | gpu_id = cfg.gpu_id 167 | torch.set_default_device(f"cuda:{gpu_id}") 168 | cprint(f"Using GPU: {gpu_id}", color="yellow") 169 | try: 170 | import open3d.visualization.gui as gui 171 | 172 | # lazy imports to avoid tinycudann errors when launching locally for a 173 | # different architecture 174 | from neuralfeels.modules.trainer import Trainer 175 | from neuralfeels.viz import neuralfeels_gui 176 | 177 | seed = cfg.seed 178 | np.random.seed(seed) 179 | torch.manual_seed(seed) 180 | 181 | with OptionalDisplay( 182 | size=(3840, 1644), use_xauth=True, active=cfg.create_display 183 | ): 184 | tac_slam_trainer = Trainer(cfg=cfg, gpu_id=gpu_id, ros_node=None) 185 | # open3d vis window 186 | app = gui.Application.instance 187 | app.initialize() 188 | mono = app.add_font(gui.FontDescription(gui.FontDescription.MONOSPACE)) 189 | size_ratio = 0.4 # scaling ratio w.r.t. tkinter resolution 190 | w = neuralfeels_gui.GUI( 191 | tac_slam_trainer, optim_iter, mono, size_ratio, cfg.profile 192 | ) 193 | app.run() 194 | w.save_data() # save all the images, meshes, plots, etc. 195 | # clear memory 196 | gc.collect() 197 | torch.cuda.empty_cache() 198 | 199 | except Exception: 200 | traceback.print_exc(file=sys.stderr) 201 | raise 202 | 203 | 204 | if __name__ == "__main__": 205 | main() 206 | -------------------------------------------------------------------------------- /neuralfeels/contrib/tactile_transformer/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Adapted from https://github.com/antocad/FocusOnDepth and https://github.com/isl-org/DPT 7 | 8 | # Utility functions for tactile transformer 9 | 10 | import errno 11 | import os 12 | from glob import glob 13 | 14 | import numpy as np 15 | import torch.nn as nn 16 | import torch.optim as optim 17 | from matplotlib import cm 18 | from PIL import Image 19 | from torch.optim.lr_scheduler import ReduceLROnPlateau 20 | from torchvision import transforms 21 | 22 | from neuralfeels.contrib.tactile_transformer.custom_augmentation import ToMask 23 | from neuralfeels.contrib.tactile_transformer.loss import ScaleAndShiftInvariantLoss 24 | 25 | 26 | def get_total_paths(path, ext): 27 | return glob(os.path.join(path, "*" + ext)) 28 | 29 | 30 | def get_splitted_dataset( 31 | config, split, dataset_name, path_images, path_depths, path_segmentation 32 | ): 33 | list_files = [os.path.basename(im) for im in path_images] 34 | np.random.seed(config["General"]["seed"]) 35 | np.random.shuffle(list_files) 36 | if split == "train": 37 | selected_files = list_files[ 38 | : int(len(list_files) * config["Dataset"]["splits"]["split_train"]) 39 | ] 40 | elif split == "val": 41 | selected_files = list_files[ 42 | int(len(list_files) * config["Dataset"]["splits"]["split_train"]) : int( 43 | len(list_files) * config["Dataset"]["splits"]["split_train"] 44 | ) 45 | + int(len(list_files) * config["Dataset"]["splits"]["split_val"]) 46 | ] 47 | else: 48 | selected_files = list_files[ 49 | int(len(list_files) * config["Dataset"]["splits"]["split_train"]) 50 | + int(len(list_files) * config["Dataset"]["splits"]["split_val"]) : 51 | ] 52 | 53 | path_images = [ 54 | os.path.join( 55 | config["Dataset"]["paths"]["path_dataset"], 56 | dataset_name, 57 | config["Dataset"]["paths"]["path_images"], 58 | im[:-4] + config["Dataset"]["extensions"]["ext_images"], 59 | ) 60 | for im in selected_files 61 | ] 62 | path_depths = [ 63 | os.path.join( 64 | config["Dataset"]["paths"]["path_dataset"], 65 | dataset_name, 66 | config["Dataset"]["paths"]["path_depths"], 67 | im[:-4] + config["Dataset"]["extensions"]["ext_depths"], 68 | ) 69 | for im in selected_files 70 | ] 71 | path_segmentation = [ 72 | os.path.join( 73 | config["Dataset"]["paths"]["path_dataset"], 74 | dataset_name, 75 | config["Dataset"]["paths"]["path_segmentations"], 76 | im[:-4] + config["Dataset"]["extensions"]["ext_segmentations"], 77 | ) 78 | for im in selected_files 79 | ] 80 | return path_images, path_depths, path_segmentation 81 | 82 | 83 | def get_transforms(config): 84 | im_size = config["Dataset"]["transforms"]["resize"] 85 | transform_image = transforms.Compose( 86 | [ 87 | transforms.Resize((im_size[0], im_size[1])), 88 | transforms.ToTensor(), # converts to [0 - 1] 89 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 90 | ] 91 | ) 92 | transform_depth = transforms.Compose( 93 | [ 94 | transforms.Resize((im_size[0], im_size[1])), 95 | transforms.Grayscale(num_output_channels=1), 96 | transforms.ToTensor(), # converts to [0 - 1] 97 | ] 98 | ) 99 | transform_seg = transforms.Compose( 100 | [ 101 | transforms.Resize( 102 | (im_size[0], im_size[1]), 103 | interpolation=transforms.InterpolationMode.NEAREST, 104 | ), 105 | ToMask(config["Dataset"]["classes"]), 106 | ] 107 | ) 108 | return transform_image, transform_depth, transform_seg 109 | 110 | 111 | def get_losses(config): 112 | def NoneFunction(a, b): 113 | return 0 114 | 115 | loss_depth = NoneFunction 116 | loss_segmentation = NoneFunction 117 | type = config["General"]["type"] 118 | if 
type == "full" or type == "depth": 119 | if config["General"]["loss_depth"] == "mse": 120 | loss_depth = nn.L1Loss() 121 | elif config["General"]["loss_depth"] == "ssi": 122 | loss_depth = ScaleAndShiftInvariantLoss() 123 | if type == "full" or type == "segmentation": 124 | if config["General"]["loss_segmentation"] == "ce": 125 | loss_segmentation = nn.CrossEntropyLoss() 126 | return loss_depth, loss_segmentation 127 | 128 | 129 | def create_dir(directory): 130 | try: 131 | os.makedirs(directory) 132 | except OSError as e: 133 | if e.errno != errno.EEXIST: 134 | raise 135 | 136 | 137 | def get_optimizer(config, net): 138 | names = set([name.split(".")[0] for name, _ in net.named_modules()]) - set( 139 | ["", "transformer_encoders"] 140 | ) 141 | params_backbone = net.transformer_encoders.parameters() 142 | params_scratch = list() 143 | for name in names: 144 | params_scratch += list(eval("net." + name).parameters()) 145 | 146 | if config["General"]["optim"] == "adam": 147 | optimizer_backbone = optim.Adam( 148 | params_backbone, lr=config["General"]["lr_backbone"] 149 | ) 150 | optimizer_scratch = optim.Adam( 151 | params_scratch, lr=config["General"]["lr_scratch"] 152 | ) 153 | elif config["General"]["optim"] == "sgd": 154 | optimizer_backbone = optim.SGD( 155 | params_backbone, 156 | lr=config["General"]["lr_backbone"], 157 | momentum=config["General"]["momentum"], 158 | ) 159 | optimizer_scratch = optim.SGD( 160 | params_scratch, 161 | lr=config["General"]["lr_scratch"], 162 | momentum=config["General"]["momentum"], 163 | ) 164 | return optimizer_backbone, optimizer_scratch 165 | 166 | 167 | def get_schedulers(optimizers): 168 | return [ 169 | ReduceLROnPlateau(optimizer, verbose=True, factor=0.8) 170 | for optimizer in optimizers 171 | ] 172 | 173 | 174 | def concat_images(images, direction="horizontal"): 175 | widths, heights = zip(*(img.size for img in images)) 176 | 177 | if direction == "horizontal": 178 | total_width = sum(widths) 179 | max_height = max(heights) 180 | new_image = Image.new("RGB", (total_width, max_height)) 181 | x_offset = 0 182 | for img in images: 183 | new_image.paste(img, (x_offset, 0)) 184 | x_offset += img.width 185 | elif direction == "vertical": 186 | total_height = sum(heights) 187 | max_width = max(widths) 188 | new_image = Image.new("RGB", (max_width, total_height)) 189 | y_offset = 0 190 | for img in images: 191 | new_image.paste(img, (0, y_offset)) 192 | y_offset += img.height 193 | else: 194 | raise ValueError("Direction must be 'horizontal' or 'vertical'") 195 | 196 | return new_image 197 | 198 | 199 | def apply_jet_colormap(image): 200 | # Convert to grayscale if not already 201 | grayscale_image = image.convert("L") 202 | 203 | # Convert grayscale image to numpy array 204 | image_np = np.array(grayscale_image) 205 | 206 | # Normalize image data to range [0, 1] for colormap 207 | image_normalized = image_np / 255.0 208 | 209 | # Apply the jet colormap 210 | colormap = cm.get_cmap("jet") 211 | colored_image = colormap(image_normalized) 212 | 213 | # Convert back to 8-bit per channel RGB 214 | colored_image = (colored_image[:, :, :3] * 255).astype(np.uint8) 215 | 216 | # Convert numpy array back to PIL image 217 | return Image.fromarray(colored_image) 218 | -------------------------------------------------------------------------------- /neuralfeels/modules/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | # Adapted from iSDF: https://github.com/facebookresearch/iSDF 7 | 8 | import torch 9 | 10 | from neuralfeels.geometry import transform 11 | 12 | cosSim = torch.nn.CosineSimilarity(dim=-1, eps=1e-6) 13 | 14 | 15 | # method 1: ray bounds 16 | def bounds_ray(depth_sample, z_vals, dirs_C_sample, T_WC_sample, do_grad: bool = True): 17 | bounds = depth_sample[:, None] - z_vals 18 | z_to_euclidean_depth = dirs_C_sample.norm(dim=-1) 19 | bounds = z_to_euclidean_depth[:, None] * bounds 20 | 21 | grad = None 22 | if do_grad: 23 | grad = grad_ray(T_WC_sample, dirs_C_sample, z_vals.shape[1] - 1) 24 | 25 | return bounds, grad 26 | 27 | 28 | # method 2: ray bound with cos correction 29 | def bounds_normal( 30 | depth_sample, 31 | z_vals, 32 | dirs_C_sample, 33 | norm_sample, 34 | normal_trunc_dist, 35 | T_WC_sample, 36 | do_grad, 37 | ): 38 | ray_bounds, _ = bounds_ray( 39 | depth_sample, z_vals, dirs_C_sample, T_WC_sample, do_grad 40 | ) 41 | 42 | costheta = torch.abs(cosSim(-dirs_C_sample, norm_sample)) 43 | 44 | # only apply correction out to truncation distance 45 | sub = normal_trunc_dist * (1.0 - costheta) 46 | normal_bounds = ray_bounds - sub[:, None] 47 | 48 | trunc_ixs = ray_bounds < normal_trunc_dist 49 | trunc_vals = (ray_bounds * costheta[:, None])[trunc_ixs] 50 | normal_bounds[trunc_ixs] = trunc_vals 51 | 52 | grad = None 53 | if do_grad: 54 | grad = grad_ray(T_WC_sample, dirs_C_sample, z_vals.shape[1] - 1) 55 | 56 | return normal_bounds, grad 57 | 58 | 59 | def grad_ray(T_WC_sample, dirs_C_sample, n_samples): 60 | """Returns the negative of the viewing direction vector""" 61 | _, dirs_W = transform.origin_dirs_W(T_WC_sample, dirs_C_sample) 62 | grad = -dirs_W[:, None, :].repeat(1, n_samples, 1) 63 | 64 | return grad 65 | 66 | 67 | # method 3: brute force 68 | @torch.jit.script 69 | def bounds_pc( 70 | pc: torch.Tensor, 71 | z_vals: torch.Tensor, 72 | depth_sample: torch.Tensor, 73 | object_rays: int, 74 | trunc_dist: float, 75 | do_grad: bool = True, 76 | ): 77 | with torch.no_grad(): 78 | surf_pc = pc[:object_rays, 0] # surface pointcloud element 79 | diff = pc[:, :, None] - surf_pc 80 | dists = diff.norm(p=2, dim=-1) 81 | dists, closest_ixs = torch.min( 82 | dists, dim=-1 83 | ) # closest points to the surface element 84 | behind_surf = torch.abs(z_vals) > torch.abs(depth_sample[:, None]) 85 | # assert behind_surf[object_rays:].any() == False, "free space rays are not behind surface" 86 | dists[behind_surf] *= -1 # make distances behind surface negative 87 | bounds = dists 88 | 89 | if do_grad: 90 | ix1 = torch.arange(diff.shape[0])[:, None].repeat(1, diff.shape[1]) 91 | ix2 = torch.arange(diff.shape[1])[None, :].repeat(diff.shape[0], 1) 92 | grad = diff[ix1, ix2, closest_ixs] 93 | grad = grad[:, 1:] 94 | grad = grad / grad.norm(p=2, dim=-1)[..., None] 95 | # flip grad vectors behind the surf 96 | grad[behind_surf[:, 1:]] *= -1 97 | return bounds, grad 98 | else: 99 | return bounds, None 100 | 101 | 102 | def bounds( 103 | method, 104 | dirs_C_sample, 105 | depth_sample, 106 | T_WC_sample, 107 | z_vals, 108 | pc, 109 | object_rays, 110 | trunc_dist, 111 | norm_sample, 112 | do_grad=True, 113 | ): 114 | """do_grad: compute approximate gradient vector.""" 115 | assert method in ["ray", "normal", "pc"] 116 | 117 | if method == "ray": 118 | bounds, grad = bounds_ray( 119 | depth_sample, z_vals, dirs_C_sample, T_WC_sample, do_grad 120 | ) 121 | 122 | 
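# the "normal" method tightens the ray bound using the angle between the viewing ray and the surface normal (see bounds_normal above)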
elif method == "normal": 123 | bounds, grad = bounds_normal( 124 | depth_sample, 125 | z_vals, 126 | dirs_C_sample, 127 | norm_sample, 128 | trunc_dist, 129 | T_WC_sample, 130 | do_grad, 131 | ) 132 | 133 | else: 134 | bounds, grad = bounds_pc( 135 | pc, z_vals, depth_sample, object_rays, trunc_dist, do_grad 136 | ) 137 | 138 | return bounds, grad 139 | 140 | 141 | def sdf_loss(sdf, bounds, t, loss_type="L1"): 142 | """ 143 | params: 144 | sdf: predicted sdf values. 145 | bounds: upper bound on abs(sdf) 146 | t: truncation distance up to which the sdf value is directly supevised. 147 | loss_type: L1 or L2 loss. 148 | """ 149 | # free_space_loss_mat, trunc_loss_mat = full_sdf_loss(sdf, bounds, t) 150 | free_space_loss_mat, trunc_loss_mat = tsdf_loss(sdf, bounds, t) 151 | 152 | # decide which supervision based on truncation region 153 | free_space_ixs = torch.abs(bounds) > t 154 | free_space_loss_mat[~free_space_ixs] = 0.0 155 | trunc_loss_mat[free_space_ixs] = 0.0 156 | 157 | sdf_loss_mat = free_space_loss_mat + trunc_loss_mat 158 | 159 | if loss_type == "L1": 160 | sdf_loss_mat = torch.abs(sdf_loss_mat) 161 | elif loss_type == "L2": 162 | sdf_loss_mat = torch.square(sdf_loss_mat) 163 | elif loss_type == "smooth_L1": 164 | sdf_loss_mat = torch.nn.functional.smooth_l1_loss( 165 | sdf_loss_mat, torch.zeros_like(sdf_loss_mat), reduction="none" 166 | ) 167 | else: 168 | raise ValueError("Must be L1 or L2") 169 | 170 | return sdf_loss_mat, free_space_ixs 171 | 172 | 173 | def full_sdf_loss(sdf, target_sdf, trunc_dist, free_space_factor=5.0): 174 | """ 175 | For samples that lie in free space before truncation region: 176 | loss(sdf_pred, sdf_gt) = { max(0, sdf_pred - sdf_gt), if sdf_pred >= 0 177 | { exp(-sdf_pred) - 1, if sdf_pred < 0 178 | 179 | For samples that lie in truncation region: 180 | loss(sdf_pred, sdf_gt) = sdf_pred - sdf_gt 181 | """ 182 | 183 | # free_space_loss_mat = torch.max( 184 | # torch.nn.functional.relu(sdf - target_sdf), 185 | # torch.exp(-free_space_factor * sdf) - 1. 186 | # ) 187 | free_space_loss_mat = sdf - trunc_dist 188 | trunc_loss_mat = sdf - target_sdf 189 | 190 | return free_space_loss_mat, trunc_loss_mat 191 | 192 | 193 | def tsdf_loss(sdf, target_sdf, trunc_dist): 194 | """ 195 | tsdf loss from: https://arxiv.org/pdf/2104.04532.pdf 196 | SDF values in truncation region are scaled in range [0, 1]. 
197 | """ 198 | trunc_vals = torch.sign(target_sdf) * torch.ones(sdf.shape, device=sdf.device) 199 | free_space_loss_mat = sdf - trunc_vals 200 | trunc_loss_mat = sdf - target_sdf / trunc_dist 201 | return free_space_loss_mat, trunc_loss_mat 202 | 203 | 204 | def tot_loss( 205 | sdf_loss_mat, 206 | eik_loss_mat, 207 | free_space_ixs, 208 | bounds, 209 | trunc_weight, 210 | eik_weight, 211 | vision_weights=None, 212 | ): 213 | sdf_loss_mat[~free_space_ixs] *= trunc_weight 214 | 215 | if vision_weights is not None: 216 | sdf_loss_mat = torch.mul(sdf_loss_mat, vision_weights) 217 | 218 | losses = {"sdf_loss": sdf_loss_mat.mean()} 219 | tot_loss_mat = sdf_loss_mat 220 | 221 | # eikonal loss 222 | if eik_loss_mat is not None: 223 | eik_loss_mat = eik_loss_mat * eik_weight 224 | tot_loss_mat = tot_loss_mat + eik_loss_mat 225 | losses["eikonal_loss"] = eik_loss_mat.mean() 226 | 227 | tot_loss = tot_loss_mat.mean() 228 | losses["total_loss"] = tot_loss 229 | 230 | return tot_loss, tot_loss_mat, losses 231 | 232 | 233 | def approx_loss(full_loss, binary_masks, W, H, factor=8): 234 | w_block = W // factor 235 | h_block = H // factor 236 | loss_approx = full_loss.view(-1, factor, h_block, factor, w_block) 237 | loss_approx = loss_approx.sum(dim=(2, 4)) 238 | actives = binary_masks.view(-1, factor, h_block, factor, w_block) 239 | actives = actives.sum(dim=(2, 4)) 240 | actives[actives == 0] = 1.0 241 | loss_approx = loss_approx / actives 242 | 243 | return loss_approx 244 | 245 | 246 | def frame_avg( 247 | total_loss_mat, 248 | depth_batch, 249 | indices_b, 250 | indices_h, 251 | indices_w, 252 | W, 253 | H, 254 | loss_approx_factor, 255 | binary_masks, 256 | free_space_masks, 257 | ): 258 | # frame average losses 259 | full_loss = torch.zeros( 260 | depth_batch.shape, dtype=total_loss_mat.dtype, device=depth_batch.device 261 | ) 262 | 263 | full_loss[indices_b, indices_h, indices_w] = total_loss_mat.sum(-1).detach() 264 | 265 | full_loss = ( 266 | full_loss * ~free_space_masks 267 | ) # remove those samples in free space for only surface loss 268 | 269 | loss_approx = approx_loss(full_loss, binary_masks, W, H, factor=loss_approx_factor) 270 | factor = loss_approx.shape[1] 271 | frame_sum = loss_approx.sum(dim=(1, 2)) 272 | frame_avg_loss = frame_sum / (factor * factor) 273 | 274 | return loss_approx, frame_avg_loss 275 | -------------------------------------------------------------------------------- /neuralfeels/viz/draw.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # 2D and 3D visualization utilities for neuralfeels 7 | 8 | import io 9 | 10 | import cv2 11 | import numpy as np 12 | import skimage.measure 13 | import torch 14 | import trimesh 15 | from PIL import Image 16 | 17 | from neuralfeels import geometry 18 | 19 | 20 | def draw_camera(camera, transform, color=(0.0, 1.0, 0.0, 0.8), marker_height=0.2): 21 | marker = trimesh.creation.camera_marker(camera, marker_height=marker_height) 22 | marker[0].apply_transform(transform) 23 | marker[1].apply_transform(transform) 24 | marker[1].colors = (color,) * len(marker[1].entities) 25 | 26 | return marker 27 | 28 | 29 | def draw_cameras_from_eyes(eyes, ats, up, scene): 30 | for eye, at in zip(eyes, ats): 31 | R, t = geometry.transform.look_at(eye, at, up) 32 | T = np.eye(4) 33 | T[:3, :3] = R 34 | T[:3, 3] = t 35 | 36 | transform = T @ geometry.transform.to_replica() 37 | camera = trimesh.scene.Camera( 38 | fov=scene.camera.fov, resolution=scene.camera.resolution 39 | ) 40 | marker = draw_camera(camera, transform) 41 | scene.add_geometry(marker) 42 | 43 | 44 | def draw_cams( 45 | batch_size, T_WC_batch_np, scene, color=None, latest_diff=True, cam_scale=1.0 46 | ): 47 | no_color = color is None 48 | if no_color: 49 | color = (0.0, 1.0, 0.0, 0.8) 50 | for batch_i in range(batch_size): 51 | # if batch_i == (batch_size - 1): 52 | # color = (1., 0., 0., 0.8) 53 | T_WC = T_WC_batch_np[batch_i] 54 | 55 | camera = trimesh.scene.Camera( 56 | fov=scene.camera.fov, resolution=scene.camera.resolution 57 | ) 58 | marker_height = 0.3 * cam_scale 59 | if batch_i == batch_size - 1 and latest_diff: 60 | if no_color: 61 | color = (1.0, 1.0, 1.0, 1.0) 62 | marker_height = 0.5 * cam_scale 63 | 64 | marker = draw_camera(camera, T_WC, color=color, marker_height=marker_height) 65 | scene.add_geometry(marker[1]) 66 | 67 | 68 | def draw_segment(t1, t2, color=(1.0, 1.0, 0.0)): 69 | line_segment = trimesh.load_path([t1, t2]) 70 | line_segment.colors = (color,) * len(line_segment.entities) 71 | 72 | return line_segment 73 | 74 | 75 | def draw_trajectory(trajectory, scene, color=(1.0, 1.0, 0.0)): 76 | for i in range(trajectory.shape[0] - 1): 77 | if (trajectory[i] != trajectory[i + 1]).any(): 78 | segment = draw_segment(trajectory[i], trajectory[i + 1], color) 79 | scene.add_geometry(segment) 80 | 81 | 82 | def draw_pc(batch_size, pcs_cam, T_WC_batch_np, im_batch=None, scene=None): 83 | pcs_w = [] 84 | cols = [] 85 | for batch_i in range(batch_size): 86 | T_WC = T_WC_batch_np[batch_i] 87 | pc_cam = pcs_cam[batch_i] 88 | 89 | col = None 90 | if im_batch is not None: 91 | img = im_batch[batch_i] 92 | col = img.reshape(-1, 3) 93 | cols.append(col) 94 | 95 | pc_tri = trimesh.PointCloud(vertices=pc_cam, colors=col) 96 | pc_tri.apply_transform(T_WC) 97 | pcs_w.append(pc_tri.vertices) 98 | 99 | if scene is not None: 100 | scene.add_geometry(pc_tri) 101 | 102 | pcs_w = np.concatenate(pcs_w, axis=0) 103 | if len(cols) != 0: 104 | cols = np.concatenate(cols) 105 | return pcs_w, cols 106 | 107 | 108 | def marching_cubes_trimesh(numpy_3d_sdf_tensor, level=0.0): 109 | """ 110 | Convert sdf samples to triangular mesh. 
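    Vertices are returned in the normalized [0, 1] cube (each voxel index is divided by dim - 1).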
111 | """ 112 | vertices, faces, vertex_normals, _ = skimage.measure.marching_cubes( 113 | numpy_3d_sdf_tensor, 114 | level=level, 115 | step_size=1, 116 | ) 117 | 118 | dim = numpy_3d_sdf_tensor.shape[0] 119 | vertices = vertices / (dim - 1) 120 | mesh = trimesh.Trimesh( 121 | vertices=vertices, vertex_normals=vertex_normals, faces=faces 122 | ) 123 | 124 | return mesh 125 | 126 | 127 | def draw_mesh(sdf, color_by="normals", clean_mesh=True): 128 | """ 129 | Run marching cubes on sdf tensor to return mesh. 130 | """ 131 | if isinstance(sdf, torch.Tensor): 132 | sdf = sdf.detach().cpu().numpy() 133 | mesh = marching_cubes_trimesh(sdf) 134 | 135 | # Transform to [-1, 1] range 136 | mesh.apply_translation([-0.5, -0.5, -0.5]) 137 | mesh.apply_scale(2) 138 | 139 | try: 140 | # from NICE-SLAM 141 | if clean_mesh: 142 | get_largest_components = False 143 | remove_small_geometry_threshold = 2 144 | # get connected components 145 | components = mesh.split(only_watertight=False) 146 | if get_largest_components: 147 | areas = np.array([c.area for c in components], dtype=np.float) 148 | print(areas) 149 | clean_mesh = components[areas.argmax()] 150 | else: 151 | new_components = [] 152 | for comp in components: 153 | if comp.area > remove_small_geometry_threshold: 154 | new_components.append(comp) 155 | # print(f"Removed {len(components) - len(new_components)} blobs") 156 | clean_mesh = trimesh.util.concatenate(new_components) 157 | vertices = clean_mesh.vertices 158 | faces = clean_mesh.faces 159 | mesh = trimesh.Trimesh(vertices, faces) 160 | except: 161 | print("clean_mesh error: continuing") 162 | 163 | mesh = trimesh.smoothing.filter_laplacian(mesh, lamb=0.3) 164 | if color_by == "normals": 165 | norm_cols = (-mesh.vertex_normals + 1) / 2 166 | norm_cols = np.clip(norm_cols, 0.0, 1.0) 167 | norm_cols = (norm_cols * 255).astype(np.uint8) 168 | alphas = np.full([norm_cols.shape[0], 1], 255, dtype=np.uint8) 169 | cols = np.concatenate((norm_cols, alphas), axis=1) 170 | mesh.visual.vertex_colors = cols 171 | elif color_by == "height": 172 | zs = mesh.vertices[:, 1] 173 | cols = trimesh.visual.interpolate(zs, color_map="viridis") 174 | mesh.visual.vertex_colors = cols 175 | else: 176 | mesh.visual.face_colors = [160, 160, 160, 255] 177 | return mesh 178 | 179 | 180 | def capture_scene_im(scene, pose, tm_pose=False, resolution=(1280, 720)): 181 | if not tm_pose: 182 | pose = geometry.transform.to_trimesh(pose) 183 | scene.camera_transform = pose 184 | data = scene.save_image(resolution=resolution) 185 | image = np.array(Image.open(io.BytesIO(data))) 186 | 187 | return image 188 | 189 | 190 | # adapted from https://github.com/NVlabs/BundleSDF/blob/878cee2f1cda23810ff861f6fef2922c96c7a67e/Utils.py#L309C1-L344C13 191 | def draw_xyz_axis( 192 | color, 193 | obj_in_cam, 194 | fx, 195 | fy, 196 | cx, 197 | cy, 198 | h, 199 | w, 200 | scale=0.1, 201 | thickness=2, 202 | transparency=0.3, 203 | is_input_rgb=False, 204 | ): 205 | """ 206 | @color: BGR 207 | """ 208 | if is_input_rgb: 209 | color = cv2.cvtColor(color, cv2.COLOR_RGB2BGR) 210 | 211 | oo = np.array([0, 0, 0]).astype(float) 212 | xx = np.array([1, 0, 0]).astype(float) * scale 213 | yy = np.array([0, 1, 0]).astype(float) * scale 214 | zz = np.array([0, 0, 1]).astype(float) * scale 215 | pts_of = torch.tensor(np.vstack((oo, xx, yy, zz))).float() # in object frame 216 | pts_of = pts_of.to(device=obj_in_cam.device, dtype=obj_in_cam.dtype) 217 | pts_cf = geometry.transform.transform_points(pts_of, obj_in_cam) # in camera frame 218 | 219 | pts_2d = 
geometry.transform.point_cloud_to_image_plane(pts_cf, fx, fy, cx, cy, h, w) 220 | origin = tuple(pts_2d[0].cpu().numpy()) 221 | xx = tuple(pts_2d[1].cpu().numpy()) 222 | yy = tuple(pts_2d[2].cpu().numpy()) 223 | zz = tuple(pts_2d[3].cpu().numpy()) 224 | 225 | line_type = cv2.FILLED 226 | arrow_len = 0 227 | tmp = color.copy() 228 | tmp1 = tmp.copy() 229 | tmp1 = cv2.arrowedLine( 230 | tmp1, 231 | origin, 232 | xx, 233 | color=(0, 0, 255), 234 | thickness=thickness, 235 | line_type=line_type, 236 | tipLength=arrow_len, 237 | ) 238 | mask = np.linalg.norm(tmp1 - tmp, axis=-1) > 0 239 | tmp[mask] = tmp[mask] * transparency + tmp1[mask] * (1 - transparency) 240 | tmp1 = tmp.copy() 241 | tmp1 = cv2.arrowedLine( 242 | tmp1, 243 | origin, 244 | yy, 245 | color=(0, 255, 0), 246 | thickness=thickness, 247 | line_type=line_type, 248 | tipLength=arrow_len, 249 | ) 250 | mask = np.linalg.norm(tmp1 - tmp, axis=-1) > 0 251 | tmp[mask] = tmp[mask] * transparency + tmp1[mask] * (1 - transparency) 252 | tmp1 = tmp.copy() 253 | tmp1 = cv2.arrowedLine( 254 | tmp1, 255 | origin, 256 | zz, 257 | color=(255, 0, 0), 258 | thickness=thickness, 259 | line_type=line_type, 260 | tipLength=arrow_len, 261 | ) 262 | mask = np.linalg.norm(tmp1 - tmp, axis=-1) > 0 263 | tmp[mask] = tmp[mask] * transparency + tmp1[mask] * (1 - transparency) 264 | tmp = tmp.astype(np.uint8) 265 | if is_input_rgb: 266 | tmp = cv2.cvtColor(tmp, cv2.COLOR_BGR2RGB) 267 | 268 | return tmp 269 | -------------------------------------------------------------------------------- /neuralfeels/datasets/sdf_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
5 | 6 | # Utility functions for SDF computation and visualization 7 | 8 | import colorsys 9 | import hashlib 10 | import os 11 | 12 | import matplotlib as mpl 13 | import numpy as np 14 | import open3d as o3d 15 | import trimesh 16 | from matplotlib import cm 17 | from matplotlib.colors import ListedColormap 18 | from scipy import ndimage 19 | from scipy.spatial import cKDTree as KDTree 20 | 21 | from neuralfeels.contrib.urdf import SceneGraph, URDFParser, URDFTree 22 | 23 | 24 | def load_gt_mesh(file, color=True): 25 | """ 26 | Load ground-truth mesh from URDF file 27 | """ 28 | # Parse the URDF file 29 | parser = URDFParser(file) 30 | parser.parse() 31 | # Construct the URDF tree 32 | links = parser.links 33 | joints = parser.joints 34 | tree = URDFTree(links, joints) 35 | scene = SceneGraph(tree.root) 36 | mesh = scene.getMesh()[0] 37 | 38 | # SDF computation needs trimesh, but visualization needs open3d so we load both 39 | mesh_trimesh = trimesh.Trimesh( 40 | np.asarray(mesh.vertices), 41 | np.asarray(mesh.triangles), 42 | vertex_normals=np.asarray(mesh.vertex_normals), 43 | ) 44 | mesh_path = tree.root.link.visuals[0].geometry_mesh["filename"] 45 | mesh_scale = tree.root.link.visuals[0].geometry_mesh["scale"][0] 46 | object_name = os.path.dirname(mesh_path).split("/")[-1] 47 | mesh_o3d = o3d.io.read_triangle_mesh(mesh_path, color) 48 | mesh_o3d = mesh_o3d.scale(mesh_scale, center=mesh_o3d.get_center()) 49 | 50 | if not color: 51 | # assign random color, taken from viser: https://nerfstudio-project.github.io/viser/ 52 | mesh_o3d.paint_uniform_color( 53 | colorsys.hls_to_rgb( 54 | np.random.default_rng( 55 | np.frombuffer( 56 | hashlib.md5(object_name.encode("utf-8")).digest(), 57 | dtype="uint32", 58 | ) 59 | + 5 60 | ).uniform(), 61 | 0.6, 62 | 0.9, 63 | ) 64 | ) 65 | 66 | return mesh_trimesh, mesh_o3d 67 | 68 | 69 | def saturate_colors(rgb_array, factor): 70 | """Increase the saturation of an RGB array by a factor.""" 71 | import colorsys 72 | 73 | # Convert the array to HSL color space 74 | hsl_array = np.zeros_like(rgb_array) 75 | for i in range(rgb_array.shape[0]): 76 | hsl_array[i] = colorsys.rgb_to_hls(*rgb_array[i]) 77 | 78 | # Scale index 1 of HLS, i.e. the lightness channel (despite the name, not the saturation channel) 79 | hsl_array[:, 1] *= factor 80 | 81 | # Convert the array back to RGB color space 82 | rgb_array_out = np.zeros_like(rgb_array) 83 | for i in range(rgb_array.shape[0]): 84 | rgb_array_out[i] = colorsys.hls_to_rgb(*hsl_array[i]) 85 | 86 | return rgb_array_out 87 | 88 | 89 | def get_grid_pts(dims, transform): 90 | x = np.arange(dims[0]) 91 | y = np.arange(dims[1]) 92 | z = np.arange(dims[2]) 93 | x = x * transform[0, 0] + transform[0, 3] 94 | y = y * transform[1, 1] + transform[1, 3] 95 | z = z * transform[2, 2] + transform[2, 3] 96 | 97 | return x, y, z 98 | 99 | 100 | def eval_sdf_interp(sdf_interp, pc, handle_oob="except", oob_val=0.0): 101 | """param: 102 | handle_oob: dictates what to do with out of bounds points. Must 103 | take either 'except', 'mask' or 'fill'. 104 | """ 105 | 106 | reshaped = False 107 | if pc.ndim != 2: 108 | reshaped = True 109 | pc_shape = pc.shape[:-1] 110 | pc = pc.reshape(-1, 3) 111 | 112 | if handle_oob == "except": 113 | sdf_interp.bounds_error = True 114 | elif handle_oob == "mask": 115 | dummy_val = 1e99 116 | sdf_interp.bounds_error = False 117 | sdf_interp.fill_value = dummy_val 118 | elif handle_oob == "fill": 119 | sdf_interp.bounds_error = False 120 | sdf_interp.fill_value = oob_val 121 | else: 122 | raise ValueError("handle_oob must take a recognised value.")
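# evaluate the interpolator on the (flattened) query points; the branch above configured bounds_error / fill_value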
123 | 124 | sdf = sdf_interp(pc) 125 | 126 | if reshaped: 127 | sdf = sdf.reshape(pc_shape) 128 | 129 | if handle_oob == "mask": 130 | valid_mask = sdf != dummy_val 131 | return sdf, valid_mask 132 | 133 | return sdf 134 | 135 | 136 | def get_colormap(sdf_range=(-2, 2), surface_cutoff=0.01): 137 | white = np.array([1.0, 1.0, 1.0, 1.0]) 138 | sdf_max = sdf_range[1] + surface_cutoff - (sdf_range[1] % surface_cutoff)  # round up, without mutating the argument 139 | sdf_min = sdf_range[0] - (surface_cutoff - (-sdf_range[0] % surface_cutoff)) 140 | 141 | positive_n_cols = int(sdf_max / surface_cutoff) 142 | viridis = cm.get_cmap("viridis", positive_n_cols) 143 | positive_colors = viridis(np.linspace(0.2, 1, int(positive_n_cols))) 144 | positive_colors[0] = white 145 | 146 | negative_n_cols = int(np.abs(sdf_min) / surface_cutoff) 147 | redpurple = cm.get_cmap("RdPu", negative_n_cols).reversed() 148 | negative_colors = redpurple(np.linspace(0.0, 0.7, negative_n_cols)) 149 | negative_colors[-1] = white 150 | 151 | colors = np.concatenate((negative_colors, white[None, :], positive_colors), axis=0) 152 | sdf_cmap = ListedColormap(colors) 153 | 154 | norm = mpl.colors.Normalize(sdf_min, sdf_max) 155 | sdf_cmap_fn = cm.ScalarMappable(norm=norm, cmap=sdf_cmap) 156 | # plt.colorbar(sdf_cmap_fn) 157 | # plt.show() 158 | return sdf_cmap_fn 159 | 160 | 161 | def voxelize_subdivide( 162 | mesh, pitch, origin_voxel=np.zeros(3), max_iter=10, edge_factor=2.0 163 | ): 164 | """ 165 | Adapted from the trimesh function to allow for shifts in the origin 166 | of the SDF grid, i.e. there doesn't need to be a voxel 167 | centered at [0, 0, 0]. 168 | 169 | Voxelize a surface by subdividing a mesh until every edge is 170 | shorter than: (pitch / edge_factor) 171 | Parameters 172 | ----------- 173 | mesh: Trimesh object 174 | pitch: float, side length of a single voxel cube 175 | max_iter: int, cap maximum subdivisions or None for no limit. 176 | edge_factor: float, target edge length is (pitch / edge_factor). 177 | Returns 178 | ----------- 179 | VoxelGrid instance representing the voxelized mesh. 180 | """ 181 | max_edge = pitch / edge_factor 182 | 183 | if max_iter is None: 184 | longest_edge = np.linalg.norm( 185 | mesh.vertices[mesh.edges[:, 0]] - mesh.vertices[mesh.edges[:, 1]], axis=1 186 | ).max() 187 | max_iter = max(int(np.ceil(np.log2(longest_edge / max_edge))), 0) 188 | 189 | # get the same mesh subdivided so every edge is shorter 190 | # than a factor of our pitch 191 | v, f = trimesh.remesh.subdivide_to_size( 192 | mesh.vertices, mesh.faces, max_edge=max_edge, max_iter=max_iter 193 | ) 194 | 195 | # convert the vertices to their voxel grid position 196 | hit = (v - origin_voxel) / pitch 197 | 198 | # Provided edge_factor > 1 and max_iter is large enough, this is 199 | # sufficient to preserve 6-connectivity at the level of voxels. 
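# snap each subdivided vertex to its nearest voxel index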
hit = np.round(hit).astype(int) 201 | 202 | # remove duplicates 203 | unique, _ = trimesh.grouping.unique_rows(hit) 204 | 205 | # get the unique occupied voxel indices 206 | occupied_index = hit[unique] 207 | 208 | origin_index = occupied_index.min(axis=0) 209 | origin_position = origin_voxel + origin_index * pitch 210 | 211 | return trimesh.voxel.base.VoxelGrid( 212 | trimesh.voxel.encoding.SparseBinaryEncoding(occupied_index - origin_index), 213 | transform=trimesh.transformations.scale_and_translate( 214 | scale=pitch, translate=origin_position 215 | ), 216 | ) 217 | 218 | 219 | def sdf_from_occupancy(occ_map, voxel_size): 220 | # Convert occupancy field to sdf field 221 | inv_occ_map = 1 - occ_map 222 | 223 | # Get signed distance from occupancy map and inv map 224 | map_dist = ndimage.distance_transform_edt(inv_occ_map) 225 | inv_map_dist = ndimage.distance_transform_edt(occ_map) 226 | 227 | sdf = map_dist - inv_map_dist 228 | 229 | # metric units 230 | sdf = sdf.astype(float) 231 | sdf = sdf * voxel_size 232 | 233 | return sdf 234 | 235 | 236 | def sdf_from_mesh(mesh, voxel_size, extend_factor=0.15, origin_voxel=np.zeros(3)): 237 | # Convert mesh to occupancy field 238 | voxels = voxelize_subdivide(mesh, voxel_size, origin_voxel=origin_voxel) 239 | voxels = voxels.fill() 240 | occ_map = voxels.matrix 241 | transform = voxels.transform 242 | 243 | # Extend voxel grid around object 244 | extend = np.array(occ_map.shape) * extend_factor 245 | extend = np.repeat(extend, 2).reshape(3, 2) 246 | extend = np.round(extend).astype(int) 247 | occ_map = np.pad(occ_map, extend) 248 | transform[:3, 3] -= extend[:, 0] * voxel_size 249 | 250 | sdf = sdf_from_occupancy(occ_map, voxel_size) 251 | 252 | return sdf, np.array(transform) 253 | 254 | 255 | def colorize_mesh(color_pcd, mesh, sigma=0.01): 256 | """ 257 | Colorize the mesh by interpolating the colors of the point cloud with a Gaussian kernel 258 | """ 259 | # downsample the point cloud 260 | color_pcd = color_pcd.voxel_down_sample(voxel_size=0.001) 261 | pc_positions = color_pcd.point.positions.numpy().astype(np.float64) 262 | pc_colors = color_pcd.point.colors.numpy() 263 | pc_tree = KDTree(pc_positions) 264 | # Query the 20 nearest point-cloud neighbors of each mesh vertex 265 | distances, indices = pc_tree.query(np.asarray(mesh.vertices), k=20) 266 | # Compute the weights for each neighboring point based on its distance to the vertex using a Gaussian kernel 267 | weights = np.exp(-(distances**2) / (2 * sigma**2)) 268 | weights /= np.sum(weights, axis=1)[:, np.newaxis] 269 | mesh_colors = np.sum(weights[:, :, np.newaxis] * pc_colors[indices], axis=1) 270 | return o3d.utility.Vector3dVector(mesh_colors) 271 | --------------------------------------------------------------------------------