├── images
├── .gitignore
├── teaser.png
├── method_overview.png
└── qualitative_results.png
├── notebooks
├── .gitignore
└── visualize_predictions.ipynb
├── robopose
├── models
│ ├── __init__.py
│ └── wide_resnet.py
├── scripts
│ ├── __init__.py
│ ├── .gitignore
│ └── example_multigpu.py
├── utils
│ ├── __init__.py
│ ├── random.py
│ ├── resources.py
│ ├── timer.py
│ ├── logging.py
│ ├── xarray.py
│ ├── multiepoch_dataloader.py
│ ├── distributed.py
│ └── tensor_collection.py
├── datasets
│ ├── __init__.py
│ ├── wrappers
│ │ ├── __init__.py
│ │ ├── augmentation_wrapper.py
│ │ └── base.py
│ ├── urdf_dataset.py
│ ├── samplers.py
│ ├── utils.py
│ ├── articulated_dataset.py
│ ├── datasets_cfg.py
│ ├── dream.py
│ └── augmentations.py
├── evaluation
│ ├── __init__.py
│ ├── meters
│ │ ├── __init__.py
│ │ ├── errors.py
│ │ ├── base.py
│ │ ├── utils.py
│ │ ├── dream_meters.py
│ │ └── craves_meters.py
│ ├── eval_runner
│ │ ├── __init__.py
│ │ └── articulated_obj_eval.py
│ ├── pred_runner
│ │ ├── __init__.py
│ │ ├── articulated_predictions.py
│ │ └── video_predictions.py
│ ├── runner_utils.py
│ └── data_utils.py
├── integrated
│ ├── __init__.py
│ └── pose_predictor.py
├── third_party
│ └── craves
│ │ ├── __init__.py
│ │ ├── load_results.py
│ │ ├── get_2d_gt.py
│ │ ├── eval_utils.py
│ │ ├── d3.py
│ │ └── heatmap_utils.py
├── libmesh
│ ├── __init__.py
│ ├── meshlab_templates
│ │ ├── template_ply_texture_to_obj.mlx
│ │ ├── template_sample_points.mlx
│ │ ├── template_add_uv.mlx
│ │ ├── template_downsample_textures.mlx
│ │ ├── template_vertexcolor_to_texture.mlx
│ │ ├── template_downsample.mlx
│ │ ├── template_transfer_texture.mlx
│ │ ├── template_remesh_poisson.mlx
│ │ └── template_remesh_marchingcubes.mlx
│ ├── meshlab_converter.py
│ └── urdf_utils.py
├── simulator
│ ├── __init__.py
│ ├── client.py
│ ├── caching.py
│ ├── base_scene.py
│ └── body.py
├── __init__.py
├── lib3d
│ ├── __init__.py
│ ├── robot_ops.py
│ ├── mesh_ops.py
│ ├── transform_ops.py
│ ├── transform.py
│ ├── cropping.py
│ ├── articulated_mesh_database.py
│ ├── camera_geometry.py
│ ├── robopose_ops.py
│ └── rotations.py
├── config.py
├── training
│ ├── articulated_models_cfg.py
│ └── pose_articulated_forward_loss.py
├── rendering
│ ├── bullet_scene_renderer.py
│ └── bullet_batch_renderer.py
├── visualization
│ ├── bokeh_utils.py
│ ├── model_debug.py
│ └── singleview_articulated.py
└── paper_models_cfg.py
├── tox.ini
├── job-runner-preamble.sh
├── .gitmodules
├── setup.py
├── .gitignore
├── rclone.conf
├── job-runner-config.yaml
├── LICENSE
└── environment.yaml

/images/.gitignore: -------------------------------------------------------------------------------- 1 | gs-* --------------------------------------------------------------------------------
/notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | perso/ --------------------------------------------------------------------------------
/robopose/models/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/robopose/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/robopose/utils/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
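The `__init__.py` files above are empty: they only mark `robopose.models`, `robopose.scripts` and `robopose.utils` as importable packages. As a quick sanity check of this layout, a minimal sketch (assuming the package has been installed from the repository root, e.g. with `pip install -e .`; `get_logger` and the thread-limiting side effect come from `robopose/utils/logging.py` and `robopose/__init__.py` shown further down) could be:

    # Importing the top-level package prints "Setting OMP and MKL num threads to 1."
    import robopose
    from robopose.utils.logging import get_logger

    # Logger with the elapsed-time formatter defined in robopose/utils/logging.py
    logger = get_logger(__name__)
    logger.info('robopose imported from %s', robopose.__file__)
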
/robopose/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/integrated/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/evaluation/meters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/evaluation/eval_runner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/evaluation/pred_runner/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/third_party/craves/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /robopose/scripts/.gitignore: -------------------------------------------------------------------------------- 1 | upload_models.py 2 | -------------------------------------------------------------------------------- /images/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylabbe/robopose/HEAD/images/teaser.png -------------------------------------------------------------------------------- /robopose/datasets/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .augmentation_wrapper import AugmentationWrapper 2 | -------------------------------------------------------------------------------- /images/method_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylabbe/robopose/HEAD/images/method_overview.png -------------------------------------------------------------------------------- /images/qualitative_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylabbe/robopose/HEAD/images/qualitative_results.png -------------------------------------------------------------------------------- /robopose/libmesh/__init__.py: -------------------------------------------------------------------------------- 1 | from .meshlab_converter import ply_to_obj, downsample_obj 2 | from .urdf_utils import obj_to_urdf 3 | -------------------------------------------------------------------------------- /robopose/simulator/__init__.py: -------------------------------------------------------------------------------- 1 | from .body import Body 2 | from .camera import Camera 3 | from .base_scene import BaseScene 4 | from .caching import BodyCache 5 | -------------------------------------------------------------------------------- /robopose/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['MKL_NUM_THREADS'] = '1' 3 | os.environ['OMP_NUM_THREADS'] = '1' 
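# Pinning MKL/OMP to a single thread at import time most likely avoids CPU oversubscription
# when many processes are spawned (DataLoader workers, distributed training);
# job-runner-preamble.sh exports the same two variables for SLURM jobs.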
4 | print("Setting OMP and MKL num threads to 1.") 5 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | slow: A test that may take few seconds/minutes to run. 4 | 5 | [flake8] 6 | ignore = E231, E226, E731 7 | max-line-length = 120 -------------------------------------------------------------------------------- /job-runner-preamble.sh: -------------------------------------------------------------------------------- 1 | source $CONDA_ROOT/bin/activate 2 | conda activate $CONDA_ENV 3 | cd $PROJECT_DIR 4 | 5 | export MKL_NUM_THREADS=1 6 | export OMP_NUM_THREADS=1 7 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_ply_texture_to_obj.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /robopose/lib3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform import Transform, parse_pose_args 2 | from .rotations import compute_rotation_matrix_from_ortho6d 3 | from .transform_ops import transform_pts, invert_T 4 | -------------------------------------------------------------------------------- /robopose/utils/random.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import numpy as np 3 | 4 | @contextlib.contextmanager 5 | def temp_numpy_seed(seed): 6 | state = np.random.get_state() 7 | np.random.seed(seed) 8 | try: 9 | yield 10 | finally: 11 | np.random.set_state(state) 12 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/bullet3"] 2 | path = deps/bullet3 3 | url = https://github.com/ylabbe/bullet3.git 4 | [submodule "deps/job-runner"] 5 | path = deps/job-runner 6 | url = https://github.com/ylabbe/job-runner 7 | [submodule "deps/urdfpytorch"] 8 | path = deps/urdfpytorch 9 | url = https://github.com/ylabbe/robopose_urdfpytorch 10 | -------------------------------------------------------------------------------- /robopose/simulator/client.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import pybullet as pb 3 | 4 | 5 | class BulletClient: 6 | def __init__(self, client_id): 7 | self.client_id = client_id 8 | 9 | def __getattr__(self, name): 10 | attribute = getattr(pb, name) 11 | attribute = functools.partial(attribute, physicsClientId=self.client_id) 12 | return attribute 13 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | from torch.utils.cpp_extension import BuildExtension, CppExtension 4 | from os import path 5 | 6 | here = path.abspath(path.dirname(__file__)) 7 | 8 | setup( 9 | name='robopose', 10 | version='1.0.0', 11 | description='RoboPose', 12 | packages=find_packages(), 13 | ext_modules=[], 14 | cmdclass={} 15 | ) 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | 
.ipynb_checkpoints 3 | .vscode 4 | logs 5 | *.pyc 6 | dask-worker-space 7 | *.lprof 8 | *.prof 9 | *egg-info* 10 | *.gv* 11 | test_data/ 12 | .venv 13 | .dir-locals.el 14 | test_record 15 | stdout* 16 | *.slurm 17 | .mypy_cache 18 | local_data 19 | dist/ 20 | *.so 21 | build/ 22 | *.temp 23 | deps/owi-description/ 24 | deps/baxter-description/ 25 | deps/kuka-description/ 26 | deps/panda-description/ 27 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_sample_points.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /robopose/datasets/wrappers/augmentation_wrapper.py: -------------------------------------------------------------------------------- 1 | from .base import SceneDatasetWrapper 2 | 3 | 4 | class AugmentationWrapper(SceneDatasetWrapper): 5 | def __init__(self, scene_ds, augmentation): 6 | self.augmentation = augmentation 7 | super().__init__(scene_ds) 8 | self.frame_index = self.scene_ds.frame_index 9 | 10 | def process_data(self, data): 11 | rgb, mask, obs = data 12 | return self.augmentation(rgb, mask, obs) 13 | -------------------------------------------------------------------------------- /rclone.conf: -------------------------------------------------------------------------------- 1 | [robopose] 2 | type = drive 3 | scope = drive.readonly 4 | root_folder_id = 1Mk8AKk49NdNb4Cu_6deGd6IFJAoRy40f 5 | token = {"access_token":"ya29.a0AfH6SMDX3svj1a7JOoBFUUPWA4O9bks50nTx4dCZo1Jw0tH9NumA_boBKYzC6Y-YLNfcEA-qFSst59K7YdqliB6wO3J8rc0w4BK0Vzh52RnMZdgyrtAvJFZmbNg1NI24EFbx6g72BqZGfz3N9QRbuqTyamVK","token_type":"Bearer","refresh_token":"1//03OZZkpUsz5NjCgYIARAAGAMSNwF-L9IrwE7zpig_6PERe2_MnccwlkLnCJhb_6TZdRBcCilVdEuCx_GQnjWcAzkA8ptV7yqgLuA","expiry":"2021-04-16T11:53:15.516610696+02:00"} 6 | -------------------------------------------------------------------------------- /robopose/datasets/wrappers/base.py: -------------------------------------------------------------------------------- 1 | class SceneDatasetWrapper: 2 | def __init__(self, scene_ds): 3 | self.scene_ds = scene_ds 4 | 5 | @property 6 | def unwrapped(self): 7 | if isinstance(self, SceneDatasetWrapper): 8 | return self.scene_ds 9 | else: 10 | return self 11 | 12 | def __len__(self): 13 | return len(self.scene_ds) 14 | 15 | def __getitem__(self, idx): 16 | data = self.scene_ds[idx] 17 | return self.process_data(data) 18 | 19 | def process_data(self, data): 20 | return data 21 | -------------------------------------------------------------------------------- /robopose/utils/resources.py: -------------------------------------------------------------------------------- 1 | import os 2 | import psutil 3 | from shutil import which 4 | 5 | 6 | def is_egl_available(): 7 | return is_gpu_available and 'EGL_VISIBLE_DEVICES' in os.environ 8 | 9 | 10 | def is_gpu_available(): 11 | return which('nvidia-smi') is not None 12 | 13 | 14 | def is_slurm_available(): 15 | return which('sinfo') is not None 16 | 17 | 18 | def get_total_memory(): 19 | current_process = psutil.Process(os.getpid()) 20 | mem = current_process.memory_info().rss 21 | for child in current_process.children(recursive=True): 22 | mem += child.memory_info().rss 23 | return mem / 1e9 24 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_add_uv.mlx: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /robopose/evaluation/meters/errors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import transforms3d 4 | 5 | 6 | def loc_rot_pose_errors(TXO_pred, TXO_gt): 7 | loc_errors = TXO_pred[:, :3, -1] - TXO_gt[:, :3, -1] 8 | ypr_errors = [] 9 | for R_pred_n, R_gt_n in zip(TXO_pred[:, :3, :3], TXO_gt[:, :3, :3]): 10 | ypr_pred = np.array(transforms3d.euler.mat2euler(R_pred_n.cpu().numpy())) 11 | ypr_gt = np.array(transforms3d.euler.mat2euler(R_gt_n.cpu().numpy())) 12 | ypr_errors.append(np.abs(ypr_pred - ypr_gt)) 13 | ypr_errors = torch.as_tensor(np.stack(ypr_errors), device=TXO_pred.device, dtype=TXO_pred.dtype) 14 | ypr_errors *= 180 / np.pi 15 | return loc_errors, ypr_errors 16 | -------------------------------------------------------------------------------- /robopose/scripts/example_multigpu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from robopose.utils.distributed import init_distributed_mode, get_world_size, get_tmp_dir, get_rank 4 | from robopose.utils.logging import get_logger 5 | logger = get_logger(__name__) 6 | 7 | 8 | if __name__ == '__main__': 9 | init_distributed_mode() 10 | proc_id = get_rank() 11 | n_tasks = get_world_size() 12 | n_cpus = os.environ.get('N_CPUS', 'not specified') 13 | logger.info(f'Number of processes (=num GPUs): {n_tasks}') 14 | logger.info(f'Process ID: {proc_id}') 15 | logger.info(f'TMP Directory for this job: {get_tmp_dir()}') 16 | logger.info(f'GPU CUDA ID: {torch.cuda.current_device()}') 17 | logger.info(f'Max number of CPUs for this process: {n_cpus}') 18 | -------------------------------------------------------------------------------- /robopose/lib3d/robot_ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def clamp(x, t_min, t_max): 6 | return torch.max(torch.min(x, t_max), t_min) 7 | 8 | 9 | def add_noise_joints(q0, std_interval_ratio, q_limits): 10 | assert q_limits.dim() == 2 11 | assert q_limits.shape[0] == 2 12 | assert q0.dim() == 2 13 | assert q_limits.shape[1] == q0.shape[1] 14 | bsz = len(q0) 15 | 16 | q = q0.clone() 17 | std = std_interval_ratio * (q_limits[1] - q_limits[0]) 18 | std = std.unsqueeze(0).view(-1).cpu().numpy() 19 | noise = np.random.normal(loc=0, scale=std, size=(bsz, len(std))) 20 | noise = torch.as_tensor(noise).to(q.device).to(q.dtype) 21 | q = q + noise 22 | q = clamp(q, q_limits[0] + 1e-4, q_limits[1] - 1e-4) 23 | return q 24 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_downsample_textures.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /job-runner-config.yaml: -------------------------------------------------------------------------------- 1 | default_project: 'robopose' 2 | default_queue: 'gpu_p1' 3 | 4 | projects: 5 | robopose: 6 | dir: '$WORK/projects/robopose/robopose' # Replace $WORK/projects by location of robopose directory 7 | preamble: 'job-runner-preamble.sh' 8 | conda_env: 
'robopose' 9 | default_queue: 'gpu_p1' 10 | 11 | conda: 12 | root: '$WORK/anaconda' # Path to anaconda installation 13 | 14 | storage: 15 | root: '$SCRATCH/jobs' # Where to store information of the jobs 16 | 17 | gpu_queues: 18 | gpu_p1: # Provide SLURM information relative to the queue you want to use 19 | n_cpus_per_node: 40 20 | n_gpus_per_node: 4 21 | flags: 22 | partition: 'gpu_p1' 23 | time: '20:00:00' 24 | hint: 'nomultithread' 25 | qos: 'qos_gpu-t3' 26 | 27 | local: # Don't touch this 28 | n_cpus_per_node: 'auto' 29 | n_gpus_per_node: 'auto' 30 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_vertexcolor_to_texture.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_downsample.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_transfer_texture.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /robopose/utils/timer.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | 4 | class Timer: 5 | def __init__(self): 6 | self.start_time = None 7 | self.elapsed = datetime.timedelta() 8 | self.is_running = False 9 | 10 | def reset(self): 11 | self.start_time = None 12 | self.elapsed = 0. 
13 | self.is_running = False 14 | 15 | def start(self): 16 | self.elapsed = datetime.timedelta() 17 | self.is_running = True 18 | self.start_time = datetime.datetime.now() 19 | return self 20 | 21 | def pause(self): 22 | if self.is_running: 23 | self.elapsed += datetime.datetime.now() - self.start_time 24 | self.is_running = False 25 | 26 | def resume(self): 27 | if not self.is_running: 28 | self.start_time = datetime.datetime.now() 29 | self.is_running = True 30 | 31 | def stop(self): 32 | self.pause() 33 | elapsed = self.elapsed 34 | self.reset() 35 | return elapsed 36 | -------------------------------------------------------------------------------- /robopose/utils/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from datetime import timedelta 4 | 5 | 6 | class ElapsedFormatter(): 7 | 8 | def __init__(self): 9 | self.start_time = time.time() 10 | 11 | def format(self, record): 12 | elapsed_seconds = record.created - self.start_time 13 | elapsed = timedelta(seconds=elapsed_seconds) 14 | return "{} - {}".format(elapsed, record.getMessage()) 15 | 16 | 17 | def get_logger(name): 18 | logger = logging.getLogger(name) 19 | handler = logging.StreamHandler() 20 | handler.setFormatter(ElapsedFormatter()) 21 | logger.addHandler(handler) 22 | logger.setLevel(logging.INFO) 23 | return logger 24 | 25 | 26 | def set_logging_level(level): 27 | if 'level' == 'debug': 28 | loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] 29 | for logger in loggers: 30 | if 'robopose' in logger.name: 31 | logger.setLevel(logging.DEBUG) 32 | else: 33 | pass 34 | return 35 | -------------------------------------------------------------------------------- /robopose/third_party/craves/load_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import torch 4 | from pathlib import Path 5 | 6 | from robopose.datasets.craves import parse_name 7 | import robopose.utils.tensor_collection as tc 8 | from robopose.config import CRAVES_YOUTUBE_RESULTS_DIR, CRAVES_LAB_RESULTS_DIR 9 | 10 | 11 | def load_craves_results(ds_name): 12 | if 'youtube' in ds_name: 13 | results_dir = CRAVES_YOUTUBE_RESULTS_DIR 14 | else: 15 | results_dir = CRAVES_LAB_RESULTS_DIR 16 | 17 | results_json = Path(results_dir).glob('*.json') 18 | infos = [] 19 | keypoints = [] 20 | for result_json in results_json: 21 | result = json.loads(result_json.read_text()) 22 | keypoints.append(torch.tensor(result['d2_key'])) 23 | scene_id, view_id = parse_name(result_json.with_suffix('').name) 24 | infos.append(dict(scene_id=scene_id, view_id=view_id)) 25 | infos = pd.DataFrame(infos) 26 | keypoints = torch.stack(keypoints) 27 | data = tc.PandasTensorCollection(infos, keypoints_2d=keypoints) 28 | return data 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yann Labbé 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_remesh_poisson.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /robopose/datasets/urdf_dataset.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pathlib import Path 3 | 4 | 5 | class UrdfDataset: 6 | def __init__(self, ds_dir): 7 | ds_dir = Path(ds_dir) 8 | index = [] 9 | for urdf_dir in Path(ds_dir).iterdir(): 10 | urdf_paths = list(urdf_dir.glob('*.urdf')) 11 | if len(urdf_paths) == 1: 12 | urdf_path = urdf_paths[0] 13 | infos = dict( 14 | label=urdf_dir.name, 15 | urdf_path=urdf_path.as_posix(), 16 | scale=1.0, 17 | ) 18 | index.append(infos) 19 | self.index = pd.DataFrame(index) 20 | 21 | def __getitem__(self, idx): 22 | return self.index.iloc[idx] 23 | 24 | def __len__(self): 25 | return len(self.index) 26 | 27 | 28 | class OneUrdfDataset: 29 | def __init__(self, urdf_path, label, scale=1.0): 30 | index = [ 31 | dict(urdf_path=urdf_path, 32 | label=label, 33 | scale=scale) 34 | ] 35 | self.index = pd.DataFrame(index) 36 | 37 | def __len__(self): 38 | return len(self.index) 39 | 40 | def __getitem__(self, idx): 41 | return self.index.iloc[idx] 42 | -------------------------------------------------------------------------------- /robopose/datasets/samplers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.utils.data import Sampler 4 | from robopose.utils.random import temp_numpy_seed 5 | 6 | class PartialSampler(Sampler): 7 | def __init__(self, ds, epoch_size): 8 | self.n_items = len(ds) 9 | self.epoch_size = min(epoch_size, len(ds)) 10 | super().__init__(None) 11 | 12 | def __len__(self): 13 | return self.epoch_size 14 | 15 | def __iter__(self): 16 | return (i.item() for i in torch.randperm(self.n_items)[:len(self)]) 17 | 18 | 19 | class DistributedSceneSampler(Sampler): 20 | def __init__(self, scene_ds, num_replicas, rank, shuffle=True): 21 | indices = np.arange(len(scene_ds)) 22 | if shuffle: 23 | with temp_numpy_seed(0): 24 | indices = np.random.permutation(indices) 25 | all_indices = np.array_split(indices, num_replicas) 26 | local_indices = all_indices[rank].tolist() 27 | self.local_indices = local_indices 28 | 29 | def __len__(self): 30 | return len(self.local_indices) 31 | 32 | def __iter__(self): 33 | return iter(self.local_indices) 34 | 35 | 36 | class ListSampler(Sampler): 37 | def __init__(self, ids): 38 | self.ids = ids 39 | 40 | def __len__(self): 41 | return len(self.ids) 42 | 43 | def __iter__(self): 44 | return 
iter(self.ids) 45 | -------------------------------------------------------------------------------- /robopose/utils/xarray.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def xr_merge(ds1, ds2, on, how='left', dim1='dim_0', dim2='dim_0', fill_value=np.nan): 5 | if how != 'left': 6 | raise NotImplementedError 7 | 8 | ds1 = ds1.copy() 9 | ds1 = ds1.reset_coords().set_coords(on) 10 | ds2 = ds2.reset_coords().set_coords(on) 11 | 12 | ds2 = ds2.rename({dim2: dim1}) 13 | df1 = ds1.reset_coords()[on].to_dataframe() 14 | df2 = ds2.reset_coords()[on].to_dataframe() 15 | 16 | df1['idx1'] = np.arange(len(df1)) 17 | df2['idx2'] = np.arange(len(df2)) 18 | 19 | merge = df1.merge(df2, on=on, how=how) 20 | assert len(merge) == ds1.dims[dim1] 21 | 22 | idx1 = merge['idx1'].values 23 | idx2 = merge['idx2'].values 24 | mask = np.isfinite(idx2) 25 | idx1 = idx1[mask] 26 | idx2 = idx2[mask].astype(np.int) 27 | 28 | for k, data_var in ds2.data_vars.items(): 29 | array = data_var.values 30 | if isinstance(fill_value, dict): 31 | fill = fill_value.get(k, float('nan')) 32 | else: 33 | fill = fill_value 34 | assert data_var.dims[0] == dim1 35 | shape = list(array.shape) 36 | shape[0] = len(merge) 37 | new_array = np.empty(shape, dtype=np.array(fill).dtype) 38 | new_array[:] = fill 39 | new_array[idx1] = array[idx2] 40 | ds1[k] = data_var.dims, new_array 41 | return ds1 42 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_templates/template_remesh_marchingcubes.mlx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /robopose/evaluation/meters/base.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import torch 3 | from collections import defaultdict 4 | 5 | from robopose.utils.distributed import get_world_size, get_rank 6 | 7 | 8 | class Meter: 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.datas = defaultdict(list) 14 | 15 | def add(self, pred_data, gt_data): 16 | raise NotImplementedError 17 | 18 | def is_data_valid(self, data): 19 | raise NotImplementedError 20 | 21 | def gather_distributed(self, tmp_dir): 22 | tmp_dir = Path(tmp_dir) 23 | tmp_dir.mkdir(exist_ok=True, parents=True) 24 | rank, world_size = get_rank(), get_world_size() 25 | tmp_file_template = (tmp_dir / 'rank={rank}.pth.tar').as_posix() 26 | if rank > 0: 27 | tmp_file = tmp_file_template.format(rank=rank) 28 | torch.save(self.datas, tmp_file) 29 | 30 | if world_size > 1: 31 | torch.distributed.barrier() 32 | 33 | if rank == 0 and world_size > 1: 34 | all_datas = self.datas 35 | for n in range(1, world_size): 36 | tmp_file = tmp_file_template.format(rank=n) 37 | datas = torch.load(tmp_file) 38 | for k in all_datas.keys(): 39 | all_datas[k].extend(datas.get(k, [])) 40 | Path(tmp_file).unlink() 41 | self.datas = all_datas 42 | 43 | if world_size > 1: 44 | torch.distributed.barrier() 45 | return 46 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: robopose 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | dependencies: 6 | - python=3.7.6 7 | - 
pinocchio=2.3.1 8 | - assimp=4.1.0 9 | - pytorch=1.3.1=py3.7_cuda10.1.243_cudnn7.6.3_0 10 | - torchvision=0.4.2=py37_cu101 11 | - rclone=1.52.3 12 | - ffmpeg=4.2 13 | - bzip2=1.0.8 14 | - git=2.28.0 15 | - cmake=3.14.0 16 | - pip=19.3.1 17 | - numpy=1.17.4 18 | - pillow=6.0.0 19 | - nodejs 20 | - ipywidgets 21 | - ipython 22 | - ffmpeg 23 | - phantomjs 24 | - pip: 25 | - selenium 26 | - moviepy 27 | - omegaconf 28 | - pytube 29 | - line_profiler 30 | - msgpack==0.6.2 31 | - imageio==2.6.1 32 | - simplejson==3.17.0 33 | - opencv-python==4.1.2.30 34 | - opencv-contrib-python==4.4.0.42 35 | - torchnet==0.0.4 36 | - tqdm==4.41.1 37 | - lxml==4.4.2 38 | - transforms3d==0.3.1 39 | - distributed==2.9.1 40 | - joblib==0.14.1 41 | - pandas==0.25.3 42 | - xarray==0.14.1 43 | - pyarrow==0.15.1 44 | - matplotlib==3.1.2 45 | - bokeh==1.4.0 46 | - plyfile==0.7.1 47 | - trimesh==3.5.16 48 | - pycollada==0.6 49 | - shapely==1.7.0 50 | - wget==3.2 51 | - pypng==0.0.20 52 | - PyOpenGL==3.1.0 53 | - ipdb==0.12.3 54 | - colorama==0.4.3 55 | - scikit-video==1.1.11 56 | - scikit-image==0.16.2 57 | - scikit-learn==0.22.1 58 | - pyyaml==5.1 59 | - ipykernel==5.1.3 60 | - scipy==1.4.1 61 | - pywavefront==1.3.1 62 | - seaborn==0.9.0 63 | - ipywidgets 64 | - ./deps/bullet3 65 | - ./deps/job-runner 66 | - ./deps/urdfpytorch 67 | - catkin_pkg 68 | - empy 69 | - nose 70 | - cffi==1.14.0 71 | -------------------------------------------------------------------------------- /robopose/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import numpy as np 4 | from robopose.lib3d.camera_geometry import get_K_crop_resize 5 | 6 | def crop_to_aspect_ratio(images, box, masks=None, K=None): 7 | assert images.dim() == 4 8 | bsz, _, h, w = images.shape 9 | assert box.dim() == 1 10 | assert box.shape[0] == 4 11 | w_output, h_output = box[[2, 3]] - box[[0, 1]] 12 | boxes = torch.cat( 13 | (torch.arange(bsz).unsqueeze(1).to(box.device).float(), box.unsqueeze(0).repeat(bsz, 1).float()), 14 | dim=1).to(images.device) 15 | images = torchvision.ops.roi_pool(images, boxes, output_size=(h_output, w_output)) 16 | if masks is not None: 17 | assert masks.dim() == 4 18 | masks = torchvision.ops.roi_pool(masks, boxes, output_size=(h_output, w_output)) 19 | if K is not None: 20 | assert K.dim() == 3 21 | assert K.shape[0] == bsz 22 | K = get_K_crop_resize(K, boxes[:, 1:], orig_size=(h, w), crop_resize=(h_output, w_output)) 23 | return images, masks, K 24 | 25 | 26 | def make_detections_from_segmentation(masks): 27 | detections = [] 28 | if masks.dim() == 4: 29 | assert masks.shape[0] == 1 30 | masks = masks.squeeze(0) 31 | 32 | for mask_n in masks: 33 | dets_n = dict() 34 | for uniq in torch.unique(mask_n, sorted=True): 35 | ids = np.where((mask_n == uniq).cpu().numpy()) 36 | x1, y1, x2, y2 = np.min(ids[1]), np.min(ids[0]), np.max(ids[1]), np.max(ids[0]) 37 | dets_n[int(uniq.item())] = torch.tensor([x1, y1, x2, y2]).to(mask_n.device) 38 | detections.append(dets_n) 39 | return detections 40 | 41 | 42 | def make_masks_from_det(detections, h, w): 43 | n_ids = len(detections) 44 | detections = torch.as_tensor(detections) 45 | masks = torch.zeros((n_ids, h, w)).byte() 46 | for mask_n, det_n in zip(masks, detections): 47 | x1, y1, x2, y2 = det_n.cpu().int().tolist() 48 | mask_n[y1:y2, x1:x2] = True 49 | return masks 50 | -------------------------------------------------------------------------------- /robopose/third_party/craves/get_2d_gt.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from .d3 import CameraPose 5 | 6 | joint_name = [ 7 | 'Rotation', 'Base', 'Elbow', 'Wrist' 8 | ] 9 | 10 | vertex_seq = [ 11 | [0], 12 | [1], 13 | [2], 14 | [3,4], 15 | [5,5,6,6,7,8], 16 | [5,6,7,7,8,8], 17 | [9,9,10,10,11,12], 18 | [9,10,11,11,12,12], 19 | [13,14], 20 | [15,16], 21 | [17,18], 22 | [19,20], 23 | [21], 24 | [22,23] 25 | ] 26 | 27 | 28 | def fov2f(fov, width): 29 | # fov = 2 * arctan(w / 2f) 30 | fov = fov * np.pi / 180 31 | return width / (2 * np.tan(fov / 2)) 32 | 33 | 34 | def make_2d_keypoints(vertex_infos, joints_infos, cam_info): 35 | loc = cam_info['Location'] 36 | rot = cam_info['Rotation'] 37 | 38 | if cam_info.__contains__('Fov'): 39 | f = fov2f(cam_info['Fov'], cam_info['FilmWidth']) 40 | else: 41 | f = fov2f(90, cam_info['FilmWidth']) 42 | 43 | loc = cam_info['Location'] 44 | rot = cam_info['Rotation'] 45 | cam = CameraPose(loc['X'], loc['Y'], loc['Z'], 46 | rot['Pitch'], rot['Yaw'], rot['Roll'], 47 | cam_info['FilmWidth'], cam_info['FilmHeight'], f) 48 | 49 | actor_name = 'RobotArmActor_3' 50 | joints = joints_infos 51 | joint = joints[actor_name]['WorldJoints'] # This usage might change 52 | joints3d = np.array([[joint[v]['X'], joint[v]['Y'], joint[v]['Z']] for v in joint_name]) 53 | joints2d = cam.project_to_2d(joints3d) 54 | joints2d = joints2d[1:] # discard the first joint, as we do not predict it. 55 | 56 | vertex = vertex_infos[actor_name] 57 | vertex3d = np.array([[v['X'], v['Y'], v['Z']] for v in vertex]) 58 | vertex2d = cam.project_to_2d(vertex3d) 59 | 60 | num_vertex = len(vertex_seq) 61 | pts = np.zeros((num_vertex, 2)) 62 | 63 | for i in range(num_vertex): 64 | pts[i] = np.average(vertex2d[vertex_seq[i]], axis=0) 65 | 66 | pts = np.concatenate((joints2d, pts), axis=0) 67 | return pts 68 | -------------------------------------------------------------------------------- /robopose/utils/multiepoch_dataloader.py: -------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | 4 | class MultiEpochDataLoader: 5 | def __init__(self, dataloader): 6 | self.dataloader = dataloader 7 | self.dataloader_iter = None 8 | self.epoch_id = -1 9 | self.batch_id = 0 10 | self.n_repeats_sampler = 1 11 | self.sampler_length = None 12 | self.id_in_sampler = None 13 | 14 | def __iter__(self): 15 | if self.dataloader_iter is None: 16 | self.dataloader_iter = iter(self.dataloader) 17 | 18 | self.sampler_length = len(self.dataloader) 19 | self.id_in_sampler = 0 20 | while self.sampler_length <= 2 * self.dataloader.num_workers: 21 | self.sampler_length += len(self.dataloader) 22 | next_index_sampler = iter(self.dataloader_iter._index_sampler) 23 | self.dataloader_iter._sampler_iter = chain( 24 | self.dataloader_iter._sampler_iter, next_index_sampler) 25 | 26 | self.epoch_id += 1 27 | self.batch_id = 0 28 | self.epoch_size = len(self.dataloader_iter) 29 | 30 | return self 31 | 32 | def __len__(self): 33 | return len(self.dataloader) 34 | 35 | def __next__(self): 36 | if self.batch_id == self.epoch_size: 37 | raise StopIteration 38 | 39 | elif self.id_in_sampler == self.sampler_length - 2 * self.dataloader.num_workers: 40 | next_index_sampler = iter(self.dataloader_iter._index_sampler) 41 | self.dataloader_iter._sampler_iter = next_index_sampler 42 | self.id_in_sampler = 0 43 | 44 | idx, batch = self.dataloader_iter._get_data() 45 | self.dataloader_iter._tasks_outstanding -= 1 46 | 
self.dataloader_iter._process_data(batch) 47 | 48 | self.batch_id += 1 49 | self.id_in_sampler += 1 50 | return batch 51 | 52 | def get_infos(self): 53 | return dict() 54 | 55 | def __del__(self): 56 | del self.dataloader_iter 57 | -------------------------------------------------------------------------------- /robopose/config.py: -------------------------------------------------------------------------------- 1 | import robopose 2 | import os 3 | import yaml 4 | from joblib import Memory 5 | from pathlib import Path 6 | import getpass 7 | import socket 8 | import torch.multiprocessing 9 | torch.multiprocessing.set_sharing_strategy('file_system') 10 | 11 | hostname = socket.gethostname() 12 | username = getpass.getuser() 13 | 14 | PROJECT_ROOT = Path(robopose.__file__).parent.parent 15 | PROJECT_DIR = PROJECT_ROOT 16 | DATA_DIR = PROJECT_DIR / 'data' 17 | LOCAL_DATA_DIR = PROJECT_DIR / 'local_data' 18 | TEST_DATA_DIR = LOCAL_DATA_DIR 19 | 20 | EXP_DIR = LOCAL_DATA_DIR / 'experiments' 21 | RESULTS_DIR = LOCAL_DATA_DIR / 'results' 22 | DEBUG_DATA_DIR = LOCAL_DATA_DIR / 'debug_data' 23 | DEPS_DIR = PROJECT_DIR / 'deps' 24 | CACHE_DIR = LOCAL_DATA_DIR / 'joblib_cache' 25 | 26 | assert LOCAL_DATA_DIR.exists() 27 | CACHE_DIR.mkdir(exist_ok=True) 28 | TEST_DATA_DIR.mkdir(exist_ok=True) 29 | RESULTS_DIR.mkdir(exist_ok=True) 30 | DEBUG_DATA_DIR.mkdir(exist_ok=True) 31 | 32 | ASSET_DIR = DATA_DIR / 'assets' 33 | MEMORY = Memory(CACHE_DIR, verbose=2) 34 | 35 | # ROBOTS 36 | DREAM_DS_DIR = LOCAL_DATA_DIR / 'dream_datasets' 37 | 38 | UR_DESCRIPTION = DEPS_DIR / 'ur5-description' / 'ur_description' 39 | UR_DESCRIPTION_NEW = DEPS_DIR / 'ur5-description' / 'package_new_visuals' / 'ur_description' 40 | 41 | OWI_DESCRIPTION = DEPS_DIR / 'owi-description' / 'owi535_description' 42 | OWI_KEYPOINTS_PATH = DEPS_DIR / 'owi-description' / 'keypoints.json' 43 | 44 | PANDA_DESCRIPTION_PATH = DEPS_DIR / 'panda-description' / 'panda.urdf' 45 | PANDA_KEYPOINTS_PATH = LOCAL_DATA_DIR / 'dream_results' / 'panda_keypoints_infos.json' 46 | 47 | CRAVES_YOUTUBE_RESULTS_DIR = LOCAL_DATA_DIR / 'craves_results/youtube-vis/preds' 48 | CRAVES_LAB_RESULTS_DIR = LOCAL_DATA_DIR / 'craves_results/lab/preds' 49 | 50 | CONDA_PREFIX = os.environ['CONDA_PREFIX'] 51 | if 'CONDA_PREFIX_1' in os.environ: 52 | CONDA_BASE_DIR = os.environ['CONDA_PREFIX_1'] 53 | CONDA_ENV = os.environ['CONDA_DEFAULT_ENV'] 54 | else: 55 | CONDA_BASE_DIR = os.environ['CONDA_PREFIX'] 56 | CONDA_ENV = 'base' 57 | -------------------------------------------------------------------------------- /robopose/simulator/caching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .body import Body 3 | from copy import deepcopy 4 | from collections import defaultdict 5 | from .client import BulletClient 6 | 7 | 8 | class BodyCache: 9 | def __init__(self, urdf_ds, client_id): 10 | self.urdf_ds = urdf_ds 11 | self.client = BulletClient(client_id) 12 | self.cache = defaultdict(list) 13 | self.away_transform = (0, 0, 1000), (0, 0, 0, 1) 14 | 15 | def _load_body(self, label): 16 | ds_idx = np.where(self.urdf_ds.index['label'] == label)[0].item() 17 | object_infos = self.urdf_ds[ds_idx].to_dict() 18 | body = Body.load(object_infos['urdf_path'], 19 | scale=object_infos['scale'], 20 | client_id=self.client.client_id) 21 | body.pose = self.away_transform 22 | self.cache[object_infos['label']].append(body) 23 | return body 24 | 25 | def hide_bodies(self): 26 | n = 0 27 | for body_list in self.cache.values(): 28 | for 
body in body_list: 29 | pos = (1000, 1000, 1000 + n * 10) 30 | orn = (0, 0, 0, 1) 31 | body.pose = pos, orn 32 | n += 1 33 | 34 | def get_bodies_by_labels(self, labels): 35 | self.hide_bodies() 36 | gb_label = defaultdict(lambda: 0) 37 | for label in labels: 38 | gb_label[label] += 1 39 | 40 | for label, n_instances in gb_label.items(): 41 | n_missing = gb_label[label] - len(self.cache[label]) 42 | for n in range(n_missing): 43 | self._load_body(label) 44 | 45 | remaining = deepcopy(dict(self.cache)) 46 | bodies = [remaining[label].pop(0) for label in labels] 47 | return bodies 48 | 49 | def get_bodies_by_ids(self, ids): 50 | labels = [self.urdf_ds[idx]['label'] for idx in ids] 51 | return self.get_bodies_by_labels(labels) 52 | 53 | def __len__(self): 54 | return sum([len(bodies) for bodies in self.cache.values()]) 55 | -------------------------------------------------------------------------------- /robopose/lib3d/mesh_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def get_meshes_center(pts): 6 | bsz = pts.shape[0] 7 | limits = get_meshes_bounding_boxes(pts) 8 | t_offset = limits[..., :3].mean(dim=1) 9 | T_offset = torch.eye(4, 4, dtype=pts.dtype, device=pts.device) 10 | T_offset = T_offset.unsqueeze(0).repeat(bsz, 1, 1) 11 | T_offset[:, :3, -1] = t_offset 12 | return T_offset 13 | 14 | def get_meshes_bounding_boxes(pts): 15 | xmin, xmax = pts[..., 0].min(dim=-1, keepdim=True).values, pts[..., 0].max(dim=-1, keepdim=True).values 16 | ymin, ymax = pts[..., 1].min(dim=-1, keepdim=True).values, pts[..., 1].max(dim=-1, keepdim=True).values 17 | zmin, zmax = pts[..., 2].min(dim=-1, keepdim=True).values, pts[..., 2].max(dim=-1, keepdim=True).values 18 | v0 = torch.cat((xmin, ymax, zmax), dim=-1).unsqueeze(1) 19 | v1 = torch.cat((xmax, ymax, zmax), dim=-1).unsqueeze(1) 20 | v2 = torch.cat((xmax, ymin, zmax), dim=-1).unsqueeze(1) 21 | v3 = torch.cat((xmin, ymin, zmax), dim=-1).unsqueeze(1) 22 | v4 = torch.cat((xmin, ymax, zmin), dim=-1).unsqueeze(1) 23 | v5 = torch.cat((xmax, ymax, zmin), dim=-1).unsqueeze(1) 24 | v6 = torch.cat((xmax, ymin, zmin), dim=-1).unsqueeze(1) 25 | v7 = torch.cat((xmin, ymin, zmin), dim=-1).unsqueeze(1) 26 | bbox_pts = torch.cat((v0, v1, v2, v3, v4, v5, v6, v7), dim=1) 27 | return bbox_pts 28 | 29 | def get_meshes_aabbs_bounds(pts): 30 | xmin, xmax = pts[..., 0].min(dim=-1, keepdim=True).values, pts[..., 0].max(dim=-1, keepdim=True).values 31 | ymin, ymax = pts[..., 1].min(dim=-1, keepdim=True).values, pts[..., 1].max(dim=-1, keepdim=True).values 32 | zmin, zmax = pts[..., 2].min(dim=-1, keepdim=True).values, pts[..., 2].max(dim=-1, keepdim=True).values 33 | lower = torch.cat((xmin, ymin, zmin), dim=1) 34 | upper = torch.cat((xmax, ymax, zmax), dim=1) 35 | return lower, upper 36 | 37 | def sample_points(points, n_points, deterministic=False): 38 | assert points.dim() == 3 39 | assert n_points <= points.shape[1] 40 | if deterministic: 41 | np_random = np.random.RandomState(0) 42 | else: 43 | np_random = np.random 44 | point_ids = np_random.choice(points.shape[1], size=n_points, replace=False) 45 | point_ids = torch.as_tensor(point_ids).to(points.device) 46 | points = torch.index_select(points, 1, point_ids) 47 | return points 48 | -------------------------------------------------------------------------------- /robopose/lib3d/transform_ops.py: -------------------------------------------------------------------------------- 1 | import transforms3d 2 | import numpy as np 3 | import 
torch 4 | from .rotations import compute_rotation_matrix_from_ortho6d 5 | 6 | 7 | def transform_pts(T, pts): 8 | bsz = T.shape[0] 9 | n_pts = pts.shape[1] 10 | assert pts.shape == (bsz, n_pts, 3) 11 | if T.dim() == 4: 12 | pts = pts.unsqueeze(1) 13 | assert T.shape[-2:] == (4, 4) 14 | elif T.dim() == 3: 15 | assert T.shape == (bsz, 4, 4) 16 | else: 17 | raise ValueError('Unsupported shape for T', T.shape) 18 | pts = pts.unsqueeze(-1) 19 | T = T.unsqueeze(-3) 20 | pts_transformed = T[..., :3, :3] @ pts + T[..., :3, [-1]] 21 | return pts_transformed.squeeze(-1) 22 | 23 | 24 | def invert_T(T): 25 | R = T[..., :3, :3] 26 | t = T[..., :3, [-1]] 27 | R_inv = R.transpose(-2, -1) 28 | t_inv = - R_inv @ t 29 | T_inv = T.clone() 30 | T_inv[..., :3, :3] = R_inv 31 | T_inv[..., :3, [-1]] = t_inv 32 | return T_inv 33 | 34 | 35 | def add_noise(TCO, euler_deg_std=[15, 15, 15], trans_std=[0.01, 0.01, 0.05]): 36 | TCO_out = TCO.clone() 37 | device = TCO_out.device 38 | bsz = TCO.shape[0] 39 | euler_noise_deg = np.concatenate( 40 | [np.random.normal(loc=0, scale=euler_deg_std_i, size=bsz)[:, None] 41 | for euler_deg_std_i in euler_deg_std], axis=1) 42 | euler_noise_rad = euler_noise_deg * np.pi / 180 43 | R_noise = torch.tensor([transforms3d.euler.euler2mat(*xyz) for xyz in euler_noise_rad]).float().to(device) 44 | 45 | trans_noise = np.concatenate( 46 | [np.random.normal(loc=0, scale=trans_std_i, size=bsz)[:, None] 47 | for trans_std_i in trans_std], axis=1) 48 | trans_noise = torch.tensor(trans_noise).float().to(device) 49 | TCO_out[:, :3, :3] = TCO_out[:, :3, :3] @ R_noise 50 | TCO_out[:, :3, 3] += trans_noise 51 | return TCO_out 52 | 53 | 54 | def compute_transform_from_pose9d(pose9d): 55 | # assert len(pose9d.shape) == 2 56 | # assert pose9d.shape[1] == 9 57 | assert pose9d.shape[-1] == 9 58 | R = compute_rotation_matrix_from_ortho6d(pose9d[..., :6]) 59 | trans = pose9d[..., 6:] 60 | T = torch.zeros(*pose9d.shape[:-1], 4, 4, dtype=pose9d.dtype, device=pose9d.device) 61 | T[..., 0:3, 0:3] = R 62 | T[..., 0:3, 3] = trans 63 | T[..., 3, 3] = 1 64 | return T 65 | 66 | 67 | def normalize_T(T): 68 | pose_9d = torch.cat([T[..., :3, 0], T[..., :3, 1], T[..., :3, -1]], dim=-1) 69 | return compute_transform_from_pose9d(pose_9d) 70 | -------------------------------------------------------------------------------- /robopose/evaluation/runner_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import pandas as pd 3 | from collections import defaultdict 4 | 5 | from robopose.utils.distributed import get_tmp_dir, get_rank 6 | from robopose.utils.logging import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | def run_pred_eval(pred_runner, pred_kwargs, eval_runner, eval_preds=None): 12 | all_predictions = dict() 13 | for pred_prefix, pred_kwargs_n in pred_kwargs.items(): 14 | print("Prediction :", pred_prefix) 15 | preds = pred_runner.get_predictions(**pred_kwargs_n) 16 | for preds_name, preds_n in preds.items(): 17 | all_predictions[f'{pred_prefix}/{preds_name}'] = preds_n 18 | 19 | all_predictions = OrderedDict({k: v for k, v in sorted(all_predictions.items(), key=lambda item: item[0])}) 20 | eval_metrics, eval_dfs = dict(), dict() 21 | 22 | for preds_k, preds in all_predictions.items(): 23 | print("Evaluation :", preds_k) 24 | if eval_preds is None or preds_k in eval_preds: 25 | eval_metrics[preds_k], eval_dfs[preds_k] = eval_runner.evaluate(preds) 26 | 27 | all_predictions = gather_predictions(all_predictions) 28 | 29 
| if get_rank() == 0: 30 | results = format_results(all_predictions, 31 | eval_metrics, 32 | eval_dfs) 33 | else: 34 | results = None 35 | return results 36 | 37 | 38 | def gather_predictions(all_predictions): 39 | for k, v in all_predictions.items(): 40 | all_predictions[k] = v.gather_distributed(tmp_dir=get_tmp_dir()).cpu() 41 | return all_predictions 42 | 43 | 44 | def format_results(predictions, 45 | eval_metrics, 46 | eval_dfs, 47 | print_metrics=True): 48 | summary = dict() 49 | df = defaultdict(list) 50 | summary_txt = '' 51 | for k, v in eval_metrics.items(): 52 | summary_txt += f"\n{k}\n{'-'*80}\n" 53 | for k_, v_ in v.items(): 54 | summary[f'{k}/{k_}'] = v_ 55 | df['method'].append(k) 56 | df['metric'].append(k_) 57 | df['value'].append(v_) 58 | summary_txt += f'{k}/{k_}: {v_}\n' 59 | summary_txt += f"{'-'*80}" 60 | if print_metrics: 61 | logger.info(summary_txt) 62 | 63 | df = pd.DataFrame(df) 64 | results = dict( 65 | summary=summary, 66 | summary_txt=summary_txt, 67 | predictions=predictions, 68 | metrics=eval_metrics, 69 | summary_df=df, 70 | dfs=eval_dfs, 71 | ) 72 | return results 73 | -------------------------------------------------------------------------------- /robopose/evaluation/eval_runner/articulated_obj_eval.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | from torch.utils.data import DataLoader 4 | 5 | from robopose.utils.distributed import get_world_size, get_rank, get_tmp_dir 6 | 7 | import robopose.utils.tensor_collection as tc 8 | from robopose.evaluation.data_utils import parse_obs_data 9 | from robopose.datasets.samplers import DistributedSceneSampler 10 | 11 | 12 | class ArticulatedObjectEvaluation: 13 | def __init__(self, scene_ds, meters, batch_size=64, 14 | cache_data=True, n_workers=4, sampler=None): 15 | 16 | self.rank = get_rank() 17 | self.world_size = get_world_size() 18 | self.tmp_dir = get_tmp_dir() 19 | 20 | self.scene_ds = scene_ds 21 | if sampler is None: 22 | sampler = DistributedSceneSampler(scene_ds, 23 | num_replicas=self.world_size, 24 | rank=self.rank) 25 | dataloader = DataLoader(scene_ds, batch_size=batch_size, 26 | num_workers=n_workers, 27 | sampler=sampler, collate_fn=self.collate_fn) 28 | 29 | if cache_data: 30 | self.dataloader = list(tqdm(dataloader)) 31 | else: 32 | self.dataloader = dataloader 33 | 34 | self.meters = meters 35 | 36 | def collate_fn(self, batch): 37 | obj_data = [] 38 | for data_n in batch: 39 | _, _, obs = data_n 40 | obj_data_ = parse_obs_data(obs, parse_joints=True) 41 | obj_data.append(obj_data_) 42 | obj_data = tc.concatenate(obj_data) 43 | return obj_data 44 | 45 | def evaluate(self, obj_predictions): 46 | for meter in self.meters.values(): 47 | meter.reset() 48 | device = obj_predictions.device 49 | for obj_data_gt in tqdm(self.dataloader): 50 | for k, meter in self.meters.items(): 51 | if meter.is_data_valid(obj_predictions) and meter.is_data_valid(obj_data_gt): 52 | meter.add(obj_predictions, obj_data_gt.to(device)) 53 | return self.summary() 54 | 55 | def summary(self): 56 | summary, dfs = dict(), dict() 57 | for meter_k, meter in self.meters.items(): 58 | if len(meter.datas) > 0: 59 | meter.gather_distributed(tmp_dir=self.tmp_dir) 60 | summary_, df_ = meter.summary() 61 | dfs[meter_k] = df_ 62 | for k, v in summary_.items(): 63 | summary[meter_k + '/' + k] = v 64 | return summary, dfs 65 | -------------------------------------------------------------------------------- /robopose/evaluation/meters/utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from collections import OrderedDict 4 | 5 | 6 | def one_to_one_matching(pred_infos, gt_infos, 7 | keys=('scene_id', 'view_id'), 8 | allow_pred_missing=False): 9 | keys = list(keys) 10 | pred_infos['pred_id'] = np.arange(len(pred_infos)) 11 | gt_infos['gt_id'] = np.arange(len(gt_infos)) 12 | matches = pred_infos.merge(gt_infos, on=keys) 13 | 14 | matches_gb = matches.groupby(keys).groups 15 | assert all([len(v) == 1 for v in matches_gb.values()]) 16 | if not allow_pred_missing: 17 | assert len(matches) == len(gt_infos) 18 | return matches 19 | 20 | 21 | def get_candidate_matches(pred_infos, gt_infos, 22 | group_keys=['scene_id', 'view_id', 'label'], 23 | only_valids=True): 24 | pred_infos['pred_id'] = np.arange(len(pred_infos)) 25 | gt_infos['gt_id'] = np.arange(len(gt_infos)) 26 | group_keys = list(group_keys) 27 | cand_infos = pred_infos.merge(gt_infos, on=group_keys) 28 | if only_valids: 29 | cand_infos = cand_infos[cand_infos['valid']].reset_index(drop=True) 30 | cand_infos['cand_id'] = np.arange(len(cand_infos)) 31 | return cand_infos 32 | 33 | 34 | def match_poses(cand_infos, group_keys=['scene_id', 'view_id', 'label']): 35 | assert 'error' in cand_infos 36 | 37 | matches = [] 38 | 39 | def match_label_preds(group): 40 | gt_ids_matched = set() 41 | group = group.reset_index(drop=True) 42 | gb_pred = group.groupby('pred_id', sort=False) 43 | ids_sorted = gb_pred.first().sort_values('score', ascending=False) 44 | gb_pred_groups = gb_pred.groups 45 | for idx, _ in ids_sorted.iterrows(): 46 | pred_group = group.iloc[gb_pred_groups[idx]] 47 | best_error = np.inf 48 | best_match = None 49 | for _, tentative_match in pred_group.iterrows(): 50 | if tentative_match['error'] < best_error and \ 51 | tentative_match['gt_id'] not in gt_ids_matched: 52 | best_match = tentative_match 53 | best_error = tentative_match['error'] 54 | 55 | if best_match is not None: 56 | gt_ids_matched.add(best_match['gt_id']) 57 | matches.append(best_match) 58 | 59 | if len(cand_infos) > 0: 60 | cand_infos.groupby(group_keys).apply(match_label_preds) 61 | matches = pd.DataFrame(matches).reset_index(drop=True) 62 | else: 63 | matches = cand_infos 64 | return matches 65 | -------------------------------------------------------------------------------- /robopose/evaluation/meters/dream_meters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xarray as xr 3 | import torch 4 | from .base import Meter 5 | from .utils import one_to_one_matching 6 | from robopose.lib3d.transform_ops import transform_pts 7 | 8 | 9 | class DreamErrorMeter(Meter): 10 | def __init__(self): 11 | self.reset() 12 | 13 | def is_data_valid(self, data): 14 | valid = False 15 | if not valid and hasattr(data, 'TCO_keypoints_3d'): 16 | valid = True 17 | if not valid and hasattr(data, 'pnp_poses'): 18 | valid = True 19 | return valid 20 | 21 | def add(self, pred_data, gt_data): 22 | pred_data = pred_data.float() 23 | gt_data = gt_data.float() 24 | 25 | matches = one_to_one_matching(pred_data.infos, 26 | gt_data.infos, 27 | keys=('scene_id', 'view_id'), 28 | allow_pred_missing=False) 29 | 30 | pred_data = pred_data[matches.pred_id] 31 | gt_data = gt_data[matches.gt_id] 32 | 33 | if not hasattr(pred_data, 'TCO_keypoints_3d'): 34 | pnp_poses = pred_data.pnp_poses 35 | kp3d_pred = transform_pts(pnp_poses, gt_data.TCO_keypoints_3d) 36 | else: 37 | kp3d_pred = 
pred_data.TCO_keypoints_3d 38 | 39 | kp3d_gt = gt_data.TCO_keypoints_3d 40 | bsz = kp3d_pred.shape[0] 41 | n_kp = kp3d_gt.shape[1] 42 | assert kp3d_pred.shape == (bsz, n_kp, 3) 43 | assert kp3d_pred.shape == (bsz, n_kp, 3) 44 | 45 | add = torch.norm(kp3d_pred - kp3d_gt, dim=-1, p=2).mean(dim=-1).cpu().numpy() 46 | df = xr.Dataset(matches).rename(dict(dim_0='match_id')) 47 | df['add'] = 'match_id', add 48 | self.datas['df'].append(df) 49 | 50 | def summary(self): 51 | df = xr.concat(self.datas['df'], dim='match_id') 52 | 53 | auc_threshold = 0.1 54 | delta_threshold = 0.00001 55 | add_threshold_values = np.arange(0.0, auc_threshold, delta_threshold) 56 | counts = [] 57 | for value in add_threshold_values: 58 | under_threshold = ( 59 | (df['add'].values <= value).mean() 60 | ) 61 | counts.append(under_threshold) 62 | auc = np.trapz(counts, dx=delta_threshold) / auc_threshold 63 | 64 | summary = { 65 | 'n_objects': len(df['match_id']), 66 | 'ADD/mean': df['add'].values.mean().item(), 67 | 'ADD/AUC': auc.item(), 68 | } 69 | for th_mm in (10, 20, 40, 60): 70 | summary[f'ADD<{th_mm}mm'] = (df['add'].values <= th_mm * 1e-3).mean() * 100 71 | return summary, df 72 | -------------------------------------------------------------------------------- /robopose/third_party/craves/eval_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .heatmap_utils import heatmap_from_keypoints 3 | 4 | 5 | def get_preds(scores): 6 | ''' get predictions from score maps in torch Tensor 7 | return type: torch.LongTensor 8 | ''' 9 | assert scores.dim() == 4, 'Score maps should be 4-dim' 10 | maxval, idx = torch.max(scores.view(scores.size(0), scores.size(1), -1), 2) 11 | 12 | maxval = maxval.view(scores.size(0), scores.size(1), 1) 13 | idx = idx.view(scores.size(0), scores.size(1), 1) + 1 14 | 15 | preds = idx.repeat(1, 1, 2).float() 16 | 17 | preds[:,:,0] = (preds[:,:,0] - 1) % scores.size(3) + 1 18 | preds[:,:,1] = torch.floor((preds[:,:,1] - 1) / scores.size(3)) + 1 19 | 20 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 21 | preds *= pred_mask 22 | 23 | return preds 24 | 25 | 26 | def calc_dists(preds, target, normalize): 27 | preds = preds.float() 28 | target = target.float() 29 | dists = torch.zeros(preds.size(1), preds.size(0)) 30 | for n in range(preds.size(0)): 31 | for c in range(preds.size(1)): 32 | if target[n,c,0] > 1 and target[n, c, 1] > 1: 33 | dists[c, n] = torch.dist(preds[n,c,:], target[n,c,:])/normalize[n] 34 | else: 35 | dists[c, n] = -1 36 | return dists 37 | 38 | 39 | def dist_acc(dists, thr=0.5): 40 | ''' Return percentage below threshold while ignoring values with a -1 ''' 41 | if dists.ne(-1).sum() > 0: 42 | return dists.le(thr).eq(dists.ne(-1)).sum().numpy() / dists.ne(-1).sum().numpy() 43 | else: 44 | return -1 45 | 46 | 47 | def accuracy(output, target, idxs, thr=0.2): 48 | ''' Calculate accuracy according to PCK, but uses ground truth heatmap rather than x,y locations 49 | First value to be returned is average accuracy across 'idxs', followed by individual accuracies 50 | ''' 51 | preds = get_preds(output) 52 | gts = get_preds(target) 53 | norm = torch.ones(preds.size(0))*output.size(3)/4.0 54 | dists = calc_dists(preds, gts, norm) 55 | 56 | acc = torch.zeros(len(idxs)+1) 57 | avg_acc = 0 58 | cnt = 0 59 | 60 | for i in range(len(idxs)): 61 | acc[i+1] = dist_acc(dists[idxs[i]-1], thr=thr) 62 | if acc[i+1] >= 0: 63 | avg_acc = avg_acc + acc[i+1] 64 | cnt += 1 65 | 66 | if cnt != 0: 67 | acc[0] = avg_acc / cnt 68 | return acc 
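# accuracy_from_keypoints (below) rasterizes the predicted and ground-truth 2D keypoints
# into heatmaps with heatmap_from_keypoints, then reuses the PCK-style accuracy() above;
# the first element of the returned tensor is the average over the keypoint indices,
# followed by the per-keypoint accuracies.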
69 | 70 | 71 | def accuracy_from_keypoints(kp_pred, kp_gt, bboxes_gt, thr=0.2): 72 | assert len(kp_pred) == len(kp_gt) == len(bboxes_gt) 73 | range_n = range(len(kp_pred)) 74 | heatmaps_pred = torch.stack([heatmap_from_keypoints(bboxes_gt[n], kp_pred[n]) for n in range_n]) 75 | heatmaps_gt = torch.stack([heatmap_from_keypoints(bboxes_gt[n], kp_gt[n]) for n in range_n]) 76 | 77 | idxs = range(0, 17) 78 | acc = accuracy(heatmaps_pred, heatmaps_gt, idxs=idxs, thr=thr) 79 | return acc 80 | -------------------------------------------------------------------------------- /robopose/training/articulated_models_cfg.py: -------------------------------------------------------------------------------- 1 | # Backbones 2 | from robopose.models.wide_resnet import WideResNet34 3 | 4 | # Pose models 5 | from robopose.models.articulated import ArticulatedObjectRefiner 6 | 7 | from robopose.utils.logging import get_logger 8 | logger = get_logger(__name__) 9 | 10 | 11 | def check_update_config(config): 12 | # Compatibility with models trained using the pre-release code 13 | config.is_old_format = False 14 | if not hasattr(config, 'urdf_ds_name'): 15 | config.urdf_ds_name = 'owi535' 16 | config.backbone_str = 'resnet34' 17 | config.init_method = 'v1' 18 | config.is_old_format = True 19 | if not hasattr(config, 'possible_roots'): 20 | config.possible_roots = 'top_5_largest' 21 | if not hasattr(config, 'coordinate_system'): 22 | config.coordinate_system = 'base_centered' 23 | if not hasattr(config, 'multiroot'): 24 | config.multiroot = False 25 | if not hasattr(config, 'center_crop_on'): 26 | config.center_crop_on = 'com' 27 | if not hasattr(config, 'predict_all_links_poses'): 28 | config.predict_all_links_poses = False 29 | 30 | # Compatib names for new models. 31 | if not hasattr(config, 'possible_anchor_links'): 32 | config.possible_anchor_links = config.possible_roots 33 | if hasattr(config, 'center_crop_on') and config.center_crop_on == 'com': 34 | config.center_crop_on = 'centroid' 35 | if hasattr(config, 'coordinate_system'): 36 | if config.coordinate_system == 'link': 37 | assert len(config.possible_roots) == 1 38 | assert not config.predict_joints 39 | config.possible_anchor_links = 'base_only' 40 | config.reference_point = f'on_link={config.possible_roots[0]}' 41 | elif config.coordinate_system == 'camera_centered': 42 | config.reference_point = 'centroid' 43 | else: 44 | pass 45 | if not hasattr(config, 'center_crop_on'): 46 | config.possible_anchor_links = config.possible_roots 47 | return config 48 | 49 | 50 | def create_model(cfg, renderer, mesh_db): 51 | n_inputs = 6 52 | backbone_str = cfg.backbone_str 53 | anchor_mask_input = cfg.predict_joints 54 | if anchor_mask_input: 55 | n_inputs += 1 56 | if 'resnet34' in backbone_str: 57 | backbone = WideResNet34(n_inputs=n_inputs) 58 | else: 59 | raise ValueError('Unknown backbone', backbone_str) 60 | 61 | logger.info(f'Backbone: {backbone_str}') 62 | backbone.n_inputs = n_inputs 63 | render_size = (240, 320) 64 | model = ArticulatedObjectRefiner( 65 | backbone=backbone, renderer=renderer, mesh_db=mesh_db, 66 | render_size=render_size, input_anchor_mask=anchor_mask_input, 67 | predict_joints=cfg.predict_joints, possible_anchors=cfg.possible_anchor_links, 68 | center_crop_on=cfg.center_crop_on, reference_point=cfg.reference_point) 69 | return model 70 | -------------------------------------------------------------------------------- /robopose/simulator/base_scene.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | import subprocess 3 | import xml.etree.ElementTree as ET 4 | import pkgutil 5 | import pybullet as pb 6 | from .client import BulletClient 7 | 8 | 9 | class BaseScene: 10 | _client_id = -1 11 | _client = None 12 | _connected = False 13 | _simulation_step = 1/240. 14 | 15 | def connect(self, gpu_renderer=True, gui=False): 16 | assert not self._connected, 'Already connected' 17 | if gui: 18 | self._client_id = pb.connect(pb.GUI, '--width=640 --height=480') 19 | pb.configureDebugVisualizer(pb.COV_ENABLE_GUI, 1, physicsClientId=self._client_id) 20 | pb.configureDebugVisualizer(pb.COV_ENABLE_RENDERING, 1, physicsClientId=self._client_id) 21 | pb.configureDebugVisualizer(pb.COV_ENABLE_TINY_RENDERER, 0, physicsClientId=self._client_id) 22 | else: 23 | self._client_id = pb.connect(pb.DIRECT) 24 | if self._client_id < 0: 25 | raise Exception('Cannot connect to pybullet') 26 | if gpu_renderer and not gui: 27 | os.environ['MESA_GL_VERSION_OVERRIDE'] = '3.3' 28 | os.environ['MESA_GLSL_VERSION_OVERRIDE'] = '330' 29 | # Get EGL device 30 | assert 'CUDA_VISIBLE_DEVICES' in os.environ 31 | devices = os.environ.get('CUDA_VISIBLE_DEVICES', ).split(',') 32 | assert len(devices) == 1 33 | out = subprocess.check_output(['nvidia-smi', '--id='+str(devices[0]), '-q', '--xml-format']) 34 | tree = ET.fromstring(out) 35 | gpu = tree.findall('gpu')[0] 36 | dev_id = gpu.find('minor_number').text 37 | os.environ['EGL_VISIBLE_DEVICES'] = str(dev_id) 38 | egl = pkgutil.get_loader('eglRenderer') 39 | pb.loadPlugin(egl.get_filename(), "_eglRendererPlugin", physicsClientId=self._client_id) 40 | pb.resetSimulation(physicsClientId=self._client_id) 41 | self._connected = True 42 | self._client = BulletClient(self._client_id) 43 | 44 | self.client.setPhysicsEngineParameter(numSolverIterations=50) 45 | self.client.setPhysicsEngineParameter(fixedTimeStep=self._simulation_step) 46 | self.client.setGravity(0, 0, -9.8) 47 | 48 | def run_simulation(self, time): 49 | n_steps = float(time) / self._simulation_step 50 | for _ in range(int(n_steps)): 51 | self.client.stepSimulation() 52 | 53 | def disconnect(self): 54 | try: 55 | pb.resetSimulation(physicsClientId=self._client_id) 56 | pb.disconnect(physicsClientId=self._client_id) 57 | except pb.error: 58 | pass 59 | self._connected = False 60 | self._client_id = -1 61 | 62 | @property 63 | def client(self): 64 | assert self._connected 65 | return self._client 66 | 67 | @property 68 | def client_id(self): 69 | assert self._connected 70 | return self._client_id 71 | 72 | def __del__(self): 73 | self.disconnect() 74 | -------------------------------------------------------------------------------- /robopose/lib3d/transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pinocchio as pin 3 | import eigenpy 4 | eigenpy.switchToNumpyArray() 5 | 6 | 7 | def parse_pose_args(pose_args): 8 | if len(pose_args) == 2: 9 | pos, orn = pose_args 10 | pose = Transform(orn, pos) 11 | elif isinstance(pose_args, Transform): 12 | pose = pose_args 13 | else: 14 | raise ValueError 15 | return pose 16 | 17 | 18 | class Transform: 19 | def __init__(self, *args): 20 | if len(args) == 0: 21 | raise NotImplementedError 22 | elif len(args) == 7: 23 | raise NotImplementedError 24 | elif len(args) == 1: 25 | arg = args[0] 26 | if isinstance(arg, Transform): 27 | self.T = arg.T 28 | elif isinstance(arg, pin.SE3): 29 | self.T = arg 30 | else: 31 | arg_array = np.asarray(arg) 32 | if arg_array.shape == (4, 4): 33 | R = arg_array[:3, :3] 
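                    # A 4x4 homogeneous matrix T = [[R, t], [0, 1]] is split into
                    # its 3x3 rotation block R and 3x1 translation t before
                    # building the pin.SE3 below.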
34 | t = arg_array[:3, -1] 35 | self.T = pin.SE3(R, t.reshape(3, 1)) 36 | else: 37 | raise NotImplementedError 38 | elif len(args) == 2: 39 | args_0_array = np.asarray(args[0]) 40 | n_elem_rot = len(args_0_array.flatten()) 41 | if n_elem_rot == 4: 42 | xyzw = np.asarray(args[0]).flatten().tolist() 43 | wxyz = [xyzw[-1], *xyzw[:-1]] 44 | assert len(wxyz) == 4 45 | q = pin.Quaternion(*wxyz) 46 | q.normalize() 47 | R = q.matrix() 48 | elif n_elem_rot == 9: 49 | assert args_0_array.shape == (3, 3) 50 | R = args_0_array 51 | t = np.asarray(args[1]) 52 | assert len(t) == 3 53 | self.T = pin.SE3(R, t.reshape(3, 1)) 54 | elif len(args) == 1: 55 | if isinstance(args[0], Transform): 56 | raise NotImplementedError 57 | elif isinstance(args[0], list): 58 | array = np.array(args[0]) 59 | assert array.shape == (4, 4) 60 | self.T = pin.SE3(array) 61 | elif isinstance(args[0], np.ndarray): 62 | assert args[0].shape == (4, 4) 63 | self.T = pin.SE3(args[0]) 64 | 65 | def __mul__(self, other): 66 | T = self.T * other.T 67 | return Transform(T) 68 | 69 | def inverse(self): 70 | return Transform(self.T.inverse()) 71 | 72 | def __str__(self): 73 | return str(self.T) 74 | 75 | def __getitem__(self, i): 76 | raise NotImplementedError 77 | 78 | def __len__(self): 79 | return 7 80 | 81 | def toHomogeneousMatrix(self): 82 | return self.T.homogeneous 83 | 84 | @property 85 | def translation(self): 86 | return self.T.translation.reshape(3) 87 | 88 | @property 89 | def quaternion(self): 90 | return pin.Quaternion(self.T.rotation) 91 | -------------------------------------------------------------------------------- /robopose/rendering/bullet_scene_renderer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pybullet as pb 3 | 4 | from robopose.datasets.datasets_cfg import make_urdf_dataset 5 | from robopose.lib3d import Transform 6 | 7 | from robopose.simulator.base_scene import BaseScene 8 | from robopose.simulator.caching import BodyCache 9 | from robopose.simulator.camera import Camera 10 | 11 | 12 | class BulletSceneRenderer(BaseScene): 13 | def __init__(self, 14 | urdf_ds='ycbv', 15 | preload_cache=False, 16 | background_color=(0, 0, 0), 17 | gpu_renderer=True, 18 | gui=False): 19 | 20 | self.urdf_ds = make_urdf_dataset(urdf_ds) 21 | self.connect(gpu_renderer=gpu_renderer, gui=gui) 22 | self.body_cache = BodyCache(self.urdf_ds, self.client_id) 23 | if preload_cache: 24 | self.body_cache.get_bodies_by_ids(np.arange(len(self.urdf_ds))) 25 | self.background_color = background_color 26 | 27 | def setup_scene(self, obj_infos): 28 | labels = [obj['name'] for obj in obj_infos] 29 | bodies = self.body_cache.get_bodies_by_labels(labels) 30 | for (obj_info, body) in zip(obj_infos, bodies): 31 | TWO = Transform(obj_info['TWO']) 32 | body.pose = TWO 33 | q = obj_info.get('joints', None) 34 | if q is not None: 35 | body.q = q 36 | color = obj_info.get('color', None) 37 | if color is not None: 38 | pb.changeVisualShape(body.body_id, -1, physicsClientId=0, rgbaColor=color) 39 | return bodies 40 | 41 | def render_images(self, cam_infos, render_depth=False, render_mask=True): 42 | cam_obs = [] 43 | for cam_info in cam_infos: 44 | K = cam_info['K'] 45 | TWC = Transform(cam_info['TWC']) 46 | resolution = cam_info['resolution'] 47 | cam = Camera(resolution=resolution, client_id=self.client_id) 48 | cam.set_intrinsic_K(K) 49 | cam.set_extrinsic_T(TWC) 50 | cam_obs_ = cam.get_state() 51 | if self.background_color is not None: 52 | im = cam_obs_['rgb'] 53 | mask = 
cam_obs_['mask'] 54 | im[np.logical_or(mask < 0, mask == 255)] = self.background_color 55 | if render_depth: 56 | depth = cam_obs_['depth'] 57 | near, far = cam_obs_['near'], cam_obs_['far'] 58 | z_n = 2 * depth - 1 59 | z_e = 2 * near * far / (far + near - z_n * (far - near)) 60 | z_e[np.logical_or(mask < 0, mask == 255)] = 0. 61 | cam_obs_['depth'] = z_e 62 | cam_obs.append(cam_obs_) 63 | return cam_obs 64 | 65 | def render_scene(self, obj_infos, cam_infos, render_depth=False, render_mask=True): 66 | self.setup_scene(obj_infos) 67 | # NOTE: Mask is always rendered, flag is not used. 68 | cam_obs = self.render_images(cam_infos, render_depth=render_depth, render_mask=render_mask) 69 | return cam_obs 70 | -------------------------------------------------------------------------------- /robopose/visualization/bokeh_utils.py: -------------------------------------------------------------------------------- 1 | import bokeh 2 | from bokeh.plotting import figure as bokeh_figure 3 | from bokeh.models import ColumnDataSource 4 | from bokeh.models.widgets import DataTable, TableColumn 5 | from bokeh.models.widgets import NumberFormatter 6 | import bokeh.io 7 | import numpy as np 8 | from PIL import Image 9 | from pathlib import Path 10 | from bokeh.io.export import get_screenshot_as_png 11 | 12 | def save_image_figure(f, jpg_path): 13 | f.toolbar.logo = None 14 | f.toolbar_location = None 15 | f.title = None 16 | f.sizing_mode = 'fixed' 17 | im = get_screenshot_as_png(f) 18 | w, h = im.size 19 | im = im.convert('RGB').crop((1, 1, w, h)).resize((w, h)) 20 | im.save(jpg_path) 21 | return im 22 | 23 | def to_rgba(im): 24 | im = Image.fromarray(im) 25 | im = np.asarray(im.convert('RGBA')) 26 | im = np.flipud(im) 27 | return im 28 | 29 | 30 | def plot_image(im, axes=False, tools='', im_size=None, figure=None): 31 | if np.asarray(im).ndim == 2: 32 | gray = True 33 | else: 34 | im = to_rgba(im) 35 | gray = False 36 | 37 | if im_size is None: 38 | h, w = im.shape[:2] 39 | else: 40 | h, w = im_size 41 | source = bokeh.models.sources.ColumnDataSource(dict(rgba=[im])) 42 | f = image_figure('rgba', source, im_size=(h, w), axes=axes, tools=tools, gray=gray, figure=figure) 43 | return f, source 44 | 45 | 46 | def make_image_figure(im_size=(240, 320), axes=False): 47 | w, h = max(im_size), min(im_size) 48 | f = bokeh_figure(x_range=(0, w-1), y_range=(0, h-1), 49 | plot_width=w, plot_height=h, title='', 50 | tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")]) 51 | # f.x_range.range_padding = f.y_range.range_padding = 0 52 | f.toolbar.logo = None 53 | f.toolbar_location = None 54 | f.axis.visible = False 55 | f.grid.visible = False 56 | f.min_border = 0 57 | f.outline_line_width = 0 58 | f.outline_line_color = None 59 | f.background_fill_color = None 60 | f.border_fill_color = None 61 | return f 62 | 63 | 64 | def image_figure(key, source, im_size=(240, 320), axes=False, tools='', 65 | gray=False, figure=None): 66 | h, w = im_size 67 | if figure is None: 68 | # f = bokeh_figure(x_range=(0, w-1), y_range=(0, h-1), 69 | # plot_width=w, plot_height=h, tools=tools, 70 | # tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")]) 71 | f = make_image_figure(im_size=im_size, axes=axes) 72 | else: 73 | f = figure 74 | 75 | if gray: 76 | f.image(key, x=0, y=0, dw=w, dh=h, source=source) 77 | else: 78 | f.image_rgba(key, x=0, y=0, dw=w, dh=h, source=source) 79 | return f 80 | 81 | 82 | def convert_df(df): 83 | columns = [] 84 | for column in df.columns: 85 | if df.dtypes[column].kind == 'f': 86 | formatter = 
NumberFormatter(format='0.000') 87 | else: 88 | formatter = None 89 | table_col = TableColumn(field=column, title=column, formatter=formatter) 90 | columns.append(table_col) 91 | data_table = DataTable(columns=columns, source=ColumnDataSource(df), height=200) 92 | return data_table 93 | -------------------------------------------------------------------------------- /robopose/paper_models_cfg.py: -------------------------------------------------------------------------------- 1 | PANDA_MODELS = dict( 2 | gt_joints='dream-panda-gt_joints--495831', 3 | predict_joints='dream-panda-predict_joints--173472', 4 | ) 5 | 6 | KUKA_MODELS = dict( 7 | gt_joints='dream-kuka-gt_joints--192228', 8 | predict_joints='dream-kuka-predict_joints--990681', 9 | ) 10 | 11 | BAXTER_MODELS = dict( 12 | gt_joints='dream-baxter-gt_joints--510055', 13 | predict_joints='dream-baxter-predict_joints--519984', 14 | ) 15 | 16 | OWI_MODELS = dict( 17 | predict_joints='craves-owi535-predict_joints--295440', 18 | ) 19 | 20 | PANDA_ABLATION_REFERENCE_POINT_MODELS = dict( 21 | link0='dream-panda-gt_joints-reference_point=link0--864695', 22 | link1='dream-panda-gt_joints-reference_point=link1--499756', 23 | link2='dream-panda-gt_joints-reference_point=link2--905185', 24 | link4='dream-panda-gt_joints-reference_point=link4--913645', 25 | link5='dream-panda-gt_joints-reference_point=link5--669469', 26 | link9='dream-panda-gt_joints-reference_point=hand--588677', 27 | ) 28 | 29 | PANDA_ABLATION_ANCHOR_MODELS = dict( 30 | link0='dream-panda-predict_joints-anchor=link0--90648', 31 | link1='dream-panda-predict_joints-anchor=link1--375503', 32 | link2='dream-panda-predict_joints-anchor=link2--463951', 33 | link4='dream-panda-predict_joints-anchor=link4--388856', 34 | link5='dream-panda-predict_joints-anchor=link5--249745', 35 | link9='dream-panda-predict_joints-anchor=link9--106543', 36 | random_all='dream-panda-predict_joints-anchor=random_all--116995', 37 | random_top3='dream-panda-predict_joints-anchor=random_top_3_largest--65378', 38 | random_top5=PANDA_MODELS['predict_joints'], 39 | ) 40 | 41 | PANDA_ABLATION_ITERATION_MODELS = { 42 | 'n_train_iter=1': 'dream-panda-predict_joints-n_train_iter=1--752826', 43 | 'n_train_iter=2': 'dream-panda-predict_joints-n_train_iter=2--949003', 44 | 'n_train_iter=5': 'dream-panda-predict_joints-n_train_iter=5--315150', 45 | } 46 | 47 | 48 | RESULT_ID = 1804 49 | 50 | DREAM_PAPER_RESULT_IDS = [ 51 | f'dream-{robot}-dream-all-models--{RESULT_ID}' for robot in ('panda', 'kuka', 'baxter') 52 | ] 53 | 54 | DREAM_KNOWN_ANGLES_RESULT_IDS = [ 55 | f'dream-{robot}-knownq--{RESULT_ID}' for robot in ('panda', 'kuka', 'baxter') 56 | ] 57 | 58 | DREAM_UNKNOWN_ANGLES_RESULT_IDS = [ 59 | f'dream-{robot}-unknownq--{RESULT_ID}' for robot in ('panda', 'kuka', 'baxter') 60 | ] 61 | 62 | PANDA_KNOWN_ANGLES_ITERATIVE_RESULT_IDS = [ 63 | f'dream-panda-orb-knownq--{RESULT_ID}', 64 | f'dream-panda-orb-knownq-online--{RESULT_ID}' 65 | ] 66 | 67 | CRAVES_LAB_RESULT_IDS = [ 68 | f'craves-lab-unknownq--{RESULT_ID}' 69 | ] 70 | 71 | CRAVES_YOUTUBE_RESULT_IDS = [ 72 | f'craves-youtube-unknownq-focal={focal}--{RESULT_ID}' for focal in (500, 750, 1000, 1250, 1500, 1750, 2000) 73 | ] 74 | 75 | PANDA_KNOWN_ANGLES_ABLATION_RESULT_IDS = [ 76 | f'dream-panda-orb-knownq-link{link_id}--{RESULT_ID}' for link_id in (0, 1, 2, 4, 5, 9) 77 | ] 78 | 79 | PANDA_UNKNOWN_ANGLES_ABLATION_RESULT_IDS = [ 80 | f'dream-panda-orb-unknownq-{anchor}--{RESULT_ID}' 81 | for anchor in ('link5', 'link2', 'link1', 'link0', 'link4', 'link9', 'random_all', 
'random_top5', 'random_top3') 82 | ] 83 | 84 | PANDA_ITERATIONS_ABLATION_RESULT_IDS = [ 85 | f'dream-panda-orb-train_K={train_K}--{RESULT_ID}' 86 | for train_K in (1, 2, 3, 5) 87 | ] 88 | -------------------------------------------------------------------------------- /robopose/lib3d/cropping.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | from .camera_geometry import project_points, boxes_from_uv, project_points_robust 5 | 6 | 7 | def deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, lamb=1.4, im_size=(240, 320), clamp=False): 8 | """ 9 | gt_boxes: N x 4 10 | crop_boxes: N x 4 11 | """ 12 | lobs, robs, uobs, dobs = obs_boxes[:, [0, 2, 1, 3]].t() 13 | lrend, rrend, urend, drend = rend_boxes[:, [0, 2, 1, 3]].t() 14 | xc = rend_center_uv[..., 0, 0] 15 | yc = rend_center_uv[..., 0, 1] 16 | lobs, robs = lobs.unsqueeze(-1), robs.unsqueeze(-1) 17 | uobs, dobs = uobs.unsqueeze(-1), dobs.unsqueeze(-1) 18 | lrend, rrend = lrend.unsqueeze(-1), rrend.unsqueeze(-1) 19 | urend, drend = urend.unsqueeze(-1), drend.unsqueeze(-1) 20 | 21 | xc, yc = xc.unsqueeze(-1), yc.unsqueeze(-1) 22 | w = max(im_size) 23 | h = min(im_size) 24 | r = w / h 25 | 26 | xdists = torch.cat( 27 | ((lobs - xc).abs(), (lrend - xc).abs(), 28 | (robs - xc).abs(), (rrend - xc).abs()), 29 | dim=1) 30 | ydists = torch.cat( 31 | ((uobs - yc).abs(), (urend - yc).abs(), 32 | (dobs - yc).abs(), (drend - yc).abs()), 33 | dim=1) 34 | xdist = xdists.max(dim=1)[0] 35 | ydist = ydists.max(dim=1)[0] 36 | width = torch.max(xdist, ydist * r) * 2 * lamb 37 | height = torch.max(xdist / r, ydist) * 2 * lamb 38 | 39 | xc, yc = xc.squeeze(-1), yc.squeeze(-1) 40 | x1, y1, x2, y2 = xc - width/2, yc - height / 2, xc + width / 2, yc + height / 2 41 | boxes = torch.cat( 42 | (x1.unsqueeze(1), y1.unsqueeze(1), x2.unsqueeze(1), y2.unsqueeze(1)), dim=1) 43 | assert not clamp 44 | if clamp: 45 | boxes[:, [0, 2]] = torch.clamp(boxes[:, [0, 2]], 0, w - 1) 46 | boxes[:, [1, 3]] = torch.clamp(boxes[:, [1, 3]], 0, h - 1) 47 | return boxes 48 | 49 | 50 | def deepim_crops(images, obs_boxes, K, TCO_pred, O_vertices, output_size=None, lamb=1.4): 51 | batch_size, _, h, w = images.shape 52 | device = images.device 53 | if output_size is None: 54 | output_size = (h, w) 55 | uv = project_points(O_vertices, K, TCO_pred) 56 | rend_boxes = boxes_from_uv(uv) 57 | rend_center_uv = project_points(torch.zeros(batch_size, 1, 3).to(device), K, TCO_pred) 58 | boxes = deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, im_size=(h, w), lamb=lamb) 59 | boxes = torch.cat((torch.arange(batch_size).unsqueeze(1).to(device).float(), boxes), dim=1) 60 | crops = torchvision.ops.roi_align(images, boxes, output_size=output_size, sampling_ratio=4) 61 | return boxes[:, 1:], crops 62 | 63 | 64 | def deepim_crops_robust(images, obs_boxes, K, TCO_pred, tCR_in, O_vertices, output_size=None, lamb=1.4): 65 | batch_size, _, h, w = images.shape 66 | device = images.device 67 | if output_size is None: 68 | output_size = (h, w) 69 | uv = project_points_robust(O_vertices, K, TCO_pred) 70 | rend_boxes = boxes_from_uv(uv) 71 | TCR = TCO_pred.clone() 72 | TCR[:, :3, -1] = tCR_in 73 | rend_center_uv = project_points_robust(torch.zeros(batch_size, 1, 3).to(device), K, TCR) 74 | boxes = deepim_boxes(rend_center_uv, obs_boxes, rend_boxes, im_size=(h, w), lamb=lamb) 75 | boxes = torch.cat((torch.arange(batch_size).unsqueeze(1).to(device).float(), boxes), dim=1) 76 | crops = torchvision.ops.roi_align(images, boxes, 
output_size=output_size, sampling_ratio=4) 77 | return boxes[:, 1:], crops 78 | -------------------------------------------------------------------------------- /robopose/lib3d/articulated_mesh_database.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | from torch import nn 5 | 6 | from .mesh_ops import get_meshes_bounding_boxes, sample_points 7 | from .urdf_layer import UrdfLayer 8 | from .transform_ops import invert_T, transform_pts 9 | from .mesh_ops import get_meshes_center 10 | 11 | class MeshDataBase(nn.Module): 12 | def __init__(self, labels, urdf_layers): 13 | super().__init__() 14 | self.labels = labels 15 | self.label_to_id = {l: n for n, l in enumerate(labels)} 16 | self.urdf_layers = nn.ModuleList(urdf_layers) 17 | self.label_to_urdf_layer = {l: self.urdf_layers[n] for l, n in self.label_to_id.items()} 18 | 19 | @staticmethod 20 | def from_urdf_ds(urdf_ds, n_points=20000): 21 | labels = [] 22 | urdf_layers = [] 23 | for _, object_info in urdf_ds.index.iterrows(): 24 | layer = UrdfLayer(object_info.urdf_path, 25 | n_points=n_points, 26 | global_scale=object_info.scale, 27 | sampler='per_link') 28 | labels.append(object_info.to_dict()['label']) 29 | urdf_layers.append(layer) 30 | labels = np.array(labels) 31 | return MeshDataBase(labels, urdf_layers).float() 32 | 33 | def select(self, obj_infos, link_names=None, apply_fk=True): 34 | labels = np.array([obj['name'] for obj in obj_infos]) 35 | groups = pd.DataFrame(dict(labels=labels)).groupby('labels').groups 36 | bsz = len(obj_infos) 37 | device = self.urdf_layers[0].pts_TLV.device 38 | dtype = self.urdf_layers[0].pts_TLV.dtype 39 | 40 | if link_names is None: 41 | n_pts = self.urdf_layers[0].pts_TLV.shape[0] 42 | else: 43 | n_pts = self.urdf_layers[0].link_pts.shape[1] 44 | 45 | points = torch.zeros(bsz, n_pts, 3, device=device, dtype=dtype) 46 | 47 | for label, ids in groups.items(): 48 | label_id = self.label_to_id[label] 49 | layer = self.urdf_layers[label_id] 50 | joints = dict() 51 | for k in layer.joint_names: 52 | joints[k] = torch.cat([obj_infos[i]['joints'][k] for i in ids], dim=0).to(device).to(dtype) 53 | if link_names is None: 54 | if apply_fk: 55 | points[ids] = layer(joints) 56 | else: 57 | raise NotImplementedError 58 | else: 59 | if apply_fk: 60 | points[ids] = layer.compute_per_link_fk(link_names=link_names, q=joints) 61 | else: 62 | points[ids] = layer.get_link_pts(link_names) 63 | return Meshes(labels=labels, points=points) 64 | 65 | 66 | class Meshes(nn.Module): 67 | def __init__(self, labels, points): 68 | super().__init__() 69 | self.labels = labels 70 | self.register_buffer('points', points) 71 | 72 | def center_meshes(self): 73 | T_offset = get_meshes_center(self.points) 74 | points = transform_pts(invert_T(T_offset), self.points) 75 | return Meshes( 76 | labels=self.labels, 77 | points=points, 78 | ), T_offset 79 | 80 | def __len__(self): 81 | return len(self.labels) 82 | 83 | def sample_points(self, n_points, deterministic=False): 84 | return sample_points(self.points, n_points, deterministic=deterministic) 85 | -------------------------------------------------------------------------------- /robopose/utils/distributed.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import torch.distributed as dist 4 | import torch 5 | from pathlib import Path 6 | 7 | from robopose.utils.logging import get_logger 8 | logger = get_logger(__name__) 9 | 
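# Usage sketch (illustrative only; assumes a SLURM-style launch in which each
# process sees exactly one GPU through CUDA_VISIBLE_DEVICES, as required by
# init_distributed_mode below):
#
#   init_distributed_mode()
#   rank, world_size = get_rank(), get_world_size()
#   ...
#   stats = reduce_dict({'loss': loss_value})   # averaged across processes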
10 | 11 | def get_tmp_dir(): 12 | if 'JOB_DIR' in os.environ: 13 | tmp_dir = Path(os.environ['JOB_DIR']) / 'tmp' 14 | else: 15 | tmp_dir = Path('/tmp/robopose_job') 16 | tmp_dir.mkdir(exist_ok=True) 17 | return tmp_dir 18 | 19 | 20 | def sync_model(model): 21 | sync_dir = get_tmp_dir() / 'models' 22 | sync_dir.mkdir(exist_ok=True) 23 | sync_ckpt = sync_dir / 'sync.checkpoint' 24 | if get_rank() == 0 and get_world_size() > 1: 25 | logger.info("save") 26 | torch.save(model.state_dict(), sync_ckpt) 27 | dist.barrier() 28 | if get_rank() > 0: 29 | logger.info("load") 30 | model.load_state_dict(torch.load(sync_ckpt)) 31 | dist.barrier() 32 | return model 33 | 34 | 35 | def redirect_output(): 36 | if 'JOB_DIR' in os.environ: 37 | rank = get_rank() 38 | output_file = Path(os.environ['JOB_DIR']) / f'stdout{rank}.out' 39 | sys.stdout = open(output_file, 'w') 40 | sys.stderr = open(output_file, 'w') 41 | return 42 | 43 | 44 | def get_rank(): 45 | if not torch.distributed.is_initialized(): 46 | rank = 0 47 | else: 48 | rank = torch.distributed.get_rank() 49 | return rank 50 | 51 | 52 | def get_world_size(): 53 | if not torch.distributed.is_initialized(): 54 | world_size = 1 55 | else: 56 | world_size = torch.distributed.get_world_size() 57 | return world_size 58 | 59 | 60 | def init_distributed_mode(initfile=None): 61 | assert torch.cuda.device_count() == 1 62 | rank = int(os.environ.get('SLURM_PROCID', 0)) 63 | world_size = int(os.environ.get('SLURM_NTASKS', 1)) 64 | if initfile is None: 65 | initfile = get_tmp_dir() / 'initfile' 66 | if initfile.exists() and world_size == 1: 67 | initfile.unlink() 68 | initfile = Path(initfile) 69 | assert initfile.parent.exists() 70 | torch.distributed.init_process_group( 71 | backend='nccl', rank=rank, world_size=world_size, 72 | init_method=f'file://{initfile.as_posix()}' 73 | ) 74 | torch.distributed.barrier() 75 | 76 | 77 | def reduce_dict(input_dict, average=True): 78 | """ 79 | https://github.com/pytorch/vision/blob/master/references/detection/utils.py 80 | Args: 81 | input_dict (dict): all the values will be reduced 82 | average (bool): whether to do average or sum 83 | Reduce the values in the dictionary from all processes so that all processes 84 | have the averaged results. Returns a dict with the same fields as 85 | input_dict, after reduction. 
86 | """ 87 | world_size = dist.get_world_size() 88 | if world_size < 2: 89 | return input_dict 90 | with torch.no_grad(): 91 | names = [] 92 | values = [] 93 | reduced_dict = [] 94 | # sort the keys so that they are consistent across processes 95 | for k in sorted(input_dict.keys()): 96 | names.append(k) 97 | values.append(input_dict[k]) 98 | values = torch.tensor(values).float().cuda() 99 | dist.all_reduce(values) 100 | if average: 101 | values /= world_size 102 | reduced_dict = {k: v.item() for k, v in zip(names, values)} 103 | return reduced_dict 104 | -------------------------------------------------------------------------------- /robopose/libmesh/meshlab_converter.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | from PIL import Image 4 | import PIL 5 | import os 6 | import numpy as np 7 | from plyfile import PlyData 8 | 9 | 10 | def _get_template(template_name): 11 | template_path = Path(__file__).resolve().parent / 'meshlab_templates' / template_name 12 | return template_path.read_text() 13 | 14 | 15 | def run_meshlab_script(in_path, out_path, script, cd_dir=None, has_textures=True): 16 | in_path = Path(in_path) 17 | out_path = Path(out_path) 18 | n = np.random.randint(1e6) 19 | script_path = Path(f'/dev/shm/{n}.mlx') 20 | script_path.write_text(script) 21 | 22 | if cd_dir is None: 23 | cd_dir = '.' 24 | command = [f'cd {cd_dir} &&', 'LC_ALL=C', 25 | 'meshlabserver', '-i', in_path.as_posix(), '-o', out_path.as_posix(), 26 | '-s', script_path.as_posix(), '-om', 'vn'] 27 | if has_textures: 28 | command += ['wt', 'vt'] 29 | print(command) 30 | os.system(' '.join(command)) 31 | script_path.unlink() 32 | return 33 | 34 | 35 | def add_texture_to_mtl(obj_path): 36 | # Sometimes meshlab forgets to put the texture in the output mtl. 
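    # (e.g. for a hypothetical 'foo.obj', this appends the line
    #  'map_Kd foo_texture.png' to 'foo.obj.mtl')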
37 | obj_path = Path(obj_path) 38 | texture_name = obj_path.with_suffix('').name + '_texture.png' 39 | mtl_path = obj_path.with_suffix('.obj.mtl') 40 | mtl = mtl_path.read_text() 41 | mtl += f'\nmap_Kd {texture_name}' 42 | mtl_path.write_text(mtl) 43 | return 44 | 45 | 46 | def ply_to_obj(ply_path, obj_path, texture_size=(1024, 1024)): 47 | ply_path = Path(ply_path) 48 | obj_path = Path(obj_path) 49 | ply_copied_path = obj_path.parent / ply_path.name 50 | is_same = ply_copied_path == ply_path 51 | if not is_same: 52 | shutil.copy(ply_path, ply_copied_path) 53 | 54 | ply = PlyData.read(ply_path) 55 | ply_texture = None 56 | for c in ply.comments: 57 | if 'TextureFile' in c: 58 | ply_texture = c.split(' ')[-1] 59 | 60 | if ply_texture is None: 61 | template = _get_template('template_vertexcolor_to_texture.mlx') 62 | out_texture_path = obj_path.with_suffix('').name + '_texture.png' 63 | script = template.format(out_texture_path=out_texture_path) 64 | run_meshlab_script(ply_copied_path, obj_path, script, cd_dir=obj_path.parent) 65 | else: 66 | template = _get_template('template_ply_texture_to_obj.mlx') 67 | script = template 68 | ply_texture_name = ply_texture.split('.')[0] 69 | out_texture_path = obj_path.parent / (ply_texture_name+'_texture.png') 70 | shutil.copy(ply_path.parent / ply_texture, out_texture_path) 71 | Image.open(out_texture_path).resize(texture_size, resample=PIL.Image.BILINEAR).save(out_texture_path) 72 | run_meshlab_script(ply_path, obj_path, template) 73 | add_texture_to_mtl(obj_path) 74 | if not is_same: 75 | ply_copied_path.unlink() 76 | return 77 | 78 | 79 | def downsample_obj(in_path, out_path, n_faces=1000): 80 | # Remesh and downsample 81 | template = _get_template('template_downsample_textures.mlx') 82 | script = template.format(n_faces=n_faces) 83 | run_meshlab_script(in_path, out_path, script, has_textures=True, cd_dir=in_path.parent) 84 | 85 | 86 | def sample_points(in_path, out_path, n_points=2000): 87 | # Remesh and downsample 88 | template_path = Path(__file__).resolve().parent / 'template_sample_points.mlx' 89 | template = template_path.read_text() 90 | script = template.format(n_points=n_points) 91 | run_meshlab_script(in_path, out_path, script) 92 | return 93 | -------------------------------------------------------------------------------- /robopose/lib3d/camera_geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def project_points(points_3d, K, TCO): 5 | assert K.shape[-2:] == (3, 3) 6 | assert TCO.shape[-2:] == (4, 4) 7 | batch_size = points_3d.shape[0] 8 | n_points = points_3d.shape[1] 9 | device = points_3d.device 10 | if points_3d.shape[-1] == 3: 11 | points_3d = torch.cat((points_3d, torch.ones(batch_size, n_points, 1).to(device)), dim=-1) 12 | P = K @ TCO[:, :3] 13 | suv = (P.unsqueeze(1) @ points_3d.unsqueeze(-1)).squeeze(-1) 14 | suv = suv / suv[..., [-1]] 15 | return suv[..., :2] 16 | 17 | 18 | def project_points_robust(points_3d, K, TCO, z_min=0.1): 19 | assert K.shape[-2:] == (3, 3) 20 | assert TCO.shape[-2:] == (4, 4) 21 | batch_size = points_3d.shape[0] 22 | n_points = points_3d.shape[1] 23 | device = points_3d.device 24 | if points_3d.shape[-1] == 3: 25 | points_3d = torch.cat((points_3d, torch.ones(batch_size, n_points, 1).to(device)), dim=-1) 26 | P = K @ TCO[:, :3] 27 | suv = (P.unsqueeze(1) @ points_3d.unsqueeze(-1)).squeeze(-1) 28 | z = suv[..., -1] 29 | suv[..., -1] = torch.max(torch.ones_like(z) * z_min, z) 30 | suv = suv / suv[..., [-1]] 31 | return suv[..., :2] 32 
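# Worked example (sketch, not part of the original module): both helpers above
# implement the pinhole projection u = fx*X/Z + cx, v = fy*Y/Z + cy after
# mapping points into the camera frame with TCO. With fx = fy = 1000,
# cx = 320, cy = 240 and a camera-frame point (0.1, 0.0, 1.0):
#
#   u = 1000 * 0.1 / 1.0 + 320 = 420
#   v = 1000 * 0.0 / 1.0 + 240 = 240
#
#   K = torch.tensor([[[1000., 0., 320.], [0., 1000., 240.], [0., 0., 1.]]])
#   TCO = torch.eye(4).unsqueeze(0)
#   pts = torch.tensor([[[0.1, 0.0, 1.0]]])      # (batch, n_points, 3)
#   project_points(pts, K, TCO)                  # -> tensor([[[420., 240.]]])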
| 33 | 34 | def boxes_from_uv(uv): 35 | assert uv.shape[-1] == 2 36 | x1 = uv[..., [0]].min(dim=1)[0] 37 | y1 = uv[..., [1]].min(dim=1)[0] 38 | 39 | x2 = uv[..., [0]].max(dim=1)[0] 40 | y2 = uv[..., [1]].max(dim=1)[0] 41 | 42 | return torch.cat((x1, y1, x2, y2), dim=1) 43 | 44 | 45 | def get_K_crop_resize(K, boxes, orig_size, crop_resize): 46 | """ 47 | Adapted from https://github.com/BerkeleyAutomation/perception/blob/master/perception/camera_intrinsics.py 48 | Skew is not handled ! 49 | """ 50 | assert K.shape[1:] == (3, 3) 51 | assert boxes.shape[1:] == (4, ) 52 | K = K.float() 53 | boxes = boxes.float() 54 | new_K = K.clone() 55 | 56 | orig_size = torch.tensor(orig_size, dtype=torch.float) 57 | crop_resize = torch.tensor(crop_resize, dtype=torch.float) 58 | 59 | final_width, final_height = max(crop_resize), min(crop_resize) 60 | crop_width = boxes[:, 2] - boxes[:, 0] 61 | crop_height = boxes[:, 3] - boxes[:, 1] 62 | crop_cj = (boxes[:, 0] + boxes[:, 2]) / 2 63 | crop_ci = (boxes[:, 1] + boxes[:, 3]) / 2 64 | 65 | # Crop 66 | cx = K[:, 0, 2] + (crop_width - 1) / 2 - crop_cj 67 | cy = K[:, 1, 2] + (crop_height - 1) / 2 - crop_ci 68 | 69 | # # Resize (upsample) 70 | center_x = (crop_width - 1) / 2 71 | center_y = (crop_height - 1) / 2 72 | orig_cx_diff = cx - center_x 73 | orig_cy_diff = cy - center_y 74 | scale_x = final_width / crop_width 75 | scale_y = final_height / crop_height 76 | scaled_center_x = (final_width - 1) / 2 77 | scaled_center_y = (final_height - 1) / 2 78 | fx = scale_x * K[:, 0, 0] 79 | fy = scale_y * K[:, 1, 1] 80 | cx = scaled_center_x + scale_x * orig_cx_diff 81 | cy = scaled_center_y + scale_y * orig_cy_diff 82 | 83 | new_K[:, 0, 0] = fx 84 | new_K[:, 1, 1] = fy 85 | new_K[:, 0, 2] = cx 86 | new_K[:, 1, 2] = cy 87 | return new_K 88 | 89 | 90 | def cropresize_backtransform_points2d(input_wh, boxes_2d_crop, 91 | output_wh, points_2d_in_output): 92 | bsz = input_wh.shape[0] 93 | assert output_wh.shape == (bsz, 2) 94 | assert input_wh.shape == (bsz, 2) 95 | assert points_2d_in_output.dim() == 3 96 | 97 | points_2d_normalized = points_2d_in_output / output_wh.unsqueeze(1) 98 | points_2d = boxes_2d_crop[:, [0, 1]].unsqueeze(1) + points_2d_normalized * input_wh.unsqueeze(1) 99 | return points_2d 100 | -------------------------------------------------------------------------------- /robopose/simulator/body.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import pybullet as pb 6 | from robopose.lib3d import Transform, parse_pose_args 7 | 8 | from .client import BulletClient 9 | 10 | 11 | class Body: 12 | def __init__(self, body_id, scale=1.0, client_id=0): 13 | self._body_id = body_id 14 | self._client = BulletClient(client_id) 15 | self._scale = scale 16 | 17 | @property 18 | def name(self): 19 | info = self._client.getBodyInfo(self._body_id) 20 | return info[-1].decode('utf8') 21 | 22 | @property 23 | def pose(self): 24 | return self.pose 25 | 26 | @pose.getter 27 | def pose(self): 28 | pos, orn = self._client.getBasePositionAndOrientation(self._body_id) 29 | return Transform(orn, pos).toHomogeneousMatrix() 30 | 31 | @pose.setter 32 | def pose(self, pose_args): 33 | pose = parse_pose_args(pose_args) 34 | pos, orn = pose.translation, pose.quaternion.coeffs() 35 | self._client.resetBasePositionAndOrientation(self._body_id, pos, orn) 36 | 37 | @property 38 | def num_joints(self): 39 | return self._client.getNumJoints(self._body_id) 40 | 41 | 
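    # Usage sketch (illustrative; the URDF path and joint name are placeholders):
    #
    #   body = Body.load('/path/to/robot.urdf', scale=1.0, client_id=client_id)
    #   body.pose = Transform(np.eye(4))    # world pose TWO, see pose.setter
    #   body.q = {'joint_1': 0.3}           # unknown joint names are ignored
    #   state = body.get_state()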
@property 42 | def joint_infos(self): 43 | return (self._client.getJointInfo(self._body_id, n) for n in range(self.num_joints)) 44 | 45 | @property 46 | def joint_states(self): 47 | return (self._client.getJointState(self._body_id, n) for n in range(self.num_joints)) 48 | 49 | def link_states(self): 50 | return (self._client.getLinkState(self._body_id, n) for n in range(self.num_joints)) 51 | 52 | @property 53 | def q_limits(self): 54 | q_limits = OrderedDict([(info[1].decode('utf8'), (info[8], info[9])) for info in self.joint_infos]) 55 | return q_limits 56 | 57 | @property 58 | def joint_names(self): 59 | joint_names = [ 60 | info[1].decode('utf8') for info in self.joint_infos 61 | ] 62 | return joint_names 63 | 64 | @property 65 | def joint_name_to_id(self): 66 | return {name: n for n, name in enumerate(self.joint_names)} 67 | 68 | @property 69 | def q(self): 70 | values = np.array([state[0] for state in self.joint_states]) 71 | q = OrderedDict({name: value for name, value in zip(self.joint_names, values)}) 72 | return q 73 | 74 | @q.setter 75 | def q(self, joints): 76 | joint_name_to_id = self.joint_name_to_id 77 | for joint_name, joint_value in joints.items(): 78 | joint_id = joint_name_to_id.get(joint_name, None) 79 | if joint_id is not None: 80 | self._client.resetJointState(self._body_id, joint_id, joint_value) 81 | 82 | def get_state(self): 83 | return dict(TWO=self.pose, 84 | q=self.q, 85 | joint_names=self.joint_names, 86 | name=self.name, 87 | scale=self._scale, 88 | body_id=self._body_id) 89 | 90 | 91 | @property 92 | def visual_shape_data(self): 93 | return self._client.getVisualShapeData(self.body_id) 94 | 95 | @property 96 | def body_id(self): 97 | return self._body_id 98 | 99 | @property 100 | def client_id(self): 101 | return self.client_id 102 | 103 | @staticmethod 104 | def load(urdf_path, scale=1.0, client_id=0): 105 | urdf_path = Path(urdf_path) 106 | assert urdf_path.exists, 'URDF does not exist.' 107 | body_id = pb.loadURDF(urdf_path.as_posix(), physicsClientId=client_id, globalScaling=scale) 108 | return Body(body_id, scale=scale, client_id=client_id) 109 | -------------------------------------------------------------------------------- /robopose/third_party/craves/d3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import json 4 | 5 | def make_translation(X, Y, Z): 6 | ''' Make translation matrix ''' 7 | T = np.matrix( 8 | [ 9 | [1, 0, 0, -X], 10 | [0, 1, 0, -Y], 11 | [0, 0, 1, -Z], 12 | [0, 0, 0, 1] 13 | ] 14 | ) 15 | return T 16 | 17 | def make_location(X, Y, Z): 18 | T = np.matrix( 19 | [ 20 | [X], 21 | [Y], 22 | [Z] 23 | ]) 24 | return T 25 | 26 | def make_rotation(pitch, yaw, roll): 27 | # Convert from degree to radius 28 | pitch = pitch / 180.0 * np.pi 29 | yaw = yaw / 180.0 * np.pi 30 | roll = roll / 180.0 * np.pi 31 | pitch = -pitch 32 | yaw = yaw # ???!!! 
33 | roll = -roll # Seems UE4 rotation direction is different 34 | # from: http://planning.cs.uiuc.edu/node102.html 35 | ryaw = [ 36 | [np.cos(yaw), -np.sin(yaw), 0, 0], 37 | [np.sin(yaw), np.cos(yaw), 0, 0], 38 | [0, 0, 1, 0], 39 | [0, 0, 0, 1] 40 | ] 41 | rpitch = [ 42 | [np.cos(pitch), 0, np.sin(pitch), 0], 43 | [0, 1, 0, 0], 44 | [-np.sin(pitch), 0, np.cos(pitch), 0], 45 | [0, 0, 0, 1] 46 | ] 47 | rroll = [ 48 | [1, 0, 0, 0], 49 | [0, np.cos(roll), -np.sin(roll), 0], 50 | [0, np.sin(roll), np.cos(roll), 0], 51 | [0, 0, 0, 1] 52 | ] 53 | T = np.matrix(ryaw) * np.matrix(rpitch) * np.matrix(rroll) 54 | return T 55 | 56 | def make_rearrage(): 57 | return np.matrix( 58 | [[0, 1, 0, 0], 59 | [0, 0, -1, 0], 60 | [1, 0, 0, 0], 61 | [0, 0, 0, 1] 62 | ] 63 | ) 64 | 65 | def make_intrinsic(fx, fy, cx, cy): 66 | return np.matrix( 67 | [[fx, 0, cx, 0], 68 | [0, fy, cy, 0], 69 | [0, 0, 1, 0], 70 | [0, 0, 0, 0] 71 | ]) 72 | 73 | class CameraPose: 74 | def __init__(self, x, y, z, pitch, yaw, roll, width, height, f): 75 | self.x, self.y, self.z = x, y, z 76 | self.pitch, self.yaw, self.roll = pitch, yaw, roll 77 | self.width, self.height, self.f = width, height, f 78 | # with open(os.path.join(meta_dir, 'camera_parameter.json'), 'r') as file: 79 | # obj = json.load(file) 80 | # self.fx, self.fy = obj[img_type]['FocalLength'] 81 | # self.px, self.py = obj[img_type]['PrincipalPoint'] 82 | 83 | def __repr__(self): 84 | print(self.__dict__) 85 | return 'x:{x} y:{y} z:{z} pitch:{pitch} yaw:{yaw} roll:{roll} width:{width} height:{height} f:{f}'.format(**self.__dict__) 86 | 87 | # The points_3d is in the canonical coordinate 88 | def project_to_2d(self, points_3d): 89 | ''' 90 | points_3d: points in 3D world coordinate, n * 3 91 | cam_pose: camera pose 92 | ''' 93 | if not points_3d.shape[1] == 3: 94 | print('The input shape is not n x 3, but n x %d' % points_3d.shape[1]) 95 | return 96 | 97 | npoints = points_3d.shape[0] 98 | points_3d = np.concatenate((points_3d, np.ones((npoints, 1))), axis = 1) 99 | 100 | cam_T = make_translation(self.x, self.y, self.z) 101 | cam_R = make_rotation(self.pitch, self.yaw, self.roll) 102 | 103 | world_to_camera = cam_R.T * cam_T # inverse matrix 104 | points_3d_camera = (make_rearrage() * world_to_camera * points_3d.T).T 105 | 106 | px = self.width / 2 107 | py = self.height / 2 108 | 109 | n_points = points_3d.shape[0] 110 | points_2d = np.zeros((n_points, 2)) 111 | 112 | points_2d[:,0] = (points_3d_camera[:,0] / points_3d_camera[:,2] * self.f + px).squeeze() 113 | points_2d[:,1] = ((points_3d_camera[:,1]) / points_3d_camera[:,2] * self.f + py).squeeze() 114 | return points_2d 115 | -------------------------------------------------------------------------------- /robopose/datasets/articulated_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | from dataclasses import dataclass 5 | from robopose.lib3d import invert_T 6 | from robopose.config import LOCAL_DATA_DIR 7 | 8 | from .augmentations import ( 9 | CropResizeToAspectAugmentation, VOCBackgroundAugmentation, 10 | PillowBlur, PillowSharpness, PillowContrast, PillowBrightness, PillowColor, to_torch_uint8 11 | ) 12 | 13 | @dataclass 14 | class PoseData: 15 | images: None 16 | bboxes: None 17 | TCO: None 18 | K: None 19 | objects: None 20 | 21 | def pin_memory(self): 22 | self.images = self.images.pin_memory() 23 | self.bboxes = self.bboxes.pin_memory() 24 | self.TCO = self.TCO.pin_memory() 25 | self.K = self.K.pin_memory() 
26 | return self 27 | 28 | 29 | class ArticulatedDataset(torch.utils.data.Dataset): 30 | def __init__(self, 31 | scene_ds, 32 | resize=(640, 480), 33 | visibility_check=True, 34 | rgb_augmentation=False, 35 | background_augmentation=False): 36 | 37 | self.scene_ds = scene_ds 38 | 39 | self.resize_augmentation = CropResizeToAspectAugmentation(resize=resize) 40 | 41 | self.background_augmentation = background_augmentation 42 | self.background_augmentations = VOCBackgroundAugmentation( 43 | voc_root=LOCAL_DATA_DIR / 'VOCdevkit/VOC2012', p=0.3) 44 | 45 | self.rgb_augmentation = rgb_augmentation 46 | self.rgb_augmentations = [ 47 | PillowBlur(p=0.4, factor_interval=(1, 3)), 48 | PillowSharpness(p=0.3, factor_interval=(0., 50.)), 49 | PillowContrast(p=0.3, factor_interval=(0.2, 50.)), 50 | PillowBrightness(p=0.5, factor_interval=(0.1, 6.0)), 51 | PillowColor(p=0.3, factor_interval=(0., 20.)) 52 | ] 53 | 54 | self.visibility_check = visibility_check 55 | 56 | def __len__(self): 57 | return len(self.scene_ds) 58 | 59 | def collate_fn(self, batch): 60 | data = dict() 61 | for k in batch[0].__annotations__: 62 | v = [getattr(x, k) for x in batch] 63 | if k in ('images', 'bboxes', 'TCO', 'K'): 64 | v = torch.tensor(np.stack(v)) 65 | data[k] = v 66 | data = PoseData(**data) 67 | return data 68 | 69 | def __getitem__(self, idx): 70 | rgb, mask, state = self.scene_ds[idx] 71 | 72 | rgb, mask, state = self.resize_augmentation(rgb, mask, state) 73 | 74 | if self.background_augmentation: 75 | rgb, mask, state = self.background_augmentations(rgb, mask, state) 76 | 77 | if self.rgb_augmentation and random.random() < 0.8: 78 | for augmentation in self.rgb_augmentations: 79 | rgb, mask, state = augmentation(rgb, mask, state) 80 | 81 | rgb, mask = to_torch_uint8(rgb), to_torch_uint8(mask) 82 | mask_uniqs = set(np.unique(mask)) 83 | objects_visible = [] 84 | for obj in state['objects']: 85 | if (not self.visibility_check) or obj['id_in_segm'] in mask_uniqs: 86 | objects_visible.append(obj) 87 | assert len(objects_visible) > 0, idx 88 | 89 | rgb = torch.as_tensor(rgb).permute(2, 0, 1).to(torch.uint8) 90 | assert rgb.shape[0] == 3 91 | 92 | obj = random.sample(objects_visible, k=1)[0] 93 | TWO = torch.as_tensor(obj['TWO']) 94 | TWC = torch.as_tensor(state['camera']['TWC']) 95 | TCO = invert_T(TWC) @ TWO 96 | 97 | data = PoseData( 98 | images=np.asarray(rgb), 99 | bboxes=np.asarray(obj['bbox']), 100 | TCO=np.asarray(TCO), 101 | K=np.asarray(state['camera']['K']), 102 | objects=obj, 103 | ) 104 | return data 105 | -------------------------------------------------------------------------------- /robopose/visualization/model_debug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import matplotlib.patches as patches 5 | from robopose.lib3d.camera_geometry import project_points 6 | 7 | def make_rot_lines(TCO, K, length=0.03): 8 | assert TCO.shape == (4, 4) 9 | assert K.shape == (3, 3) 10 | TCO = np.asarray(TCO) 11 | pts = np.array([ 12 | [0, 0, 0], 13 | [length, 0, 0], 14 | [0, length, 0], 15 | [0, 0, length] 16 | ]) 17 | uv = project_points(torch.tensor(pts).unsqueeze(0).float(), 18 | torch.tensor(K).unsqueeze(0).float(), 19 | torch.tensor(TCO).unsqueeze(0).float())[0] 20 | return uv 21 | 22 | def plot_rot_axes(ax, TCO, K, length=0.03): 23 | uv = make_rot_lines(TCO, K, length=length) 24 | ax.arrow(uv[0, 0], uv[0, 1], uv[1, 0] - uv[0, 0], uv[1, 1] - uv[0, 1], 25 | color='red') 26 | ax.arrow(uv[0, 0], uv[0, 
1], uv[2, 0] - uv[0, 0], uv[2, 1] - uv[0, 1], 27 | color='green') 28 | ax.arrow(uv[0, 0], uv[0, 1], uv[3, 0] - uv[0, 0], uv[3, 1] - uv[0, 1], 29 | color='blue') 30 | return uv 31 | 32 | def plot_debug(debug_dict, n=0): 33 | d = debug_dict 34 | im = d['images'][n].cpu().permute(1, 2, 0) 35 | render1 = d['renders'][n, :3].cpu().permute(1, 2, 0) 36 | render = render1 37 | render2 = d['renders'][n, 3:].cpu().permute(1, 2, 0) 38 | im_crop = d['images_crop'][n].cpu().permute(1, 2, 0) 39 | rend_box = d['boxes_rend'][n].cpu().numpy() 40 | crop_box = d['boxes_crop'][n].cpu().numpy() 41 | ref_point_uv = d['ref_point_uv'][n, 0].cpu().numpy() 42 | origin_uv = d['origin_uv'][n, 0].cpu().numpy() 43 | origin_uv_crop = d['origin_uv_crop'][n, 0].cpu().numpy() 44 | # TCO = d['TCO'][n].cpu().numpy() 45 | # TCR = d['TCR'][n].cpu().numpy() 46 | K_crop = d['K_crop'][n].cpu().numpy() 47 | uv = d['uv'][n].cpu().numpy() 48 | uv_gt_root = d['pts_proj_gt'][n].cpu().numpy() if 'pts_proj_gt' in d else None 49 | uv_input_root = d['pts_proj_input'][n].cpu().numpy() if 'pts_proj_input' in d else None 50 | 51 | 52 | plt.figure() 53 | fig,ax = plt.subplots(1) 54 | ax.imshow(im) 55 | ax.axis('off') 56 | 57 | plt.figure() 58 | fig,ax = plt.subplots(1) 59 | ax.imshow(im) 60 | x1, y1, x2, y2 = rend_box 61 | rect = patches.Rectangle((x1,y1),(x2-x1),(y2-y1),linewidth=1,edgecolor='r',facecolor='none') 62 | ax.add_patch(rect) 63 | ax.axis('off') 64 | 65 | x1, y1, x2, y2 = crop_box 66 | rect = patches.Rectangle((x1,y1),(x2-x1),(y2-y1),linewidth=1,edgecolor='g',facecolor='none') 67 | ax.add_patch(rect) 68 | x, y = ref_point_uv 69 | ax.plot(x, y, '+', color='red') 70 | x, y = origin_uv 71 | ax.plot(x, y, '+', color='orange') 72 | ax.scatter(uv[:, 0], uv[:, 1], marker='+') 73 | ax.axis('off') 74 | 75 | plt.figure() 76 | fig, ax = plt.subplots(1) 77 | ax.imshow(im_crop) 78 | ax.axis('off') 79 | if uv_gt_root is not None: 80 | ax.scatter(uv_gt_root[:, 0], uv_gt_root[:, 1], marker='+') 81 | 82 | plt.figure() 83 | fig, ax = plt.subplots(1) 84 | ax.imshow(render1) 85 | ax.axis('off') 86 | ax.plot(320/2, 240/2, '+', color='red') 87 | x, y = origin_uv_crop 88 | ax.plot(x, y, '+', color='orange') 89 | # plot_rot_axes(ax, TCO, K_crop, 0.03) 90 | 91 | if render2.numel() > 0: 92 | plt.figure() 93 | fig, ax = plt.subplots(1) 94 | ax.imshow(render2) 95 | ax.axis('off') 96 | ax.plot(320/2, 240/2, '+', color='red') 97 | x, y = origin_uv_crop 98 | ax.plot(x, y, '+', color='orange') 99 | 100 | # if uv_input_root is not None: 101 | # ax.scatter(uv_input_root[:, 0], uv_input_root[:, 1], marker='+') 102 | 103 | plt.figure() 104 | plt.imshow(render) 105 | plt.imshow(im_crop, alpha=0.5) 106 | plt.axis('off') 107 | -------------------------------------------------------------------------------- /robopose/evaluation/data_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import torch 4 | from collections import defaultdict 5 | import robopose.utils.tensor_collection as tc 6 | from robopose.lib3d.transform_ops import invert_T 7 | from robopose.datasets.utils import make_masks_from_det 8 | from robopose.datasets.augmentations import CropResizeToAspectAugmentation 9 | import robopose.utils.tensor_collection as tc 10 | 11 | 12 | def make_articulated_input_infos(rgb_uint8, robot_label, bbox=None, focal=1000, resize=(640, 480)): 13 | rgb_uint8 = np.asarray(rgb_uint8) 14 | h, w, _ = rgb_uint8.shape 15 | K = np.array([[focal, 0, w/2], [0, focal, h/2], [0, 0, 1]]) 16 | camera = 
dict(K=K, T0C=np.eye(4), TWC=np.eye(4), resolution=(w, h)) 17 | if bbox is None: 18 | margin = 0 19 | h, w, _ = np.array(rgb_uint8).shape 20 | keypoints_2d = np.array([[w*margin, h*margin], 21 | [w-w*margin, h-h*margin]]) 22 | bbox = np.concatenate([np.min(keypoints_2d, axis=0), np.max(keypoints_2d, axis=0)]) 23 | mask = make_masks_from_det(np.array(bbox)[None], h, w).numpy().astype(np.uint8)[0] * 255 24 | robot = dict(joints=None, name=robot_label, id_in_segm=255, bbox=bbox, TWO=np.eye(4)) 25 | state = dict(objects=[robot], camera=camera) 26 | augmentation = CropResizeToAspectAugmentation(resize=resize) 27 | rgb, mask, state = augmentation(rgb_uint8, mask, state) 28 | det_infos = [dict(label=robot_label, score=1.0, batch_im_id=0)] 29 | detections = tc.PandasTensorCollection( 30 | infos=pd.DataFrame(det_infos), 31 | bboxes=torch.as_tensor(state['objects'][0]['bbox']).float().cuda().unsqueeze(0), 32 | ) 33 | 34 | images = torch.tensor(np.array(rgb)).cuda().float().unsqueeze(0).permute(0, 3, 1, 2) / 255 35 | K = torch.tensor(state['camera']['K']).float().cuda().unsqueeze(0) 36 | return images, K, detections 37 | 38 | 39 | 40 | def parse_obs_data(obs, parse_joints=False): 41 | data = defaultdict(list) 42 | frame_info = obs['frame_info'] 43 | TWC = torch.as_tensor(obs['camera']['TWC']).float() 44 | for n, obj in enumerate(obs['objects']): 45 | info = dict(frame_obj_id=n, 46 | label=obj['name'], 47 | visib_fract=obj.get('visib_fract', 1), 48 | scene_id=frame_info['scene_id'], 49 | view_id=frame_info['view_id']) 50 | data['infos'].append(info) 51 | data['TWO'].append(obj['TWO']) 52 | data['bboxes'].append(obj['bbox']) 53 | data['keypoints_2d'].append(obj.get('keypoints_2d', [])) 54 | data['TCO_keypoints_3d'].append(obj.get('TCO_keypoints_3d', [])) 55 | data['points_3d'].append(obj.get('keypoints_2d', [])) 56 | 57 | joints = None 58 | if parse_joints: 59 | objects = obs['objects'] 60 | joint_names = list(objects[0]['joints'].keys()) 61 | joints = torch.stack([torch.tensor([obj['joints'][k] for k in joint_names]) for obj in obs['objects']]) 62 | 63 | for k, v in data.items(): 64 | if k != 'infos': 65 | data[k] = torch.stack([torch.as_tensor(x) .float()for x in v]) 66 | 67 | data['infos'] = pd.DataFrame(data['infos']) 68 | TCO = invert_T(TWC).unsqueeze(0) @ data['TWO'] 69 | 70 | data = tc.PandasTensorCollection( 71 | infos=data['infos'], 72 | TCO=TCO, 73 | bboxes=data['bboxes'], 74 | keypoints_2d=data['keypoints_2d'], 75 | TCO_keypoints_3d=data['TCO_keypoints_3d'], 76 | poses=TCO, 77 | ) 78 | if parse_joints: 79 | data.register_tensor('joints', joints) 80 | data.infos['joint_names'] = [joint_names for _ in range(len(data))] 81 | return data 82 | 83 | 84 | def data_to_pose_model_inputs(data): 85 | TXO = data.poses 86 | has_joints = hasattr(data, 'joints') 87 | obj_infos = [] 88 | for n in range(len(data)): 89 | obj_info = dict(name=data.infos.loc[n, 'label']) 90 | if has_joints: 91 | joint_names = data.infos.loc[n, 'joint_names'] 92 | joints = {joint_names[i]: data.joints[n, [i]].view(1, 1) for i in range(len(joint_names))} 93 | obj_info.update(joints=joints) 94 | obj_infos.append(obj_info) 95 | return TXO, obj_infos 96 | -------------------------------------------------------------------------------- /robopose/datasets/datasets_cfg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from robopose.config import LOCAL_DATA_DIR, ASSET_DIR 5 | from robopose.config import OWI_DESCRIPTION, PANDA_DESCRIPTION_PATH 6 | 
from robopose.config import DREAM_DS_DIR, PROJECT_DIR 7 | from robopose.utils.logging import get_logger 8 | 9 | from .urdf_dataset import OneUrdfDataset 10 | from .craves import CRAVESDataset 11 | from .dream import DreamDataset 12 | 13 | 14 | logger = get_logger(__name__) 15 | 16 | def _make_craves_dataset(split): 17 | ds_root = LOCAL_DATA_DIR / 'craves_datasets' 18 | ds = CRAVESDataset(ds_root, split=split) 19 | return ds 20 | 21 | def make_scene_dataset(ds_name, n_frames=None): 22 | # CRAVES 23 | if ds_name == 'craves.synt.train': 24 | split = 'synt_train' 25 | ds = _make_craves_dataset(split) 26 | elif ds_name == 'craves.synt.val': 27 | split = 'synt_val' 28 | ds = _make_craves_dataset(split) 29 | elif ds_name == 'craves.lab.real.test': 30 | split = 'lab_test_real' 31 | ds = _make_craves_dataset(split) 32 | elif ds_name == 'craves.youtube': 33 | split = 'youtube' 34 | ds = _make_craves_dataset(split) 35 | 36 | # DREAM 37 | # Panda 38 | elif ds_name == 'dream.panda.synt.dr.train': 39 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/panda_synth_train_dr') 40 | elif ds_name == 'dream.panda.synt.dr.train.1000': 41 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/panda_synth_train_dr') 42 | ds.frame_index = ds.frame_index.iloc[:1000].reset_index(drop=True) 43 | elif ds_name == 'dream.panda.synt.photo.test': 44 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/panda_synth_test_photo') 45 | elif ds_name == 'dream.panda.synt.dr.test': 46 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/panda_synth_test_dr') 47 | elif ds_name == 'dream.panda.real.orb': 48 | ds = DreamDataset(DREAM_DS_DIR / 'real/panda-orb') 49 | elif ds_name == 'dream.panda.real.kinect360': 50 | ds = DreamDataset(DREAM_DS_DIR / 'real/panda-3cam_kinect360') 51 | elif ds_name == 'dream.panda.real.realsense': 52 | ds = DreamDataset(DREAM_DS_DIR / 'real/panda-3cam_realsense') 53 | elif ds_name == 'dream.panda.real.azure': 54 | ds = DreamDataset(DREAM_DS_DIR / 'real/panda-3cam_azure') 55 | 56 | # Baxter 57 | elif ds_name == 'dream.baxter.synt.dr.train': 58 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/baxter_synth_train_dr') 59 | elif ds_name == 'dream.baxter.synt.dr.test': 60 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/baxter_synth_test_dr') 61 | 62 | # Kuka 63 | elif ds_name == 'dream.kuka.synt.dr.train': 64 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/kuka_synth_train_dr') 65 | elif ds_name == 'dream.kuka.synt.dr.test': 66 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/kuka_synth_test_dr') 67 | elif ds_name == 'dream.kuka.synt.photo.test': 68 | ds = DreamDataset(DREAM_DS_DIR / 'synthetic/kuka_synth_test_photo') 69 | 70 | else: 71 | raise ValueError(ds_name) 72 | 73 | if n_frames is not None: 74 | ds.frame_index = ds.frame_index.iloc[:n_frames].reset_index(drop=True) 75 | ds.name = ds_name 76 | return ds 77 | 78 | 79 | def make_urdf_dataset(ds_name): 80 | if isinstance(ds_name, list): 81 | ds_index = [] 82 | for ds_name_n in ds_name: 83 | dataset = make_urdf_dataset(ds_name_n) 84 | ds_index.append(dataset.index) 85 | dataset.index = pd.concat(ds_index, axis=0) 86 | return dataset 87 | # OWI 88 | if ds_name == 'owi535': 89 | ds = OneUrdfDataset(OWI_DESCRIPTION / 'owi535.urdf', label='owi535', scale=0.001) 90 | elif ds_name == 'panda': 91 | ds = OneUrdfDataset(PANDA_DESCRIPTION_PATH.parent / 'patched_urdf/panda.urdf', label='panda') 92 | # BAXTER 93 | elif ds_name == 'baxter': 94 | ds = OneUrdfDataset(PROJECT_DIR / 'deps/baxter-description/baxter_description/patched_urdf/baxter.urdf', label='baxter') 95 | # KUKA 96 | elif ds_name == 'iiwa7': 
97 | ds = OneUrdfDataset(PROJECT_DIR / 'deps/kuka-description/iiwa_description/urdf/iiwa7.urdf', label='iiwa7') 98 | else: 99 | raise ValueError(ds_name) 100 | return ds 101 | -------------------------------------------------------------------------------- /robopose/third_party/craves/heatmap_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def heatmap_from_keypoints(bbox, pts2d): 6 | scale_factor = 60.0 7 | out_res = 64 8 | nparts = 17 9 | sigma = 1 10 | label_type = 'Gaussian' 11 | bbox = bbox.cpu().numpy() 12 | pts2d = pts2d.cpu().numpy() 13 | 14 | x0, y0, x1, y1 = bbox 15 | c = np.array([(x0+x1), (y0+y1)])/2 16 | s = np.sqrt((y1-y0)*(x1-x0)) / scale_factor 17 | r = 0 18 | 19 | tpts = np.asarray(pts2d).copy() 20 | target = torch.zeros(nparts, out_res, out_res) 21 | for i in range(nparts): 22 | # if tpts[i, 2] > 0: # This is evil!! 23 | if tpts[i, 1] > 0: 24 | tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2], c, s, [out_res, out_res], rot=r)) 25 | target[i] = draw_labelmap(target[i], tpts[i], sigma, type=label_type) 26 | return target 27 | 28 | 29 | def to_numpy(tensor): 30 | if torch.is_tensor(tensor): 31 | return tensor.cpu().numpy() 32 | elif type(tensor).__module__ != 'numpy': 33 | raise ValueError("Cannot convert {} to numpy array" 34 | .format(type(tensor))) 35 | return tensor 36 | 37 | 38 | def to_torch(ndarray): 39 | if type(ndarray).__module__ == 'numpy': 40 | return torch.from_numpy(ndarray) 41 | elif not torch.is_tensor(ndarray): 42 | raise ValueError("Cannot convert {} to torch tensor" 43 | .format(type(ndarray))) 44 | return ndarray 45 | 46 | 47 | def draw_labelmap(img, pt, sigma, type='Gaussian'): 48 | # Draw a 2D gaussian 49 | # Adopted from https://github.com/anewell/pose-hg-train/blob/master/src/pypose/draw.py 50 | img = to_numpy(img) 51 | 52 | # Check that any part of the gaussian is in-bounds 53 | ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)] 54 | br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)] 55 | if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or 56 | br[0] < 0 or br[1] < 0): 57 | # If not, just return the image as is 58 | return to_torch(img) 59 | 60 | # Generate gaussian 61 | size = 6 * sigma + 1 62 | x = np.arange(0, size, 1, float) 63 | y = x[:, np.newaxis] 64 | x0 = y0 = size // 2 65 | # The gaussian is not normalized, we want the center value to equal 1 66 | if type == 'Gaussian': 67 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 68 | elif type == 'Cauchy': 69 | g = sigma / (((x - x0) ** 2 + (y - y0) ** 2 + sigma ** 2) ** 1.5) 70 | 71 | 72 | # Usable gaussian range 73 | g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] 74 | g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] 75 | # Image range 76 | img_x = max(0, ul[0]), min(br[0], img.shape[1]) 77 | img_y = max(0, ul[1]), min(br[1], img.shape[0]) 78 | 79 | img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 80 | return to_torch(img) 81 | 82 | 83 | def transform(pt, center, scale, res, invert=0, rot=0): 84 | # Transform pixel location to different reference 85 | # print(scale) 86 | t = get_transform(center, scale, res, rot=rot) 87 | if invert: 88 | t = np.linalg.inv(t) 89 | new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T 90 | #new_pt = np.array([pt[0], pt[1], 1.]).T 91 | new_pt = np.dot(t, new_pt) 92 | # return new_pt[:2].astype(int) + 1 93 | return (new_pt[:2] + 0.5).astype(int) 94 | 95 | 96 | def get_transform(center, scale, res, rot=0): 97 | 
""" 98 | General image processing functions 99 | """ 100 | # Generate transformation matrix 101 | h = 200 * scale 102 | t = np.zeros((3, 3)) 103 | t[0, 0] = float(res[1]) / h 104 | t[1, 1] = float(res[0]) / h 105 | t[0, 2] = res[1] * (-float(center[0]) / h + .5) 106 | t[1, 2] = res[0] * (-float(center[1]) / h + .5) 107 | t[2, 2] = 1 108 | if not rot == 0: 109 | rot = -rot # To match direction of rotation from cropping 110 | rot_mat = np.zeros((3,3)) 111 | rot_rad = rot * np.pi / 180 112 | sn,cs = np.sin(rot_rad), np.cos(rot_rad) 113 | rot_mat[0,:2] = [cs, -sn] 114 | rot_mat[1,:2] = [sn, cs] 115 | rot_mat[2,2] = 1 116 | # Need to rotate around center 117 | t_mat = np.eye(3) 118 | t_mat[0,2] = -res[1]/2 119 | t_mat[1,2] = -res[0]/2 120 | t_inv = t_mat.copy() 121 | t_inv[:2,2] *= -1 122 | t = np.dot(t_inv,np.dot(rot_mat,np.dot(t_mat,t))) 123 | return t 124 | -------------------------------------------------------------------------------- /robopose/models/wide_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 9 | padding=1, bias=False) 10 | 11 | 12 | class BasicBlockV2(nn.Module): 13 | r"""BasicBlock V2 from 14 | `"Identity Mappings in Deep Residual Networks"`_ paper. 15 | This is used for ResNet V2 for 18, 34 layers. 16 | Args: 17 | inplanes (int): number of input channels. 18 | planes (int): number of output channels. 19 | stride (int): stride size. 20 | downsample (Module) optional downsample module to downsample the input. 21 | """ 22 | expansion = 1 23 | 24 | def __init__(self, inplanes, planes, stride=1, downsample=None): 25 | super(BasicBlockV2, self).__init__() 26 | self.bn1 = nn.BatchNorm2d(inplanes) 27 | self.conv1 = conv3x3(inplanes, planes, stride) 28 | self.bn2 = nn.BatchNorm2d(planes) 29 | self.conv2 = conv3x3(planes, planes) 30 | self.downsample = downsample 31 | self.stride = stride 32 | 33 | def forward(self, x): 34 | out = F.relu(self.bn1(x), inplace=True) 35 | residual = self.downsample(out) if self.downsample is not None else x 36 | out = self.conv1(out) 37 | out = F.relu(self.bn2(out), inplace=True) 38 | out = self.conv2(out) 39 | return out + residual 40 | 41 | 42 | class WideResNet(nn.Module): 43 | def __init__(self, block, layers, width, num_inputs=3, maxpool=True): 44 | super(WideResNet, self).__init__() 45 | 46 | config = [int(v * width) for v in (64, 128, 256, 512)] 47 | self.inplanes = config[0] 48 | self.conv1 = nn.Conv2d(num_inputs, self.inplanes, kernel_size=5, 49 | stride=2, padding=2, bias=False) 50 | self.bn1 = nn.BatchNorm2d(self.inplanes) 51 | self.relu = nn.ReLU(inplace=True) 52 | if maxpool: 53 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 54 | else: 55 | self.maxpool = nn.Identity() 56 | self.layer1 = self._make_layer(block, config[0], layers[0]) 57 | self.layer2 = self._make_layer(block, config[1], layers[1], stride=2) 58 | self.layer3 = self._make_layer(block, config[2], layers[2], stride=2) 59 | self.layer4 = self._make_layer(block, config[3], layers[3], stride=2) 60 | 61 | for m in self.modules(): 62 | if isinstance(m, nn.Conv2d): 63 | nn.init.kaiming_normal_( 64 | m.weight, mode='fan_out', nonlinearity='relu') 65 | elif isinstance(m, nn.BatchNorm2d): 66 | nn.init.constant_(m.weight, 1) 67 | nn.init.constant_(m.bias, 0) 68 | 69 | def 
_make_layer(self, block, planes, blocks, stride=1): 70 | downsample = None 71 | if stride != 1 or self.inplanes != planes * block.expansion: 72 | downsample = nn.Conv2d(self.inplanes, planes * block.expansion, 73 | kernel_size=1, stride=stride, bias=False) 74 | 75 | layers = [block(self.inplanes, planes, stride, downsample), ] 76 | self.inplanes = planes * block.expansion 77 | for i in range(1, blocks): 78 | layers.append(block(self.inplanes, planes)) 79 | 80 | return nn.Sequential(*layers) 81 | 82 | def forward(self, x): 83 | x = self.conv1(x) 84 | x = self.bn1(x) 85 | x = self.relu(x) 86 | x = self.maxpool(x) 87 | x = self.layer1(x) 88 | x = self.layer2(x) 89 | x = self.layer3(x) 90 | x = self.layer4(x) 91 | return x 92 | 93 | 94 | CONFIG = {18: [2, 2, 2, 2], 34: [3, 4, 6, 3]} 95 | 96 | 97 | class WideResNet18(WideResNet): 98 | def __init__(self, n_inputs=3, width=1.0): 99 | super().__init__(block=BasicBlockV2, layers=CONFIG[18], width=width, num_inputs=n_inputs) 100 | self.n_features = int(512 * width) 101 | 102 | 103 | class WideResNet34(WideResNet): 104 | def __init__(self, n_inputs=3, width=1.0): 105 | super().__init__(block=BasicBlockV2, layers=CONFIG[34], width=width, num_inputs=n_inputs) 106 | self.n_features = int(512 * width) 107 | 108 | 109 | if __name__ == '__main__': 110 | model = WideResNet(BasicBlockV2, [2, 2, 2, 2], 0.5, num_inputs=3) 111 | x = torch.randn(1, 3, 224, 224) 112 | outputs = model(x) 113 | -------------------------------------------------------------------------------- /robopose/visualization/singleview_articulated.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | from copy import deepcopy 4 | 5 | from .plotter import Plotter 6 | 7 | from robopose.datasets.wrappers.augmentation_wrapper import AugmentationWrapper 8 | from robopose.datasets.augmentations import CropResizeToAspectAugmentation 9 | 10 | 11 | def render_prediction_wrt_camera(renderer, pred, camera=None, resolution=(640, 480), K=None): 12 | assert len(pred) == 1 13 | pred = pred.cpu() 14 | 15 | if camera is not None: 16 | camera = deepcopy(camera) 17 | else: 18 | K = pred.K[0].numpy() 19 | camera = dict( 20 | K=K, 21 | resolution=resolution 22 | ) 23 | camera.update(TWC=np.eye(4)) 24 | 25 | list_objects = [] 26 | for n in range(len(pred)): 27 | row = pred.infos.iloc[n] 28 | joint_names = row.joint_names 29 | joints = {joint_names[i]: pred.joints[n][i].item() for i in range(len(joint_names))} 30 | obj = dict( 31 | name=row.label, 32 | color=(1, 1, 1, 1), 33 | TWO=pred.poses[n].numpy(), 34 | joints=joints, 35 | ) 36 | list_objects.append(obj) 37 | renders = renderer.render_scene(list_objects, [camera])[0]['rgb'] 38 | 39 | # renders = renderer.render_scene(list_objects, [camera])[0]['mask_int'] 40 | # renders[renders == (0 + 5 << 24)] = 255 41 | # renders[renders != 255] = 0 42 | # renders = renders.astype(np.uint8) 43 | # renders = renders[..., None] 44 | # renders = np.repeat(renders, 3, axis=-1) 45 | return renders 46 | 47 | 48 | def render_gt(renderer, objects, camera): 49 | camera = deepcopy(camera) 50 | TWC = camera['TWC'] 51 | for obj in objects: 52 | obj['TWO'] = np.linalg.inv(TWC) @ obj['TWO'] 53 | camera['TWC'] = np.eye(4) 54 | rgb_rendered = renderer.render_scene(objects, [camera])[0]['rgb'] 55 | return rgb_rendered 56 | 57 | 58 | def make_singleview_prediction_plots(scene_ds, renderer, one_prediction): 59 | plotter = Plotter() 60 | 61 | assert len(one_prediction) == 1 62 | predictions =
one_prediction 63 | scene_id, view_id = np.unique(predictions.infos['scene_id']).item(), np.unique(predictions.infos['view_id']).item() 64 | 65 | scene_ds_index = scene_ds.frame_index 66 | scene_ds_index['ds_idx'] = np.arange(len(scene_ds_index)) 67 | scene_ds_index = scene_ds_index.set_index(['scene_id', 'view_id']) 68 | idx = scene_ds_index.loc[(scene_id, view_id), 'ds_idx'] 69 | 70 | augmentation = CropResizeToAspectAugmentation(resize=(640, 480)) 71 | scene_ds = AugmentationWrapper(scene_ds, augmentation) 72 | rgb_input, mask, state = scene_ds[idx] 73 | 74 | figures = dict() 75 | 76 | figures['input_im'] = plotter.plot_image(rgb_input) 77 | 78 | # gt_rendered = render_gt(renderer, state['objects'], state['camera']) 79 | # figures['gt_rendered'] = plotter.plot_image(gt_rendered) 80 | # figures['gt_overlay'] = plotter.plot_overlay(rgb_input, gt_rendered) 81 | 82 | renders = render_prediction_wrt_camera(renderer, predictions) 83 | # pred_rendered = renders['rgb'] 84 | pred_rendered = renders 85 | # pred_rendered[renders['mask_int'] == -1] = 255 86 | # print(pred_rendered) 87 | figures['pred_rendered'] = plotter.plot_image(pred_rendered) 88 | figures['pred_overlay'] = plotter.plot_overlay(rgb_input, pred_rendered) 89 | # figures['pred_mask_int'] = renders['mask_int'] 90 | figures['rgb_input'] = rgb_input 91 | figures['rgb_overlay'] = plotter.make_overlay(rgb_input, pred_rendered) 92 | return figures 93 | 94 | 95 | def make_video(scene_ds, renderer, predictions, view_ids=None): 96 | plotter = Plotter() 97 | if view_ids is None: 98 | view_ids = np.sort(predictions.infos.view_id.tolist()) 99 | # view_ids = view_ids[::30] 100 | 101 | augmentation = CropResizeToAspectAugmentation(resize=(640, 480)) 102 | scene_ds = AugmentationWrapper(scene_ds, augmentation) 103 | 104 | images = [] 105 | for view_id in tqdm(view_ids): 106 | scene_ds_index = scene_ds.frame_index 107 | scene_ds_index['ds_idx'] = np.arange(len(scene_ds_index)) 108 | scene_ds_index = scene_ds_index.set_index(['view_id']) 109 | idx = scene_ds_index.loc[(view_id), 'ds_idx'] 110 | 111 | rgb_input, mask, state = scene_ds[idx] 112 | pred_idx = np.where(predictions.infos['view_id'] == view_id)[0].item() 113 | renders = render_prediction_wrt_camera(renderer, predictions[[pred_idx]]) 114 | overlay = plotter.make_overlay(rgb_input, renders) 115 | 116 | # Rotation 117 | images.append((np.asarray(rgb_input), np.asarray(overlay))) 118 | return images 119 | -------------------------------------------------------------------------------- /notebooks/visualize_predictions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2\n", 11 | "import torch\n", 12 | "from PIL import Image\n", 13 | "from pathlib import Path\n", 14 | "import numpy as np\n", 15 | "\n", 16 | "from robopose.config import LOCAL_DATA_DIR\n", 17 | "from robopose.datasets.datasets_cfg import make_scene_dataset\n", 18 | "from robopose.rendering.bullet_scene_renderer import BulletSceneRenderer\n", 19 | "from robopose.visualization.singleview_articulated import make_singleview_prediction_plots\n", 20 | "from robopose.visualization.bokeh_utils import save_image_figure\n", 21 | "\n", 22 | "import os\n", 23 | "from tqdm import tqdm_notebook as tqdm\n", 24 | "from bokeh.plotting import gridplot\n", 25 | "from bokeh.io import show, output_notebook; output_notebook()\n", 26 | 
"os.environ['CUDA_VISIBLE_DEVICES'] = '0'" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Pick Robot\n", 36 | "renderer = BulletSceneRenderer('owi535')\n", 37 | "# renderer = BulletSceneRenderer('panda')\n", 38 | "# renderer = BulletSceneRenderer('baxter')\n", 39 | "# renderer = BulletSceneRenderer('iiwa7')" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# Pick inference result_id and dataset\n", 49 | "\n", 50 | "# result_id = 'craves-lab-unknownq--1804'\n", 51 | "# dataset = 'craves.lab.real.test'\n", 52 | "\n", 53 | "# result_id = 'dream-panda-unknownq--1804'\n", 54 | "# dataset = 'dream.panda.real.orb'\n", 55 | "\n", 56 | "result_id = 'craves-youtube-unknownq-focal=1000--1804'\n", 57 | "dataset = 'craves.youtube'\n", 58 | "\n", 59 | "scene_ds = make_scene_dataset(dataset)\n", 60 | "\n", 61 | "results = torch.load(LOCAL_DATA_DIR / 'results' / result_id / f'dataset={dataset}' / 'results.pth.tar')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# Generate visualization for a few images\n", 71 | "type_joints = 'unknown'\n", 72 | "dets = 'full_image_detections'\n", 73 | "pred_keys = [\n", 74 | " f'{dets}/{type_joints}_joints/init',\n", 75 | "]\n", 76 | "\n", 77 | "pred_keys += [f'{dets}/{type_joints}_joints/iteration={K}' for K in (1, 10)]\n", 78 | "print(pred_keys)\n", 79 | "\n", 80 | "n_images = len(results['predictions'][f'{dets}/{type_joints}_joints/init'])\n", 81 | "pred_ids = np.random.choice(np.arange(n_images), 5)\n", 82 | "fig_array = []\n", 83 | "save_dir = Path(f'unknown_joints_dataset={dataset}')\n", 84 | "\n", 85 | "fig_idx = 0\n", 86 | "all_figures = []\n", 87 | "def add_figure(fig):\n", 88 | " fig.title.text = f'fig={len(all_figures)}'\n", 89 | " all_figures.append(fig)\n", 90 | " \n", 91 | "for pred_idx in tqdm(pred_ids):\n", 92 | " row = []\n", 93 | " for n, pred_key in enumerate(pred_keys):\n", 94 | " if pred_key in results['predictions']:\n", 95 | " pred = results['predictions'][pred_key][[pred_idx]]\n", 96 | " figures = make_singleview_prediction_plots(scene_ds, renderer, pred)\n", 97 | " if n == 0:\n", 98 | " fig = figures['input_im']\n", 99 | " add_figure(fig)\n", 100 | " row.append(fig)\n", 101 | " fig = figures['pred_overlay']\n", 102 | " add_figure(fig)\n", 103 | " row.append(fig)\n", 104 | " rgb_input = figures['rgb_input']\n", 105 | " rgb_overlay = figures['rgb_overlay']\n", 106 | " fig_array.append(row)\n", 107 | "show(gridplot(fig_array, sizing_mode='scale_width'))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# Save selected figures\n", 117 | "\n", 118 | "save_ids = [0, 3]\n", 119 | "for idx in save_ids:\n", 120 | " save_image_figure(all_figures[idx], f'images/examples_{idx}.jpg')" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "robotpose", 127 | "language": "python", 128 | "name": "robotpose" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.7.6" 141 | } 142 | }, 143 | "nbformat": 4, 144 | 
"nbformat_minor": 4 145 | } 146 | -------------------------------------------------------------------------------- /robopose/integrated/pose_predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from collections import defaultdict 4 | from torch.utils.data import TensorDataset, DataLoader 5 | from robopose.lib3d.robopose_ops import TCO_init_from_boxes, TCO_init_from_boxes_zup_autodepth 6 | 7 | import robopose.utils.tensor_collection as tc 8 | 9 | from robopose.utils.logging import get_logger 10 | from robopose.utils.timer import Timer 11 | logger = get_logger(__name__) 12 | 13 | 14 | class CoarseRefinePosePredictor(torch.nn.Module): 15 | def __init__(self, 16 | coarse_model=None, 17 | refiner_model=None, 18 | bsz_objects=64): 19 | super().__init__() 20 | self.coarse_model = coarse_model 21 | self.refiner_model = refiner_model 22 | self.bsz_objects = bsz_objects 23 | self.eval() 24 | 25 | @torch.no_grad() 26 | def batched_model_predictions(self, model, images, K, obj_data, n_iterations=1): 27 | timer = Timer() 28 | timer.start() 29 | 30 | ids = torch.arange(len(obj_data)) 31 | 32 | ds = TensorDataset(ids) 33 | dl = DataLoader(ds, batch_size=self.bsz_objects) 34 | 35 | preds = defaultdict(list) 36 | for (batch_ids, ) in dl: 37 | timer.resume() 38 | obj_inputs = obj_data[batch_ids.numpy()] 39 | labels = obj_inputs.infos['label'].values 40 | im_ids = obj_inputs.infos['batch_im_id'].values 41 | images_ = images[im_ids] 42 | K_ = K[im_ids] 43 | TCO_input = obj_inputs.poses 44 | outputs = model(images=images_, K=K_, TCO=TCO_input, 45 | n_iterations=n_iterations, labels=labels) 46 | timer.pause() 47 | for n in range(1, n_iterations+1): 48 | iter_outputs = outputs[f'iteration={n}'] 49 | 50 | infos = obj_inputs.infos 51 | batch_preds = tc.PandasTensorCollection(infos, 52 | poses=iter_outputs['TCO_output'], 53 | poses_input=iter_outputs['TCO_input'], 54 | K_crop=iter_outputs['K_crop'], 55 | boxes_rend=iter_outputs['boxes_rend'], 56 | boxes_crop=iter_outputs['boxes_crop']) 57 | preds[f'iteration={n}'].append(batch_preds) 58 | 59 | logger.debug(f'Pose prediction on {len(obj_data)} detections (n_iterations={n_iterations}): {timer.stop()}') 60 | preds = dict(preds) 61 | for k, v in preds.items(): 62 | preds[k] = tc.concatenate(v) 63 | return preds 64 | 65 | def make_TCO_init(self, detections, K): 66 | K = K[detections.infos['batch_im_id'].values] 67 | boxes = detections.bboxes 68 | if self.coarse_model.cfg.init_method == 'z-up+auto-depth': 69 | meshes = self.coarse_model.mesh_db.select(detections.infos['label']) 70 | points_3d = meshes.sample_points(2000, deterministic=True) 71 | TCO_init = TCO_init_from_boxes_zup_autodepth(boxes, points_3d, K) 72 | else: 73 | TCO_init = TCO_init_from_boxes(z_range=(1.0, 1.0), boxes=boxes, K=K) 74 | return tc.PandasTensorCollection(infos=detections.infos, poses=TCO_init) 75 | 76 | def get_predictions(self, images, K, 77 | detections=None, 78 | data_TCO_init=None, 79 | n_coarse_iterations=1, 80 | n_refiner_iterations=1): 81 | 82 | preds = dict() 83 | if data_TCO_init is None: 84 | assert detections is not None 85 | assert self.coarse_model is not None 86 | assert n_coarse_iterations > 0 87 | data_TCO_init = self.make_TCO_init(detections, K) 88 | coarse_preds = self.batched_model_predictions(self.coarse_model, 89 | images, K, data_TCO_init, 90 | n_iterations=n_coarse_iterations) 91 | for n in range(1, n_coarse_iterations + 1): 92 | preds[f'coarse/iteration={n}'] = coarse_preds[f'iteration={n}'] 93 | data_TCO 
= coarse_preds[f'iteration={n_coarse_iterations}'] 94 | else: 95 | assert n_coarse_iterations == 0 96 | data_TCO = data_TCO_init 97 | preds[f'external_coarse'] = data_TCO 98 | 99 | if n_refiner_iterations >= 1: 100 | assert self.refiner_model is not None 101 | refiner_preds = self.batched_model_predictions(self.refiner_model, 102 | images, K, data_TCO, 103 | n_iterations=n_refiner_iterations) 104 | for n in range(1, n_refiner_iterations + 1): 105 | preds[f'refiner/iteration={n}'] = refiner_preds[f'iteration={n}'] 106 | data_TCO = refiner_preds[f'iteration={n_refiner_iterations}'] 107 | return data_TCO, preds 108 | -------------------------------------------------------------------------------- /robopose/evaluation/meters/craves_meters.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import xarray as xr 3 | import numpy as np 4 | from .base import Meter 5 | 6 | from robopose.utils.distributed import get_rank, get_world_size 7 | from robopose.third_party.craves.eval_utils import accuracy_from_keypoints 8 | from .errors import loc_rot_pose_errors 9 | from .utils import get_candidate_matches, match_poses, one_to_one_matching 10 | 11 | 12 | class CravesKeypointsMeter(Meter): 13 | def __init__(self): 14 | self.reset() 15 | 16 | def is_data_valid(self, data): 17 | return hasattr(data, 'keypoints_2d') 18 | 19 | def add(self, pred_data, gt_data): 20 | pred_data = pred_data.float() 21 | gt_data = gt_data.float() 22 | 23 | matches = one_to_one_matching(pred_data.infos, 24 | gt_data.infos, 25 | keys=('scene_id', 'view_id'), 26 | allow_pred_missing=False) 27 | 28 | accuracies = accuracy_from_keypoints(pred_data.keypoints_2d[matches.pred_id], 29 | gt_data.keypoints_2d[matches.gt_id], 30 | gt_data.bboxes[matches.gt_id], 31 | thr=0.2).cpu().numpy() 32 | batch_df = xr.Dataset() 33 | batch_df['keypoint_accuracy'] = ('batch_id', 'keypoints'), accuracies[None, 1:] 34 | batch_df['keypoints_mean_accuracy'] = ('batch_id', ), accuracies[None, 0] 35 | self.datas['df'].append(batch_df) 36 | 37 | def summary(self): 38 | df = xr.concat(self.datas['df'], dim='batch_id') 39 | summary = { 40 | 'PCK@0.2': df['keypoints_mean_accuracy'].mean('batch_id').values.tolist(), 41 | } 42 | return summary, df 43 | 44 | 45 | class CravesErrorMeter(Meter): 46 | def __init__(self): 47 | self.reset() 48 | 49 | def is_data_valid(self, data): 50 | return hasattr(data, 'poses') 51 | 52 | def add(self, pred_data, gt_data): 53 | # ArticulatedObjectData 54 | pred_data = pred_data.float() 55 | gt_data = gt_data.float() 56 | 57 | TXO_gt = gt_data.poses 58 | q_gt = gt_data.joints 59 | gt_infos = gt_data.infos 60 | gt_infos['valid'] = True 61 | 62 | TXO_pred = pred_data.poses 63 | q_pred = pred_data.joints 64 | pred_infos = pred_data.infos 65 | 66 | cand_infos = get_candidate_matches(pred_infos, gt_infos) 67 | cand_TXO_gt = TXO_gt[cand_infos['gt_id']] 68 | cand_TXO_pred = TXO_pred[cand_infos['pred_id']] 69 | 70 | loc_errors_xyz, rot_errors = loc_rot_pose_errors(cand_TXO_pred, cand_TXO_gt) 71 | loc_errors_norm = torch.norm(loc_errors_xyz, dim=-1, p=2) 72 | 73 | error = loc_errors_norm.cpu().numpy().astype(np.float) 74 | error[np.isnan(error)] = 1000 75 | cand_infos['error'] = error 76 | matches = match_poses(cand_infos) 77 | 78 | n_gt = len(gt_infos) 79 | 80 | def empty_array(shape, default='nan', dtype=np.float): 81 | return np.empty(shape, dtype=dtype) * float(default) 82 | 83 | gt_infos['rank'] = get_rank() 84 | gt_infos['world_size'] = get_world_size() 85 | df = 
xr.Dataset(gt_infos).rename(dict(dim_0='gt_object_id')) 86 | 87 | scores = empty_array(n_gt) 88 | scores[matches.gt_id] = pred_infos.loc[matches.pred_id, 'score'] 89 | df['pred_score'] = 'gt_object_id', scores 90 | 91 | rot_errors_ = empty_array((n_gt, 3)) 92 | matches_rot_errors = rot_errors[matches.cand_id].cpu().numpy() 93 | matches_rot_errors = np.abs((matches_rot_errors + 180.0) % 360.0 - 180.0) 94 | rot_errors_[matches.gt_id] = matches_rot_errors 95 | df['rot_error'] = ('gt_object_id', 'ypr'), rot_errors_ 96 | 97 | loc_errors_xyz_ = empty_array((n_gt, 3)) 98 | loc_errors_xyz_[matches.gt_id] = loc_errors_xyz[matches.cand_id].cpu().numpy() 99 | df['loc_error_xyz'] = ('gt_object_id', 'xyz'), loc_errors_xyz_ 100 | 101 | loc_errors_norm_ = empty_array((n_gt)) 102 | loc_errors_norm_[matches.gt_id] = loc_errors_norm[matches.cand_id].cpu().numpy() 103 | df['loc_error_norm'] = ('gt_object_id', ), loc_errors_norm_ 104 | 105 | q_errors = empty_array((n_gt, q_gt.shape[1])) 106 | matches_q_errors = (q_gt[matches.gt_id] - q_pred[matches.pred_id]).cpu().numpy() * 180 / np.pi 107 | matches_q_errors = np.abs((matches_q_errors + 180.0) % 360.0 - 180.0) 108 | q_errors[matches.gt_id] = matches_q_errors 109 | 110 | df['joint_error'] = ('gt_object_id', 'dofs'), q_errors 111 | self.datas['df'].append(df) 112 | 113 | def summary(self): 114 | df = xr.concat(self.datas['df'], dim='gt_object_id') 115 | n_images_top50 = int(len(df['gt_object_id']) * 0.5) 116 | df_top50 = df.isel(gt_object_id=np.argsort(df['joint_error'].max(axis=1).values)[:n_images_top50]) 117 | summary = dict( 118 | loc_error_norm=df['loc_error_norm'].mean('gt_object_id').values.tolist(), 119 | loc_error_norm_top50=df_top50['loc_error_norm'].mean('gt_object_id').values.tolist(), 120 | 121 | loc_error_xyz_abs_mean=np.mean(np.abs(df['loc_error_xyz'].values)).item(), 122 | loc_error_xyz_abs_mean_top50=np.mean(np.abs(df_top50['loc_error_xyz'].values)).item(), 123 | 124 | rot_error_mean=df['rot_error'].mean().item(), 125 | rot_error_mean_top50=df_top50['rot_error'].mean().item(), 126 | 127 | joint_error_mean=df['joint_error'].mean(('gt_object_id', 'dofs')).values.tolist(), 128 | joint_error_mean_top50=df_top50['joint_error'].mean(('gt_object_id', 'dofs')).values.tolist(), 129 | ) 130 | return summary, df 131 | -------------------------------------------------------------------------------- /robopose/utils/tensor_collection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pathlib import Path 3 | import pandas as pd 4 | from robopose.utils.distributed import get_rank, get_world_size 5 | 6 | 7 | def concatenate(datas): 8 | datas = [data for data in datas if len(data) > 0] 9 | if len(datas) == 0: 10 | return PandasTensorCollection(infos=pd.DataFrame()) 11 | classes = [data.__class__ for data in datas] 12 | assert all([class_n == classes[0] for class_n in classes]) 13 | 14 | infos = pd.concat([data.infos for data in datas], axis=0, sort=False).reset_index(drop=True) 15 | tensor_keys = datas[0].tensors.keys() 16 | tensors = dict() 17 | for k in tensor_keys: 18 | tensors[k] = torch.cat([getattr(data, k) for data in datas], dim=0) 19 | return PandasTensorCollection(infos=infos, **tensors) 20 | 21 | 22 | class TensorCollection: 23 | def __init__(self, **kwargs): 24 | self.__dict__['_tensors'] = dict() 25 | for k, v in kwargs.items(): 26 | self.register_tensor(k, v) 27 | 28 | def register_tensor(self, name, tensor): 29 | self._tensors[name] = tensor 30 | 31 | def delete_tensor(self, name): 32 | 
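# Remove a tensor previously registered with register_tensor.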
del self._tensors[name] 33 | 34 | def __repr__(self): 35 | s = self.__class__.__name__ + '(' '\n' 36 | for k, t in self._tensors.items(): 37 | s += f' {k}: {t.shape} {t.dtype} {t.device},\n' 38 | s += ')' 39 | return s 40 | 41 | def __getitem__(self, ids): 42 | tensors = dict() 43 | for k, v in self._tensors.items(): 44 | tensors[k] = getattr(self, k)[ids] 45 | return TensorCollection(**tensors) 46 | 47 | def __getattr__(self, name): 48 | if name in self._tensors: 49 | return self._tensors[name] 50 | elif name in self.__dict__: 51 | return self.__dict__[name] 52 | else: 53 | raise AttributeError 54 | 55 | @property 56 | def tensors(self): 57 | return self._tensors 58 | 59 | @property 60 | def device(self): 61 | return list(self.tensors.values())[0].device 62 | 63 | def __getstate__(self): 64 | return {'tensors': self.tensors} 65 | 66 | def __setstate__(self, state): 67 | self.__init__(**state['tensors']) 68 | return 69 | 70 | def __setattr__(self, name, value): 71 | if '_tensors' not in self.__dict__: 72 | raise ValueError('Please call __init__') 73 | if name in self._tensors: 74 | self._tensors[name] = value 75 | else: 76 | self.__dict__[name] = value 77 | 78 | def to(self, torch_attr): 79 | for k, v in self._tensors.items(): 80 | self._tensors[k] = v.to(torch_attr) 81 | return self 82 | 83 | def cuda(self): 84 | return self.to('cuda') 85 | 86 | def cpu(self): 87 | return self.to('cpu') 88 | 89 | def float(self): 90 | return self.to(torch.float) 91 | 92 | def double(self): 93 | return self.to(torch.double) 94 | 95 | def half(self): 96 | return self.to(torch.half) 97 | 98 | def clone(self): 99 | tensors = dict() 100 | for k, v in self.tensors.items(): 101 | tensors[k] = getattr(self, k).clone() 102 | return TensorCollection(**tensors) 103 | 104 | 105 | class PandasTensorCollection(TensorCollection): 106 | def __init__(self, infos, **tensors): 107 | super().__init__(**tensors) 108 | self.infos = infos.reset_index(drop=True) 109 | self.meta = dict() 110 | 111 | def register_buffer(self, k, v): 112 | assert len(v) == len(self) 113 | super().register_buffer() 114 | 115 | def merge_df(self, df, *args, **kwargs): 116 | infos = self.infos.merge(df, how='left', *args, **kwargs) 117 | assert len(infos) == len(self.infos) 118 | assert (infos.index == self.infos.index).all() 119 | return PandasTensorCollection(infos=infos, **self.tensors) 120 | 121 | def clone(self): 122 | tensors = super().clone().tensors 123 | return PandasTensorCollection(self.infos.copy(), **tensors) 124 | 125 | def __repr__(self): 126 | s = self.__class__.__name__ + '(' '\n' 127 | for k, t in self._tensors.items(): 128 | s += f' {k}: {t.shape} {t.dtype} {t.device},\n' 129 | s += f"{'-'*40}\n" 130 | s += ' infos:\n' + self.infos.__repr__() + '\n' 131 | s += ')' 132 | return s 133 | 134 | def __getitem__(self, ids): 135 | infos = self.infos.iloc[ids].reset_index(drop=True) 136 | tensors = super().__getitem__(ids).tensors 137 | return PandasTensorCollection(infos, **tensors) 138 | 139 | def __len__(self): 140 | return len(self.infos) 141 | 142 | def gather_distributed(self, tmp_dir=None): 143 | rank, world_size = get_rank(), get_world_size() 144 | tmp_file_template = (tmp_dir / 'rank={rank}.pth.tar').as_posix() 145 | 146 | if rank > 0: 147 | tmp_file = tmp_file_template.format(rank=rank) 148 | torch.save(self, tmp_file) 149 | 150 | if world_size > 1: 151 | torch.distributed.barrier() 152 | 153 | datas = [self] 154 | if rank == 0 and world_size > 1: 155 | for n in range(1, world_size): 156 | tmp_file = 
tmp_file_template.format(rank=n) 157 | data = torch.load(tmp_file) 158 | datas.append(data) 159 | Path(tmp_file).unlink() 160 | 161 | if world_size > 1: 162 | torch.distributed.barrier() 163 | return concatenate(datas) 164 | 165 | def __getstate__(self): 166 | state = super().__getstate__() 167 | state['infos'] = self.infos 168 | state['meta'] = self.meta 169 | return state 170 | 171 | def __setstate__(self, state): 172 | self.__init__(state['infos'], **state['tensors']) 173 | self.meta = state['meta'] 174 | return 175 | -------------------------------------------------------------------------------- /robopose/lib3d/robopose_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .rotations import compute_rotation_matrix_from_ortho6d, compute_rotation_matrix_from_quaternions 4 | from .transform_ops import transform_pts 5 | 6 | l1 = lambda diff: diff.abs() 7 | l2 = lambda diff: diff ** 2 8 | 9 | 10 | def pose_update_with_reference_point(TCO, K, vxvyvz, dRCO, tCR): 11 | bsz = len(TCO) 12 | assert TCO.shape[-2:] == (4, 4) 13 | assert K.shape[-2:] == (3, 3) 14 | assert dRCO.shape[-2:] == (3, 3) 15 | assert vxvyvz.shape[-1] == 3 16 | assert tCR.shape == (bsz, 3) 17 | 18 | zsrc = tCR[:, [2]] 19 | vz = vxvyvz[:, [2]] 20 | ztgt = vz * zsrc 21 | 22 | vxvy = vxvyvz[:, :2] 23 | fxfy = K[:, [0, 1], [0, 1]] 24 | xsrcysrc = tCR[:, :2] 25 | tCR_out = tCR.clone() 26 | tCR_out[:, 2] = ztgt.flatten() 27 | tCR_out[:, :2] = ((vxvy / fxfy) + (xsrcysrc / zsrc.repeat(1, 2))) * ztgt.repeat(1, 2) 28 | 29 | tCO_out = dRCO @ (TCO[:, :3, 3] - tCR).unsqueeze(-1) + tCR_out.unsqueeze(-1) 30 | tCO_out = tCO_out.squeeze(-1) 31 | TCO_out = TCO.clone().detach() 32 | TCO_out[:, :3, 3] = tCO_out 33 | TCO_out[:, :3,: 3] = dRCO @ TCO[:, :3, :3] 34 | return TCO_out 35 | 36 | 37 | def loss_CO_symmetric(TCO_possible_gt, TCO_pred, points, l1_or_l2=l1): 38 | bsz = TCO_possible_gt.shape[0] 39 | assert TCO_possible_gt.shape[0] == bsz 40 | assert len(TCO_possible_gt.shape) == 4 and TCO_possible_gt.shape[-2:] == (4, 4) 41 | assert TCO_pred.shape == (bsz, 4, 4) 42 | assert points.dim() == 3 and points.shape[0] == bsz and points.shape[-1] == 3 43 | 44 | TCO_points_possible_gt = transform_pts(TCO_possible_gt, points) 45 | TCO_pred_points = transform_pts(TCO_pred, points) 46 | losses_possible = l1_or_l2((TCO_pred_points.unsqueeze(1) - TCO_points_possible_gt).flatten(-2, -1)).mean(-1) 47 | loss, min_id = losses_possible.min(dim=1) 48 | TCO_assign = TCO_possible_gt[torch.arange(bsz), min_id] 49 | return loss, TCO_assign 50 | 51 | 52 | def loss_refiner_CO_disentangled_reference_point(TCO_possible_gt, 53 | TCO_input, refiner_outputs, 54 | K_crop, points, tCR): 55 | from robopose.lib3d.transform_ops import invert_T 56 | bsz = TCO_possible_gt.shape[0] 57 | assert TCO_possible_gt.shape[0] == bsz 58 | assert TCO_input.shape[0] == bsz 59 | assert refiner_outputs.shape == (bsz, 9) 60 | assert K_crop.shape == (bsz, 3, 3) 61 | assert points.dim() == 3 and points.shape[0] == bsz and points.shape[-1] == 3 62 | assert TCO_possible_gt.dim() == 4 and TCO_possible_gt.shape[-2:] == (4, 4) 63 | assert tCR.shape == (bsz, 3) 64 | 65 | dR = compute_rotation_matrix_from_ortho6d(refiner_outputs[:, 0:6]) 66 | vxvy = refiner_outputs[:, 6:8] 67 | vz = refiner_outputs[:, 8] 68 | TCO_gt = TCO_possible_gt[:, 0] 69 | fxfy = K_crop[:, [0, 1], [0, 1]] 70 | 71 | dR_gt = TCO_gt[:, :3, :3] @ TCO_input[:, :3, :3].permute(0, 2, 1) 72 | tCO_gt = TCO_gt[:, :3, [-1]] 73 | tCR_out_gt = tCO_gt - dR_gt @ (TCO_input[:, :3, 
[-1]] - tCR.unsqueeze(-1)) 74 | tCR_out_gt = tCR_out_gt.squeeze(-1) 75 | 76 | vz_gt = tCR_out_gt[:, [2]] / tCR[:, [2]] 77 | vxvy_gt = fxfy * (tCR_out_gt[:, :2] / tCR_out_gt[:, [2]] - tCR[:, :2] / tCR[:, [2]]) 78 | 79 | # First term 80 | TCO_pred_orn = TCO_gt.clone() 81 | TCO_pred_orn[:, :3, :3] = pose_update_with_reference_point(TCO_input, K_crop, 82 | torch.cat((vxvy_gt, vz_gt), dim=-1), 83 | dR, tCR)[:, :3, :3] 84 | 85 | # Second term: influence of vxvy 86 | TCO_pred_xy = TCO_gt.clone() 87 | TCO_pred_xy[:, :2, [3]] = pose_update_with_reference_point(TCO_input, K_crop, 88 | torch.cat((vxvy, vz_gt), dim=-1), 89 | dR_gt, tCR)[:, :2, [3]] 90 | 91 | # Third term: influence of vz 92 | TCO_pred_z = TCO_gt.clone() 93 | TCO_pred_z[:, [2], [3]] = pose_update_with_reference_point(TCO_input, K_crop, 94 | torch.cat((vxvy_gt, vz.unsqueeze(-1)), dim=-1), 95 | dR_gt, tCR)[:, [2], [3]] 96 | 97 | loss_orn, _ = loss_CO_symmetric(TCO_possible_gt, TCO_pred_orn, points, l1_or_l2=l1) 98 | loss_xy, _ = loss_CO_symmetric(TCO_possible_gt, TCO_pred_xy, points, l1_or_l2=l1) 99 | loss_z, _ = loss_CO_symmetric(TCO_possible_gt, TCO_pred_z, points, l1_or_l2=l1) 100 | return loss_orn + loss_xy + loss_z 101 | 102 | def TCO_init_from_boxes_zup_autodepth(boxes_2d, model_points_3d, K): 103 | assert boxes_2d.shape[-1] == 4 104 | assert boxes_2d.dim() == 2 105 | bsz = boxes_2d.shape[0] 106 | z_guess = 1.0 107 | fxfy = K[:, [0, 1], [0, 1]] 108 | cxcy = K[:, [0, 1], [2, 2]] 109 | TCO = torch.tensor([ 110 | [0, 1, 0, 0], 111 | [0, 0, -1, 0], 112 | [-1, 0, 0, z_guess], 113 | [0, 0, 0, 1] 114 | ]).to(torch.float).to(boxes_2d.device).repeat(bsz, 1, 1) 115 | bb_xy_centers = (boxes_2d[:, [0, 1]] + boxes_2d[:, [2, 3]]) / 2 116 | xy_init = ((bb_xy_centers - cxcy) * z_guess) / fxfy 117 | TCO[:, :2, 3] = xy_init 118 | 119 | C_pts_3d = transform_pts(TCO, model_points_3d) 120 | 121 | if bsz > 0: 122 | deltax_3d = C_pts_3d[:, :, 0].max(dim=1).values - C_pts_3d[:, :, 0].min(dim=1).values 123 | deltay_3d = C_pts_3d[:, :, 1].max(dim=1).values - C_pts_3d[:, :, 1].min(dim=1).values 124 | else: 125 | deltax_3d = C_pts_3d[:, 0, 0] 126 | deltay_3d = C_pts_3d[:, 0, 1] 127 | 128 | bb_deltax = (boxes_2d[:, 2] - boxes_2d[:, 0]) + 1 129 | bb_deltay = (boxes_2d[:, 3] - boxes_2d[:, 1]) + 1 130 | 131 | z_from_dx = fxfy[:, 0] * deltax_3d / bb_deltax 132 | z_from_dy = fxfy[:, 1] * deltay_3d / bb_deltay 133 | 134 | z = (z_from_dy.unsqueeze(1) + z_from_dx.unsqueeze(1)) / 2 135 | 136 | xy_init = ((bb_xy_centers - cxcy) * z) / fxfy 137 | TCO[:, :2, 3] = xy_init 138 | TCO[:, 2, 3] = z.flatten() 139 | return TCO 140 | -------------------------------------------------------------------------------- /robopose/rendering/bullet_batch_renderer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import multiprocessing 4 | 5 | from robopose.lib3d.transform_ops import invert_T 6 | from .bullet_scene_renderer import BulletSceneRenderer 7 | 8 | 9 | def init_renderer(urdf_ds, preload=True): 10 | renderer = BulletSceneRenderer(urdf_ds=urdf_ds, 11 | preload_cache=preload, 12 | background_color=(0, 0, 0)) 13 | return renderer 14 | 15 | 16 | def worker_loop(worker_id, in_queue, out_queue, object_set, preload=True): 17 | renderer = init_renderer(object_set, preload=preload) 18 | while True: 19 | kwargs = in_queue.get() 20 | if kwargs is None: 21 | return 22 | obj_infos = kwargs['obj_infos'] 23 | cam_infos = kwargs['cam_infos'] 24 | render_depth = kwargs['render_depth'] 25 | render_mask = 
kwargs['render_mask'] 26 | is_valid = np.isfinite(obj_infos[0]['TWO']).all() \ 27 | and np.isfinite(cam_infos[0]['TWC']).all() \ 28 | and np.isfinite(cam_infos[0]['K']).all() 29 | if is_valid: 30 | cam_obs = renderer.render_scene(cam_infos=cam_infos, obj_infos=obj_infos, 31 | render_depth=render_depth) 32 | images = np.stack([d['rgb'] for d in cam_obs]) 33 | depth = np.stack([d['depth'] for d in cam_obs]) if render_depth else None 34 | mask = np.stack([d['mask_int'] for d in cam_obs]) if render_mask else None 35 | else: 36 | res = cam_infos[0]['resolution'] 37 | images = np.zeros((1, min(res), max(res), 3), dtype=np.uint8) 38 | depth = np.zeros((1, min(res), max(res)), dtype=np.float32) 39 | mask = np.zeros((1, min(res), max(res)), dtype=np.int32) 40 | out_queue.put((kwargs['data_id'], images, depth, mask)) 41 | 42 | 43 | class BulletBatchRenderer: 44 | def __init__(self, object_set, n_workers=8, preload_cache=True): 45 | self.object_set = object_set 46 | self.n_workers = n_workers 47 | self.init_plotters(preload_cache) 48 | 49 | def render(self, obj_infos, TCO, K, resolution=(240, 320), render_depth=False, render_mask=False): 50 | TCO = torch.as_tensor(TCO).detach() 51 | TOC = invert_T(TCO).cpu().numpy() 52 | K = torch.as_tensor(K).cpu().numpy() 53 | bsz = len(TCO) 54 | assert TCO.shape == (bsz, 4, 4) 55 | assert K.shape == (bsz, 3, 3) 56 | 57 | for n in np.arange(bsz): 58 | joints = obj_infos[n].get('joints', None) 59 | new_joints = dict() 60 | if joints is not None: 61 | for k, v in joints.items(): 62 | new_joints[k] = torch.as_tensor(v).clone().detach().cpu().numpy().item() 63 | joints = new_joints 64 | obj_info = dict( 65 | name=obj_infos[n]['name'], 66 | joints=joints, 67 | TWO=np.eye(4) 68 | ) 69 | cam_info = dict( 70 | resolution=resolution, 71 | K=K[n], 72 | TWC=TOC[n], 73 | ) 74 | kwargs = dict(cam_infos=[cam_info], obj_infos=[obj_info], render_depth=render_depth, 75 | render_mask=render_mask) 76 | if self.n_workers > 0: 77 | kwargs['data_id'] = n 78 | self.in_queue.put(kwargs) 79 | else: 80 | cam_obs = self.plotters[0].render_scene(**kwargs) 81 | images = np.stack([d['rgb'] for d in cam_obs]) 82 | depth = np.stack([d['depth'] for d in cam_obs]) if render_depth else None 83 | mask = np.stack([d['mask_int'] for d in cam_obs]) if render_mask else None 84 | self.out_queue.put((n, images, depth, mask)) 85 | 86 | images = [None for _ in np.arange(bsz)] 87 | depths = [None for _ in np.arange(bsz)] 88 | masks = [None for _ in np.arange(bsz)] 89 | for n in np.arange(bsz): 90 | data_id, im, depth, mask = self.out_queue.get() 91 | images[data_id] = im[0] 92 | if render_depth: 93 | depths[data_id] = depth[0] 94 | if render_mask: 95 | masks[data_id] = mask[0] 96 | images = torch.as_tensor(np.stack(images, axis=0)).pin_memory().cuda(non_blocking=True) 97 | images = images.float().permute(0, 3, 1, 2) / 255 98 | 99 | if render_depth: 100 | depths = torch.as_tensor(np.stack(depths, axis=0)).pin_memory().cuda(non_blocking=True) 101 | depths = depths.float() 102 | return images, depths 103 | if render_mask: 104 | masks = torch.as_tensor(np.stack(masks, axis=0)).pin_memory().cuda(non_blocking=True) 105 | masks = masks.int() 106 | return images, masks 107 | else: 108 | return images 109 | 110 | def init_plotters(self, preload_cache): 111 | self.plotters = [] 112 | self.in_queue = multiprocessing.Queue() 113 | self.out_queue = multiprocessing.Queue() 114 | 115 | if self.n_workers > 0: 116 | for n in range(self.n_workers): 117 | plotter = multiprocessing.Process(target=worker_loop, 118 | 
kwargs=dict(worker_id=n, 119 | in_queue=self.in_queue, 120 | out_queue=self.out_queue, 121 | preload=preload_cache, 122 | object_set=self.object_set)) 123 | plotter.start() 124 | self.plotters.append(plotter) 125 | else: 126 | self.plotters = [init_renderer(self.object_set, preload_cache)] 127 | 128 | def stop(self): 129 | if self.n_workers > 0: 130 | for p in self.plotters: 131 | self.in_queue.put(None) 132 | for p in self.plotters: 133 | p.join() 134 | p.terminate() 135 | self.in_queue.close() 136 | self.out_queue.close() 137 | 138 | def __del__(self): 139 | self.stop() 140 | -------------------------------------------------------------------------------- /robopose/libmesh/urdf_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import numpy as np 3 | import xml.etree.ElementTree as ET 4 | from xml.dom import minidom 5 | import trimesh 6 | from urdfpytorch.utils import unparse_origin, parse_origin 7 | from .meshlab_converter import ply_to_obj 8 | 9 | 10 | def resolve_package_path(urdf_path, mesh_path): 11 | urdf_path = Path(urdf_path) 12 | search_dir = urdf_path.parent 13 | relative_path = Path(str(mesh_path).replace('package://', '')) 14 | while True: 15 | absolute_path = (search_dir / relative_path) 16 | if absolute_path.exists(): 17 | return absolute_path 18 | search_dir = search_dir.parent 19 | 20 | 21 | def extract_mesh_visuals(mesh): 22 | visuals = [] 23 | graph = mesh.graph 24 | geometries = mesh.geometry 25 | for node_id, node_infos in graph.to_flattened().items(): 26 | geometry = node_infos.get('geometry') 27 | if geometry is not None: 28 | visuals.append((geometries[geometry], node_infos['transform'])) 29 | return visuals 30 | 31 | 32 | def obj_to_urdf(obj_path, urdf_path): 33 | obj_path = Path(obj_path) 34 | urdf_path = Path(urdf_path) 35 | assert urdf_path.parent == obj_path.parent 36 | 37 | geometry = ET.Element('geometry') 38 | mesh = ET.SubElement(geometry, 'mesh') 39 | mesh.set('filename', obj_path.name) 40 | mesh.set('scale', '1.0 1.0 1.0') 41 | 42 | material = ET.Element('material') 43 | material.set('name', 'mat_part0') 44 | color = ET.SubElement(material, 'color') 45 | color.set('rgba', '1.0 1.0 1.0 1.0') 46 | 47 | inertial = ET.Element('inertial') 48 | origin = ET.SubElement(inertial, 'origin') 49 | origin.set('rpy', '0 0 0') 50 | origin.set('xyz', '0.0 0.0 0.0') 51 | 52 | mass = ET.SubElement(inertial, 'mass') 53 | mass.set('value', '0.1') 54 | 55 | inertia = ET.SubElement(inertial, 'inertia') 56 | inertia.set('ixx', '1') 57 | inertia.set('ixy', '0') 58 | inertia.set('ixz', '0') 59 | inertia.set('iyy', '1') 60 | inertia.set('iyz', '0') 61 | inertia.set('izz', '1') 62 | 63 | robot = ET.Element('robot') 64 | robot.set('name', obj_path.with_suffix('').name) 65 | 66 | link = ET.SubElement(robot, 'link') 67 | link.set('name', 'base_link') 68 | 69 | visual = ET.SubElement(link, 'visual') 70 | visual.append(geometry) 71 | visual.append(material) 72 | 73 | collision = ET.SubElement(link, 'collision') 74 | collision.append(geometry) 75 | 76 | link.append(inertial) 77 | 78 | xmlstr = minidom.parseString(ET.tostring(robot)).toprettyxml(indent=" ") 79 | Path(urdf_path).write_text(xmlstr) # Write xml file 80 | return 81 | 82 | 83 | def make_multivisual_urdf(urdf_path, out_base_dir, add_uv=True, texture_size=(100, 100)): 84 | urdf_path = Path(urdf_path) 85 | out_urdf_dir = Path(out_base_dir) 86 | meshes_dir = out_urdf_dir / 'patched_visual_meshes' 87 | meshes_dir.mkdir(exist_ok=True, parents=True) 88 | 
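# Output layout built by this function: the patched URDF is written to
# <out_base_dir>/patched_urdf/<urdf name>, and each visual sub-mesh is exported as an OBJ
# under <out_base_dir>/patched_visual_meshes/<link name>/ (paths constructed below).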
out_urdf_path = out_urdf_dir / 'patched_urdf' / urdf_path.name 89 | out_urdf_path.parent.mkdir(exist_ok=True, parents=True) 90 | root = ET.fromstring(Path(urdf_path).read_text()) 91 | for link in root.iter('link'): 92 | visuals = list(link.iter('visual')) 93 | xml_visuals = [] 94 | link_name = link.attrib['name'] 95 | for visual in visuals: 96 | TLV = parse_origin(visual) 97 | geometry = visual.find('geometry') 98 | mesh = geometry.find('mesh') 99 | if mesh is None: 100 | continue 101 | filename = mesh.attrib['filename'] 102 | filename = resolve_package_path(urdf_path, filename) 103 | visual_mesh = trimesh.load(filename) 104 | visuals = extract_mesh_visuals(visual_mesh) 105 | for mesh_id, (mesh, TVM) in enumerate(visuals): 106 | ply_path = meshes_dir / link_name / (filename.with_suffix('').name + f'_part={mesh_id}.ply') 107 | obj_path = ply_path.with_suffix('.obj') 108 | ply_path.parent.mkdir(exist_ok=True) 109 | keep_faces = [] 110 | for face_id, face in enumerate(mesh.faces): 111 | if len(np.unique(face)) == 3: 112 | keep_faces.append(face_id) 113 | mesh.faces = mesh.faces[keep_faces] 114 | if mesh.visual.uv is not None: 115 | mesh.export(obj_path) 116 | else: 117 | mesh.visual = mesh.visual.to_color() 118 | mesh.export(ply_path) 119 | ply_to_obj(ply_path, obj_path) 120 | ply_path.unlink() 121 | relative_obj_path = Path('../') / meshes_dir.name / link_name / obj_path.name 122 | xml_visual = ET.Element('visual') 123 | xml_geom = ET.SubElement(xml_visual, 'geometry') 124 | xml_mesh = ET.SubElement(xml_geom, 'mesh', attrib=dict(filename=str(relative_obj_path))) 125 | TLM = TLV @ np.array(TVM) 126 | attrib = dict(unparse_origin(TLM).attrib) 127 | xml_origin = ET.SubElement(xml_visual, 'origin', attrib=attrib) 128 | xml_material = visual.find('material') 129 | if xml_material is None: 130 | xml_material = ET.SubElement(xml_visual, 'material', {'name': f'mat_part{np.random.randint(1e4)}'}) 131 | xml_color = ET.SubElement(xml_material, 'color', {'rgba': '1.0 1.0 1.0 1.0'}) 132 | xml_visuals.append(xml_visual) 133 | link.remove(visual) 134 | 135 | for new_visual in xml_visuals: 136 | link.append(new_visual) 137 | xmlstr = minidom.parseString(ET.tostring(root).decode()).toprettyxml(indent=" ") 138 | Path(out_urdf_path).write_text(xmlstr) 139 | return out_urdf_path 140 | 141 | 142 | if __name__ == '__main__': 143 | # base_dir = Path('/home/ylabbe/projects/robopose/deps/baxter-description/baxter_description/') 144 | # urdf_path = base_dir / 'urdf/baxter.urdf' 145 | 146 | # base_dir = Path('/home/ylabbe/projects/robopose/deps/kuka-description/iiwa_description') 147 | # urdf_path = base_dir / 'urdf/iiwa7.urdf' 148 | 149 | # base_dir = Path('/home/ylabbe/projects/robopose/deps/kuka-description/lbr_iiwa7_r800') 150 | # urdf_path = base_dir / 'urdf/lbr_iiwa7_r800.urdf' 151 | 152 | base_dir = Path('/home/ylabbe/projects/robopose/deps/panda-description/') 153 | urdf_path = base_dir / 'panda.urdf' 154 | 155 | make_multivisual_urdf(urdf_path, base_dir) 156 | -------------------------------------------------------------------------------- /robopose/lib3d/rotations.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import transforms3d 4 | 5 | 6 | def compute_rotation_matrix_from_ortho6d(poses): 7 | """ 8 | Code from https://github.com/papagina/RotationContinuity 9 | On the Continuity of Rotation Representations in Neural Networks 10 | Zhou et al. 
CVPR19 11 | https://zhouyisjtu.github.io/project_rotation/rotation.html 12 | """ 13 | assert poses.shape[-1] == 6 14 | x_raw = poses[..., 0:3] 15 | y_raw = poses[..., 3:6] 16 | x = x_raw / torch.norm(x_raw, p=2, dim=-1, keepdim=True) 17 | z = torch.cross(x, y_raw, dim=-1) 18 | z = z / torch.norm(z, p=2, dim=-1, keepdim=True) 19 | y = torch.cross(z, x, dim=-1) 20 | matrix = torch.stack((x, y, z), -1) 21 | return matrix 22 | 23 | 24 | def euler2quat(xyz, axes='sxyz'): 25 | """ 26 | euler: sxyz 27 | quaternion: xyzw 28 | """ 29 | wxyz = transforms3d.euler.euler2quat(*xyz, axes=axes) 30 | xyzw = [*wxyz[1:], wxyz[0]] 31 | return np.array(xyzw) 32 | 33 | 34 | def angle_axis_to_rotation_matrix(angle_axis): 35 | """Convert 3d vector of axis-angle rotation to 4x4 rotation matrix 36 | 37 | Args: 38 | angle_axis (Tensor): tensor of 3d vector of axis-angle rotations. 39 | 40 | Returns: 41 | Tensor: tensor of 4x4 rotation matrices. 42 | 43 | Shape: 44 | - Input: :math:`(N, 3)` 45 | - Output: :math:`(N, 4, 4)` 46 | 47 | Example: 48 | >>> input = torch.rand(1, 3) # Nx3 49 | >>> output = tgm.angle_axis_to_rotation_matrix(input) # Nx4x4 50 | """ 51 | def _compute_rotation_matrix(angle_axis, theta2, eps=1e-6): 52 | # We want to be careful to only evaluate the square root if the 53 | # norm of the angle_axis vector is greater than zero. Otherwise 54 | # we get a division by zero. 55 | k_one = 1.0 56 | theta = torch.sqrt(theta2) 57 | wxyz = angle_axis / (theta + eps) 58 | wx, wy, wz = torch.chunk(wxyz, 3, dim=1) 59 | cos_theta = torch.cos(theta) 60 | sin_theta = torch.sin(theta) 61 | 62 | r00 = cos_theta + wx * wx * (k_one - cos_theta) 63 | r10 = wz * sin_theta + wx * wy * (k_one - cos_theta) 64 | r20 = -wy * sin_theta + wx * wz * (k_one - cos_theta) 65 | r01 = wx * wy * (k_one - cos_theta) - wz * sin_theta 66 | r11 = cos_theta + wy * wy * (k_one - cos_theta) 67 | r21 = wx * sin_theta + wy * wz * (k_one - cos_theta) 68 | r02 = wy * sin_theta + wx * wz * (k_one - cos_theta) 69 | r12 = -wx * sin_theta + wy * wz * (k_one - cos_theta) 70 | r22 = cos_theta + wz * wz * (k_one - cos_theta) 71 | rotation_matrix = torch.cat( 72 | [r00, r01, r02, r10, r11, r12, r20, r21, r22], dim=1) 73 | return rotation_matrix.view(-1, 3, 3) 74 | 75 | def _compute_rotation_matrix_taylor(angle_axis): 76 | rx, ry, rz = torch.chunk(angle_axis, 3, dim=1) 77 | k_one = torch.ones_like(rx) 78 | rotation_matrix = torch.cat( 79 | [k_one, -rz, ry, rz, k_one, -rx, -ry, rx, k_one], dim=1) 80 | return rotation_matrix.view(-1, 3, 3) 81 | 82 | # stolen from ceres/rotation.h 83 | 84 | _angle_axis = torch.unsqueeze(angle_axis, dim=1) 85 | theta2 = torch.matmul(_angle_axis, _angle_axis.transpose(1, 2)) 86 | theta2 = torch.squeeze(theta2, dim=1) 87 | 88 | # compute rotation matrices 89 | rotation_matrix_normal = _compute_rotation_matrix(angle_axis, theta2) 90 | rotation_matrix_taylor = _compute_rotation_matrix_taylor(angle_axis) 91 | 92 | # create mask to handle both cases 93 | eps = 1e-6 94 | mask = (theta2 > eps).view(-1, 1, 1).to(theta2.device) 95 | mask_pos = (mask).type_as(theta2) 96 | mask_neg = (mask == False).type_as(theta2) # noqa 97 | 98 | # create output pose matrix 99 | batch_size = angle_axis.shape[0] 100 | rotation_matrix = torch.eye(4).to(angle_axis.device).type_as(angle_axis) 101 | rotation_matrix = rotation_matrix.view(1, 4, 4).repeat(batch_size, 1, 1) 102 | # fill output matrix with masked values 103 | rotation_matrix[..., :3, :3] = \ 104 | mask_pos * rotation_matrix_normal + mask_neg * rotation_matrix_taylor 105 | return 
rotation_matrix # Nx4x4 106 | 107 | 108 | def quaternion_to_angle_axis(quaternion: torch.Tensor) -> torch.Tensor: 109 | """Convert quaternion vector to angle axis of rotation. 110 | 111 | Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h 112 | 113 | Args: 114 | quaternion (torch.Tensor): tensor with quaternions. 115 | 116 | Return: 117 | torch.Tensor: tensor with angle axis of rotation. 118 | 119 | Shape: 120 | - Input: :math:`(*, 4)` where `*` means, any number of dimensions 121 | - Output: :math:`(*, 3)` 122 | 123 | Example: 124 | >>> quaternion = torch.rand(2, 4) # Nx4 125 | >>> angle_axis = tgm.quaternion_to_angle_axis(quaternion) # Nx3 126 | """ 127 | if not torch.is_tensor(quaternion): 128 | raise TypeError("Input type is not a torch.Tensor. Got {}".format( 129 | type(quaternion))) 130 | 131 | if not quaternion.shape[-1] == 4: 132 | raise ValueError("Input must be a tensor of shape Nx4 or 4. Got {}" 133 | .format(quaternion.shape)) 134 | # unpack input and compute conversion 135 | q1: torch.Tensor = quaternion[..., 1] 136 | q2: torch.Tensor = quaternion[..., 2] 137 | q3: torch.Tensor = quaternion[..., 3] 138 | sin_squared_theta: torch.Tensor = q1 * q1 + q2 * q2 + q3 * q3 139 | 140 | sin_theta: torch.Tensor = torch.sqrt(sin_squared_theta) 141 | cos_theta: torch.Tensor = quaternion[..., 0] 142 | two_theta: torch.Tensor = 2.0 * torch.where( 143 | cos_theta < 0.0, 144 | torch.atan2(-sin_theta, -cos_theta), 145 | torch.atan2(sin_theta, cos_theta)) 146 | 147 | k_pos: torch.Tensor = two_theta / sin_theta 148 | k_neg: torch.Tensor = 2.0 * torch.ones_like(sin_theta) 149 | k: torch.Tensor = torch.where(sin_squared_theta > 0.0, k_pos, k_neg) 150 | 151 | angle_axis: torch.Tensor = torch.zeros_like(quaternion)[..., :3] 152 | angle_axis[..., 0] += q1 * k 153 | angle_axis[..., 1] += q2 * k 154 | angle_axis[..., 2] += q3 * k 155 | return angle_axis 156 | 157 | 158 | def quat2mat(quat): 159 | q_xyzw = quat 160 | q_wxyz = quat.clone() 161 | q_wxyz[..., 0] = q_xyzw[..., -1] 162 | q_wxyz[..., 1:] = q_xyzw[..., :-1] 163 | return angle_axis_to_rotation_matrix(quaternion_to_angle_axis(q_wxyz)) 164 | 165 | 166 | def compute_rotation_matrix_from_quaternions(quats): 167 | assert quats.shape[-1] == 4 168 | quats = quats / torch.norm(quats, p=2, dim=-1, keepdim=True) 169 | mat = quat2mat(quats)[:, :3, :3] 170 | return mat 171 | -------------------------------------------------------------------------------- /robopose/evaluation/pred_runner/articulated_predictions.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from tqdm import tqdm 4 | import torch 5 | from collections import defaultdict 6 | 7 | from robopose.datasets.samplers import DistributedSceneSampler 8 | import robopose.utils.tensor_collection as tc 9 | from robopose.utils.distributed import get_world_size, get_rank, get_tmp_dir 10 | from robopose.lib3d.camera_geometry import cropresize_backtransform_points2d 11 | 12 | from torch.utils.data import DataLoader 13 | 14 | 15 | class ArticulatedObjectPredictionRunner: 16 | def __init__(self, scene_ds, batch_size=4, cache_data=False, n_workers=4): 17 | 18 | self.rank = get_rank() 19 | self.world_size = get_world_size() 20 | self.tmp_dir = get_tmp_dir() 21 | 22 | sampler = DistributedSceneSampler(scene_ds, num_replicas=self.world_size, rank=self.rank) 23 | self.sampler = sampler 24 | dataloader = DataLoader(scene_ds, batch_size=batch_size, 25 | num_workers=n_workers, 26 | sampler=sampler, 
collate_fn=self.collate_fn) 27 | 28 | if cache_data: 29 | self.dataloader = list(tqdm(dataloader)) 30 | else: 31 | self.dataloader = dataloader 32 | 33 | def collate_fn(self, batch): 34 | batch_im_id = -1 35 | cam_infos, K, TWC = [], [], [] 36 | joints = defaultdict(list) 37 | orig_K, cropresize_bboxes, orig_wh = [], [], [] 38 | det_infos, bboxes, poses_gt = [], [], [] 39 | images = [] 40 | for n, data in enumerate(batch): 41 | rgb, masks, obs = data 42 | batch_im_id += 1 43 | frame_info = obs['frame_info'] 44 | im_info = {k: frame_info[k] for k in ('scene_id', 'view_id')} 45 | im_info.update(batch_im_id=batch_im_id) 46 | cam_info = im_info.copy() 47 | 48 | if 'orig_camera' in obs: 49 | orig_K_ = obs['orig_camera']['K'] 50 | res = obs['orig_camera']['resolution'] 51 | orig_wh_ = [max(res), min(res)] 52 | cropresize_bbox = obs['orig_camera']['crop_resize_bbox'] 53 | else: 54 | orig_K_ = obs['camera']['K'] 55 | orig_wh_ = [rgb.shape[1], rgb.shape[0]] 56 | cropresize_bbox = (0, 0, orig_wh_[0]-1, orig_wh_[1]-1) 57 | 58 | orig_K.append(torch.as_tensor(orig_K_).float()) 59 | cropresize_bboxes.append(torch.as_tensor(cropresize_bbox)) 60 | orig_wh.append(torch.as_tensor(orig_wh_)) 61 | 62 | K.append(obs['camera']['K']) 63 | TWC.append(obs['camera']['TWC']) 64 | cam_infos.append(cam_info) 65 | images.append(rgb) 66 | 67 | for o, obj in enumerate(obs['objects']): 68 | obj_info = dict( 69 | label=obj['name'], 70 | score=1.0, 71 | ) 72 | obj_info.update(im_info) 73 | 74 | h, w, _ = rgb.shape 75 | m = 1/5 76 | bbox = np.array([w*m, h*m, w-w*m, h-h*m]) 77 | bboxes.append(bbox) 78 | 79 | det_infos.append(obj_info) 80 | assert 'joints' in obj 81 | for k, v in obj['joints'].items(): 82 | joints[k].append(torch.as_tensor(v).view(-1).float()) 83 | 84 | detections = tc.PandasTensorCollection( 85 | infos=pd.DataFrame(det_infos), 86 | bboxes=torch.as_tensor(np.stack(bboxes)).float(), 87 | ) 88 | cameras = tc.PandasTensorCollection( 89 | infos=pd.DataFrame(cam_infos), 90 | K=torch.as_tensor(np.stack(K)), 91 | orig_K=torch.as_tensor(np.stack(orig_K)), 92 | orig_wh=torch.as_tensor(np.stack(orig_wh)), 93 | cropresize_bboxes=torch.as_tensor(np.stack(cropresize_bboxes)), 94 | TWC=torch.as_tensor(np.stack(TWC)), 95 | ) 96 | data = dict( 97 | images=torch.stack(images), 98 | cameras=cameras, 99 | detections=detections, 100 | joints=joints, 101 | ) 102 | return data 103 | 104 | def get_predictions(self, obj_predictor, 105 | use_gt_joints=True, 106 | predict_keypoints=True, 107 | n_iterations=10): 108 | 109 | predictions = defaultdict(list) 110 | for data in tqdm(self.dataloader): 111 | images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255 112 | detections = data['detections'] 113 | cameras = data['cameras'].cuda().float() 114 | joints = data['joints'] 115 | stacked_joints = dict() 116 | for k, v in joints.items(): 117 | stacked_joints[k] = torch.stack(v).cuda().float() 118 | joints = stacked_joints 119 | 120 | assert len(obj_predictor.model.robots) == 1 121 | robot = obj_predictor.model.robots[0] 122 | urdf_layer = obj_predictor.model.get_urdf_layer(robot.name) 123 | joints_tensor = urdf_layer.to_tensor(joints) 124 | detections.register_tensor('joints', joints_tensor) 125 | detections.infos['joint_names'] = urdf_layer.joint_names[None].repeat(len(detections), axis=0).tolist() 126 | 127 | K = cameras.K.float() 128 | detections = detections.cuda().float() 129 | 130 | _, preds = obj_predictor.get_predictions( 131 | images=images, K=K, 132 | detections=detections, 133 | use_gt_joints=use_gt_joints, 134 | 
gt_joints=joints_tensor, 135 | n_iterations=n_iterations, 136 | ) 137 | if predict_keypoints: 138 | for pred_k, pred_v in preds.items(): 139 | TCO_keypoints_3d, keypoints_2d = obj_predictor.pred_keypoints(pred_v) 140 | orig_wh = cameras.orig_wh[pred_v.infos.batch_im_id] 141 | cropresize_bboxes = cameras.cropresize_bboxes[pred_v.infos.batch_im_id] 142 | wh = torch.zeros_like(orig_wh) 143 | wh[:, 1] = images.shape[2] 144 | wh[:, 0] = images.shape[3] 145 | keypoints_2d = cropresize_backtransform_points2d( 146 | orig_wh, cropresize_bboxes, wh, keypoints_2d 147 | ) 148 | pred_v.register_tensor('keypoints_2d', keypoints_2d) 149 | pred_v.register_tensor('TCO_keypoints_3d', TCO_keypoints_3d) 150 | predictions[pred_k].append(pred_v) 151 | 152 | for k, v in predictions.items(): 153 | predictions[k] = tc.concatenate(predictions[k]) 154 | return predictions 155 | -------------------------------------------------------------------------------- /robopose/datasets/dream.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | from tqdm import tqdm 4 | from collections import OrderedDict 5 | from robopose.datasets.utils import make_masks_from_det 6 | import numpy as np 7 | from PIL import Image 8 | 9 | from collections import defaultdict 10 | from pathlib import Path 11 | 12 | from robopose.config import MEMORY 13 | from robopose.lib3d import Transform 14 | 15 | 16 | KUKA_SYNT_TRAIN_DR_INCORRECT_IDS = {83114, 28630, } 17 | @MEMORY.cache 18 | def build_frame_index(base_dir): 19 | im_paths = base_dir.glob('*.jpg') 20 | infos = defaultdict(list) 21 | for n, im_path in tqdm(enumerate(sorted(im_paths))): 22 | view_id = int(im_path.with_suffix('').with_suffix('').name) 23 | if 'kuka_synth_train_dr' in str(base_dir) and int(view_id) in KUKA_SYNT_TRAIN_DR_INCORRECT_IDS: 24 | pass 25 | else: 26 | scene_id = view_id 27 | infos['rgb_path'].append(im_path.as_posix()) 28 | infos['scene_id'].append(scene_id) 29 | infos['view_id'].append(view_id) 30 | 31 | infos = pd.DataFrame(infos) 32 | return infos 33 | 34 | 35 | class DreamDataset: 36 | def __init__(self, base_dir, image_bbox=False): 37 | self.base_dir = Path(base_dir) 38 | self.frame_index = build_frame_index(self.base_dir) 39 | 40 | self.joint_map = dict() 41 | if 'panda' in str(base_dir): 42 | self.keypoint_names = [ 43 | 'panda_link0', 'panda_link2', 'panda_link3', 44 | 'panda_link4', 'panda_link6', 'panda_link7', 45 | 'panda_hand', 46 | ] 47 | self.label = 'panda' 48 | elif 'baxter' in str(base_dir): 49 | self.keypoint_names = [ 50 | 'torso_t0', 'left_s0', 'left_s1', 51 | 'left_e0', 'left_e1', 'left_w0', 52 | 'left_w1', 'left_w2', 'left_hand', 53 | 'right_s0', 'right_s1', 'right_e0', 54 | 'right_e1', 'right_w0', 'right_w1', 55 | 'right_w2', 'right_hand' 56 | ] 57 | self.label = 'baxter' 58 | elif 'kuka' in str(base_dir): 59 | self.keypoint_names = [ 60 | 'iiwa7_link_0', 'iiwa7_link_1', 61 | 'iiwa7_link_2', 'iiwa7_link_3', 62 | 'iiwa7_link_4', 'iiwa7_link_5', 63 | 'iiwa7_link_6', 'iiwa7_link_7', 64 | ] 65 | self.label = 'iiwa7' 66 | else: 67 | raise NotImplementedError 68 | 69 | self.scale = 0.01 if 'synthetic' in str(self.base_dir) else 1.0 70 | self.all_labels = [self.label] 71 | self.image_bbox = image_bbox 72 | 73 | def __len__(self): 74 | return len(self.frame_index) 75 | 76 | def __getitem__(self, idx): 77 | row = self.frame_index.iloc[idx] 78 | rgb_path = Path(row.rgb_path) 79 | rgb = np.asarray(Image.open(rgb_path)) 80 | assert rgb.shape[-1] == 3, rgb_path 81 | 82 | mask = None 83 | 
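# Per-frame labels come from a JSON sidecar stored next to the image. Only the fields read
# below are relied upon; the structure, inferred from this loader, is roughly:
#   {"objects": [{"quaternion_xyzw": [...], "location": [...],
#                 "keypoints": [{"name": ..., "location": [...], "projected_location": [...]}, ...]}],
#    "sim_state": {"joints": [{"name": ..., "position": ...}, ...]}}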
annotations = json.loads(rgb_path.with_suffix('').with_suffix('.json').read_text()) 84 | 85 | # Camera 86 | TWC = np.eye(4) 87 | camera_infos_path = self.base_dir / '_camera_settings.json' 88 | h, w = rgb.shape[0], rgb.shape[1] 89 | if camera_infos_path.exists(): 90 | cam_infos = json.loads(camera_infos_path.read_text()) 91 | assert len(cam_infos['camera_settings']) == 1 92 | cam_infos = cam_infos['camera_settings'][0]['intrinsic_settings'] 93 | fx, fy, cx, cy = [cam_infos[k] for k in ('fx', 'fy', 'cx', 'cy')] 94 | else: 95 | fx, fy = 320, 320 96 | cx, cy = w/2, h/2 97 | 98 | K = np.array([ 99 | [fx, 0, cx], 100 | [0, fy, cy], 101 | [0, 0, 1] 102 | ]) 103 | camera = dict( 104 | TWC=TWC, 105 | resolution=(w, h), 106 | K=K, 107 | ) 108 | label = self.label 109 | 110 | # Objects 111 | obj_data = annotations['objects'][0] 112 | if 'quaternion_xyzw' in obj_data: 113 | TWO = Transform(np.array(obj_data['quaternion_xyzw']), 114 | np.array(obj_data['location']) * self.scale).toHomogeneousMatrix() 115 | R_NORMAL_UE = np.array([ 116 | [0, -1, 0], 117 | [0, 0, -1], 118 | [1, 0, 0], 119 | ]) 120 | TWO[:3, :3] = TWO[:3, :3] @ R_NORMAL_UE 121 | else: 122 | TWO = np.eye(4) * float('nan') 123 | TWO = Transform(TWO) 124 | 125 | joints = annotations['sim_state']['joints'] 126 | joints = OrderedDict({d['name'].split('/')[-1]: d['position'] for d in joints}) 127 | if self.label == 'iiwa7': 128 | joints = {k.replace('iiwa7_', 'iiwa_'): v for k,v in joints.items()} 129 | 130 | keypoints_2d = obj_data['keypoints'] 131 | keypoints_2d = np.concatenate([np.array(kp['projected_location'])[None] for kp in keypoints_2d], axis=0) 132 | keypoints_2d = np.unique(keypoints_2d, axis=0) 133 | valid = np.logical_and(np.logical_and(keypoints_2d[:, 0] >= 0, keypoints_2d[:, 0] <= w - 1), 134 | np.logical_and(keypoints_2d[:, 1] >= 0, keypoints_2d[:, 1] <= h - 1)) 135 | keypoints_2d = keypoints_2d[valid] 136 | det_valid = len(keypoints_2d) >= 2 137 | if det_valid: 138 | bbox = np.concatenate([np.min(keypoints_2d, axis=0), np.max(keypoints_2d, axis=0)]) 139 | dx, dy = bbox[[2, 3]] - bbox[[0, 1]] 140 | if dx <= 20 or dy <= 20: 141 | det_valid = False 142 | 143 | if not det_valid or self.image_bbox: 144 | m = 1/5 145 | keypoints_2d = np.array([[w*m, h*m], [w-w*m, h-h*m]]) 146 | bbox = np.concatenate([np.min(keypoints_2d, axis=0), np.max(keypoints_2d, axis=0)]) 147 | mask = make_masks_from_det(bbox[None], h, w).numpy().astype(np.uint8)[0] * 1 148 | 149 | keypoints = obj_data['keypoints'] 150 | TCO_keypoints_3d = {kp['name']: np.array(kp['location']) * self.scale for kp in keypoints} 151 | TCO_keypoints_3d = np.array([TCO_keypoints_3d.get(k, np.nan) for k in self.keypoint_names]) 152 | keypoints_2d = {kp['name']: kp['projected_location'] for kp in keypoints} 153 | keypoints_2d = np.array([keypoints_2d.get(k, np.nan) for k in self.keypoint_names]) 154 | 155 | robot = dict(label=label, name=label, joints=joints, 156 | TWO=TWO.toHomogeneousMatrix(), bbox=bbox, 157 | id_in_segm=1, 158 | keypoints_2d=keypoints_2d, 159 | TCO_keypoints_3d=TCO_keypoints_3d) 160 | 161 | state = dict( 162 | objects=[robot], 163 | camera=camera, 164 | frame_info=row.to_dict() 165 | ) 166 | return rgb, mask, state 167 | -------------------------------------------------------------------------------- /robopose/datasets/augmentations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import PIL 3 | import torch 4 | import random 5 | from PIL import ImageEnhance, ImageFilter 6 | from torchvision.datasets 
import ImageFolder 7 | import torch.nn.functional as F 8 | from copy import deepcopy 9 | 10 | from robopose.lib3d.camera_geometry import get_K_crop_resize 11 | from .utils import make_detections_from_segmentation, crop_to_aspect_ratio 12 | 13 | 14 | def to_pil(im): 15 | if isinstance(im, PIL.Image.Image): 16 | return im 17 | elif isinstance(im, torch.Tensor): 18 | return PIL.Image.fromarray(np.asarray(im)) 19 | elif isinstance(im, np.ndarray): 20 | return PIL.Image.fromarray(im) 21 | else: 22 | raise ValueError('Type not supported', type(im)) 23 | 24 | 25 | def to_torch_uint8(im): 26 | if isinstance(im, PIL.Image.Image): 27 | im = torch.as_tensor(np.asarray(im).astype(np.uint8)) 28 | elif isinstance(im, torch.Tensor): 29 | assert im.dtype == torch.uint8 30 | elif isinstance(im, np.ndarray): 31 | assert im.dtype == np.uint8 32 | im = torch.as_tensor(im) 33 | else: 34 | raise ValueError('Type not supported', type(im)) 35 | if im.dim() == 3: 36 | assert im.shape[-1] in {1, 3} 37 | return im 38 | 39 | 40 | class PillowBlur: 41 | def __init__(self, p=0.4, factor_interval=(1, 3)): 42 | self.p = p 43 | self.factor_interval = factor_interval 44 | 45 | def __call__(self, im, mask, obs): 46 | im = to_pil(im) 47 | k = random.randint(*self.factor_interval) 48 | im = im.filter(ImageFilter.GaussianBlur(k)) 49 | return im, mask, obs 50 | 51 | 52 | class PillowRGBAugmentation: 53 | def __init__(self, pillow_fn, p, factor_interval): 54 | self._pillow_fn = pillow_fn 55 | self.p = p 56 | self.factor_interval = factor_interval 57 | 58 | def __call__(self, im, mask, obs): 59 | im = to_pil(im) 60 | if random.random() <= self.p: 61 | im = self._pillow_fn(im).enhance(factor=random.uniform(*self.factor_interval)) 62 | return im, mask, obs 63 | 64 | 65 | class PillowSharpness(PillowRGBAugmentation): 66 | def __init__(self, p=0.3, factor_interval=(0., 50.)): 67 | super().__init__(pillow_fn=ImageEnhance.Sharpness, 68 | p=p, 69 | factor_interval=factor_interval) 70 | 71 | 72 | class PillowContrast(PillowRGBAugmentation): 73 | def __init__(self, p=0.3, factor_interval=(0.2, 50.)): 74 | super().__init__(pillow_fn=ImageEnhance.Contrast, 75 | p=p, 76 | factor_interval=factor_interval) 77 | 78 | 79 | class PillowBrightness(PillowRGBAugmentation): 80 | def __init__(self, p=0.5, factor_interval=(0.1, 6.0)): 81 | super().__init__(pillow_fn=ImageEnhance.Brightness, 82 | p=p, 83 | factor_interval=factor_interval) 84 | 85 | 86 | class PillowColor(PillowRGBAugmentation): 87 | def __init__(self, p=0.3, factor_interval=(0.0, 20.0)): 88 | super().__init__(pillow_fn=ImageEnhance.Color, 89 | p=p, 90 | factor_interval=factor_interval) 91 | 92 | 93 | class GrayScale(PillowRGBAugmentation): 94 | def __init__(self, p=0.3): 95 | self.p = p 96 | 97 | def __call__(self, im, mask, obs): 98 | im = to_pil(im) 99 | if random.random() <= self.p: 100 | im = to_torch_uint8(im).float() 101 | gray = 0.2989 * im[..., 0] + 0.5870 * im[..., 1] + 0.1140 * im[..., 2] 102 | gray = gray.to(torch.uint8) 103 | im = gray.unsqueeze(-1).repeat(1, 1, 3) 104 | return im, mask, obs 105 | 106 | 107 | class BackgroundAugmentation: 108 | def __init__(self, image_dataset, p): 109 | self.image_dataset = image_dataset 110 | self.p = p 111 | 112 | def get_bg_image(self, idx): 113 | return self.image_dataset[idx] 114 | 115 | def __call__(self, im, mask, obs): 116 | if random.random() <= self.p: 117 | im = to_torch_uint8(im) 118 | mask = to_torch_uint8(mask) 119 | h, w, c = im.shape 120 | im_bg = self.get_bg_image(random.randint(0, len(self.image_dataset) - 1)) 121 | im_bg 
= to_pil(im_bg) 122 | im_bg = torch.as_tensor(np.asarray(im_bg.resize((w, h)))) 123 | mask_bg = mask == 0 124 | im[mask_bg] = im_bg[mask_bg] 125 | return im, mask, obs 126 | 127 | 128 | class VOCBackgroundAugmentation(BackgroundAugmentation): 129 | def __init__(self, voc_root, p=0.3): 130 | image_dataset = ImageFolder(voc_root) 131 | super().__init__(image_dataset=image_dataset, p=p) 132 | 133 | def get_bg_image(self, idx): 134 | return self.image_dataset[idx][0] 135 | 136 | 137 | class CropResizeToAspectAugmentation: 138 | def __init__(self, resize=(640, 480)): 139 | self.resize = (min(resize), max(resize)) 140 | self.aspect = max(resize) / min(resize) 141 | 142 | def __call__(self, im, mask, obs): 143 | im = to_torch_uint8(im) 144 | mask = to_torch_uint8(mask) 145 | obs['orig_camera'] = deepcopy(obs['camera']) 146 | assert im.shape[-1] == 3 147 | h, w = im.shape[:2] 148 | if (h, w) == self.resize: 149 | obs['orig_camera']['crop_resize_bbox'] = (0, 0, w-1, h-1) 150 | return im, mask, obs 151 | 152 | images = (torch.as_tensor(im).float() / 255).unsqueeze(0).permute(0, 3, 1, 2) 153 | masks = torch.as_tensor(mask).unsqueeze(0).unsqueeze(0).float() 154 | K = torch.tensor(obs['camera']['K']).unsqueeze(0) 155 | 156 | # Match the width on input image with an image of target aspect ratio. 157 | if not np.isclose(w/h, self.aspect): 158 | x0, y0 = images.shape[-1] / 2, images.shape[-2] / 2 159 | w = images.shape[-1] 160 | r = self.aspect 161 | h = w * 1/r 162 | box_size = (h, w) 163 | h, w = min(box_size), max(box_size) 164 | x1, y1, x2, y2 = x0-w/2, y0-h/2, x0+w/2, y0+h/2 165 | box = torch.tensor([x1, y1, x2, y2]) 166 | images, masks, K = crop_to_aspect_ratio(images, box, masks=masks, K=K) 167 | 168 | # Resize to target size 169 | x0, y0 = images.shape[-1] / 2, images.shape[-2] / 2 170 | h_input, w_input = images.shape[-2], images.shape[-1] 171 | h_output, w_output = min(self.resize), max(self.resize) 172 | box_size = (h_input, w_input) 173 | h, w = min(box_size), max(box_size) 174 | x1, y1, x2, y2 = x0-w/2, y0-h/2, x0+w/2, y0+h/2 175 | box = torch.tensor([x1, y1, x2, y2]) 176 | images = F.interpolate(images, size=(h_output, w_output), mode='bilinear', align_corners=False) 177 | masks = F.interpolate(masks, size=(h_output, w_output), mode='nearest') 178 | obs['orig_camera']['crop_resize_bbox'] = tuple(box.tolist()) 179 | K = get_K_crop_resize(K, box.unsqueeze(0), orig_size=(h_input, w_input), crop_resize=(h_output, w_output)) 180 | 181 | # Update the bounding box annotations 182 | dets_gt = make_detections_from_segmentation(masks)[0] 183 | for n, obj in enumerate(obs['objects']): 184 | if 'bbox' in obj: 185 | assert 'id_in_segm' in obj 186 | obj['bbox'] = dets_gt[obj['id_in_segm']] 187 | 188 | im = (images[0].permute(1, 2, 0) * 255).to(torch.uint8) 189 | mask = masks[0, 0].to(torch.uint8) 190 | obs['camera']['K'] = K.squeeze(0).numpy() 191 | obs['camera']['resolution'] = (w_output, h_output) 192 | return im, mask, obs 193 | -------------------------------------------------------------------------------- /robopose/evaluation/pred_runner/video_predictions.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from tqdm import tqdm 4 | import torch 5 | from collections import defaultdict 6 | 7 | import robopose.utils.tensor_collection as tc 8 | from robopose.datasets.samplers import ListSampler 9 | from robopose.utils.distributed import get_world_size, get_rank, get_tmp_dir 10 | from robopose.lib3d.camera_geometry import 
cropresize_backtransform_points2d
11 | 
12 | 
13 | from torch.utils.data import DataLoader
14 | 
15 | 
16 | class VideoPredictionRunner:
17 |     def __init__(self, scene_ds, cache_data=False, n_workers=4):
18 | 
19 |         self.rank = get_rank()
20 |         self.world_size = get_world_size()
21 |         self.tmp_dir = get_tmp_dir()
22 |         assert self.world_size == 1
23 | 
24 |         self.sampler = ListSampler(np.argsort(scene_ds.frame_index['view_id']))
25 |         dataloader = DataLoader(scene_ds, batch_size=1, num_workers=n_workers,
26 |                                 sampler=self.sampler, collate_fn=self.collate_fn)
27 | 
28 |         if cache_data:
29 |             self.dataloader = list(tqdm(dataloader))
30 |         else:
31 |             self.dataloader = dataloader
32 | 
33 |     def collate_fn(self, batch):
34 |         batch_im_id = -1
35 |         cam_infos, K, TWC = [], [], []
36 |         joints = defaultdict(list)
37 |         orig_K, cropresize_bboxes, orig_wh = [], [], []
38 |         det_infos, bboxes, poses_gt = [], [], []
39 |         images = []
40 |         for n, data in enumerate(batch):
41 |             rgb, masks, obs = data
42 |             batch_im_id += 1
43 |             frame_info = obs['frame_info']
44 |             im_info = {k: frame_info[k] for k in ('scene_id', 'view_id')}
45 |             im_info.update(batch_im_id=batch_im_id)
46 |             cam_info = im_info.copy()
47 | 
48 |             if 'orig_camera' in obs:
49 |                 orig_K_ = obs['orig_camera']['K']
50 |                 res = obs['orig_camera']['resolution']
51 |                 orig_wh_ = [max(res), min(res)]
52 |                 cropresize_bbox = obs['orig_camera']['crop_resize_bbox']
53 |             else:
54 |                 orig_K_ = obs['camera']['K']
55 |                 orig_wh_ = [rgb.shape[1], rgb.shape[0]]
56 |                 cropresize_bbox = (0, 0, orig_wh_[0]-1, orig_wh_[1]-1)  # no crop/resize: bbox covers the current image
57 | 
58 |             orig_K.append(torch.as_tensor(orig_K_).float())
59 |             cropresize_bboxes.append(torch.as_tensor(cropresize_bbox))
60 |             orig_wh.append(torch.as_tensor(orig_wh_))
61 | 
62 |             K.append(obs['camera']['K'])
63 |             TWC.append(obs['camera']['TWC'])
64 |             cam_infos.append(cam_info)
65 |             images.append(rgb)
66 | 
67 |             for o, obj in enumerate(obs['objects']):
68 |                 obj_info = dict(
69 |                     label=obj['name'],
70 |                     score=1.0,
71 |                 )
72 |                 obj_info.update(im_info)
73 |                 h, w, _ = rgb.shape
74 |                 m = 1/5
75 |                 bbox = np.array([w*m, h*m, w-w*m, h-h*m])
76 |                 bboxes.append(bbox)
77 |                 det_infos.append(obj_info)
78 |                 assert 'joints' in obj
79 |                 for k, v in obj['joints'].items():
80 |                     joints[k].append(torch.as_tensor(v).view(-1).float())
81 | 
82 |         detections = tc.PandasTensorCollection(
83 |             infos=pd.DataFrame(det_infos),
84 |             bboxes=torch.as_tensor(np.stack(bboxes)).float(),
85 |         )
86 |         cameras = tc.PandasTensorCollection(
87 |             infos=pd.DataFrame(cam_infos),
88 |             K=torch.as_tensor(np.stack(K)),
89 |             orig_K=torch.as_tensor(np.stack(orig_K)),
90 |             orig_wh=torch.as_tensor(np.stack(orig_wh)),
91 |             cropresize_bboxes=torch.as_tensor(np.stack(cropresize_bboxes)),
92 |             TWC=torch.as_tensor(np.stack(TWC)),
93 |         )
94 |         data = dict(
95 |             images=torch.stack(images),
96 |             cameras=cameras,
97 |             detections=detections,
98 |             joints=joints,
99 |         )
100 |         return data
101 | 
102 |     def get_predictions(self, obj_predictor,
103 |                         use_gt_joints=True,
104 |                         predict_keypoints=True,
105 |                         n_iterations_init=10,
106 |                         n_iterations_new_image=1):
107 | 
108 |         predictions = defaultdict(list)
109 |         prev_preds = None
110 |         pred_key = None
111 |         for frame_id, data in enumerate(tqdm(self.dataloader)):
112 |             detections = data['detections']
113 |             images = data['images'].cuda().float().permute(0, 3, 1, 2) / 255
114 |             cameras = data['cameras'].cuda().float()
115 |             joints = data['joints']
116 |             stacked_joints = dict()
117 |             for k, v in joints.items():
118 |                 stacked_joints[k] = torch.stack(v).cuda().float()
119 |             joints = stacked_joints
120 | 
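# Online tracking loop (see the code below): the first frame is initialized from the
# loose image-centered detection boxes built in collate_fn and refined for
# n_iterations_init iterations; every later frame is warm-started from the previous
# frame's estimate (data_TCO_init) and refined for only n_iterations_new_image iterations.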
121 | assert len(obj_predictor.model.robots) == 1 122 | robot = obj_predictor.model.robots[0] 123 | urdf_layer = obj_predictor.model.get_urdf_layer(robot.name) 124 | joints_tensor = urdf_layer.to_tensor(joints) 125 | detections.register_tensor('joints', joints_tensor) 126 | detections.infos['joint_names'] = urdf_layer.joint_names[None].repeat(len(detections), axis=0).tolist() 127 | 128 | K = cameras.K.float() 129 | detections = detections.cuda().float() 130 | 131 | assert len(images) == 1 132 | if frame_id == 0: 133 | _, current_preds = obj_predictor.get_predictions( 134 | images=images, K=K, 135 | detections=detections, 136 | use_gt_joints=use_gt_joints, 137 | gt_joints=joints_tensor, 138 | n_iterations=n_iterations_init, 139 | ) 140 | pred_key = f'iteration={n_iterations_init}' 141 | else: 142 | _, current_preds = obj_predictor.get_predictions( 143 | images=images, K=K, 144 | use_gt_joints=use_gt_joints, 145 | gt_joints=joints_tensor, 146 | n_iterations=n_iterations_new_image, 147 | data_TCO_init=prev_preds[f'{pred_key}'], 148 | ) 149 | pred_key = f'iteration={n_iterations_new_image}' 150 | 151 | prev_preds = current_preds 152 | current_pred = current_preds[pred_key] 153 | for k in ('view_id', 'scene_id', 'batch_im_id'): 154 | current_pred.infos[k] = detections.infos[k] 155 | preds = {'online': current_pred} 156 | if predict_keypoints: 157 | for pred_k, pred_v in preds.items(): 158 | TCO_keypoints_3d, keypoints_2d = obj_predictor.pred_keypoints(pred_v) 159 | orig_wh = cameras.orig_wh[pred_v.infos.batch_im_id] 160 | cropresize_bboxes = cameras.cropresize_bboxes[pred_v.infos.batch_im_id] 161 | wh = torch.zeros_like(orig_wh) 162 | wh[:, 1] = images.shape[2] 163 | wh[:, 0] = images.shape[3] 164 | keypoints_2d = cropresize_backtransform_points2d( 165 | orig_wh, cropresize_bboxes, wh, keypoints_2d 166 | ) 167 | pred_v.register_tensor('keypoints_2d', keypoints_2d) 168 | pred_v.register_tensor('TCO_keypoints_3d', TCO_keypoints_3d) 169 | predictions[pred_k].append(pred_v) 170 | 171 | for k, v in predictions.items(): 172 | predictions[k] = tc.concatenate(predictions[k]) 173 | return predictions 174 | -------------------------------------------------------------------------------- /robopose/training/pose_articulated_forward_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from collections import OrderedDict 4 | from copy import deepcopy 5 | from robopose.utils.logging import get_logger 6 | 7 | 8 | from robopose.lib3d.robopose_ops import loss_refiner_CO_disentangled_reference_point 9 | from robopose.lib3d.transform_ops import add_noise, invert_T, transform_pts 10 | 11 | from robopose.lib3d.robopose_ops import ( 12 | TCO_init_from_boxes_zup_autodepth, 13 | ) 14 | from robopose.lib3d.robot_ops import add_noise_joints 15 | from robopose.lib3d.articulated_mesh_database import Meshes 16 | 17 | 18 | logger = get_logger(__name__) 19 | 20 | def cast(obj, dtype=None): 21 | if isinstance(obj, (dict, OrderedDict)): 22 | for k, v in obj.items(): 23 | obj[k] = cast(torch.as_tensor(v)) 24 | if dtype is not None: 25 | obj[k] = obj[k].to(dtype) 26 | return obj 27 | else: 28 | return obj.cuda(non_blocking=True) 29 | 30 | 31 | def obj_infos_to_tensor(urdf_layer, obj_infos): 32 | q = [] 33 | for n in range(len(obj_infos)): 34 | q.append(urdf_layer.to_tensor(obj_infos[n]['joints'])) 35 | q = torch.cat(q, dim=0) 36 | return q 37 | 38 | def h_pose(model, data, meters, cfg, 39 | n_iterations=1, mesh_db=None, train=True): 40 | if 
isinstance(model, torch.nn.parallel.DistributedDataParallel): 41 | model_without_ddp = model.module 42 | else: 43 | model_without_ddp = model 44 | 45 | dtype, device = torch.float32, 'cuda' 46 | images = cast(data.images).float() / 255. 47 | batch_size, _, h, w = images.shape 48 | 49 | TCO_gt = cast(data.TCO).float() 50 | K = cast(data.K).float() 51 | bboxes = cast(data.bboxes).float() 52 | 53 | # Convert joints dict to tensor 54 | obj_infos_gt = data.objects 55 | for n in range(batch_size): 56 | name = obj_infos_gt[n]['name'] 57 | urdf_layer = mesh_db.urdf_layers[mesh_db.label_to_id[name]] 58 | obj_infos_gt[n]['joints'] = {k: torch.as_tensor(obj_infos_gt[n]['joints'][k]).view(1, -1).to(dtype) for k in urdf_layer.joint_names} 59 | 60 | # Compute input pose/joints by adding noise to the ground truth 61 | ## Joint initialization 62 | obj_infos_init = deepcopy(obj_infos_gt) 63 | if cfg.predict_joints: 64 | for n in range(batch_size): 65 | name = obj_infos_gt[n]['name'] 66 | urdf_layer = mesh_db.urdf_layers[mesh_db.label_to_id[name]] 67 | if cfg.input_generator == 'gt+noise': 68 | q_limits = urdf_layer.joint_limits 69 | q0 = cast(obj_infos_gt[n]['joints'], dtype=dtype) 70 | q0_tensor = urdf_layer.to_tensor(q0) 71 | q0_tensor = add_noise_joints(q0_tensor, 72 | std_interval_ratio=cfg.joints_std_interval_ratio, 73 | q_limits=q_limits) 74 | elif cfg.input_generator == 'fixed': 75 | q_default = urdf_layer.joints_default.unsqueeze(0).to(dtype) 76 | q0_tensor = urdf_layer.to_tensor(q_default) 77 | else: 78 | raise ValueError 79 | obj_infos_init[n]['joints'] = urdf_layer.from_tensor(q0_tensor) 80 | 81 | # Pose initialization 82 | meshes = model_without_ddp.mesh_db.select(obj_infos_init) 83 | _, T_O_CENTROID = meshes.center_meshes() 84 | T_C_CENTROID_gt = TCO_gt @ T_O_CENTROID 85 | 86 | if cfg.input_generator == 'gt+noise': 87 | T_C_CENTROID_init = add_noise(T_C_CENTROID_gt, 88 | euler_deg_std=[60, 60, 60], 89 | trans_std=[0.1, 0.1, 0.1]) 90 | elif cfg.input_generator == 'fixed': 91 | centered_meshes = Meshes(meshes.labels, transform_pts(invert_T(T_O_CENTROID), meshes.points)) 92 | centered_points = centered_meshes.sample_points(2000, deterministic=True) 93 | T_C_CENTROID_init = TCO_init_from_boxes_zup_autodepth(bboxes, centered_points, K) 94 | else: 95 | raise ValueError 96 | 97 | TCO_init = T_C_CENTROID_init @ invert_T(T_O_CENTROID) 98 | 99 | # Cast joints to gpu 100 | for n in range(batch_size): 101 | name = obj_infos_gt[n]['name'] 102 | urdf_layer = mesh_db.urdf_layers[mesh_db.label_to_id[name]] 103 | obj_infos_gt[n]['joints'] = cast(obj_infos_gt[n]['joints'], dtype=dtype) 104 | obj_infos_init[n]['joints'] = cast(obj_infos_init[n]['joints'], dtype=dtype) 105 | 106 | # Forward pass 107 | outputs = model(images=images, K=K, obj_infos=obj_infos_init, 108 | TCO=TCO_init, n_iterations=n_iterations, 109 | update_obj_infos=cfg.predict_joints) 110 | 111 | losses_TCO_iter = [] 112 | losses_q_iter = [] 113 | losses_iter = [] 114 | q_gt = obj_infos_to_tensor(urdf_layer, obj_infos_gt) 115 | for n in range(n_iterations): 116 | iter_outputs = outputs[f'iteration={n+1}'] 117 | K_crop = iter_outputs['K_crop'] 118 | refiner_outputs = iter_outputs['refiner_outputs'] 119 | obj_infos_input = iter_outputs['obj_infos_input'] 120 | 121 | # Pose loss 122 | anchor_link_names = iter_outputs['anchor_link_names'] 123 | if cfg.points_for_pose_loss == 'anchor_link': 124 | link_meshes = mesh_db.select(obj_infos_input, link_names=iter_outputs['anchor_link_names'], apply_fk=False) 125 | anchor_loss_pts = 
link_meshes.sample_points(min(cfg.n_points_loss, link_meshes.points.shape[1]), deterministic=False) 126 | elif cfg.points_for_pose_loss == 'whole_robot': 127 | robot_meshes = mesh_db.select(obj_infos_input, apply_fk=True) 128 | anchor_loss_pts = robot_meshes.sample_points(min(cfg.n_points_loss, robot_meshes.points.shape[1]), deterministic=False) 129 | assert all([anchor_link_names[n] == urdf_layer.robot.base_link.name for n in range(batch_size)]) 130 | else: 131 | raise ValueError(cfg.points_for_pose_loss) 132 | 133 | TOA_gt = urdf_layer.compute_link_pose(anchor_link_names, q_gt) 134 | TCA_gt = TCO_gt @ TOA_gt 135 | 136 | TCA_input = iter_outputs['TCA_input'] 137 | t_C_REF = iter_outputs['t_C_REF'] 138 | refiner_pose_update = refiner_outputs['pose'] 139 | 140 | loss_TCO_iter = loss_refiner_CO_disentangled_reference_point( 141 | TCO_possible_gt=TCA_gt.unsqueeze(1), 142 | TCO_input=TCA_input, 143 | refiner_outputs=refiner_pose_update, 144 | K_crop=K_crop, 145 | points=anchor_loss_pts, 146 | tCR=t_C_REF, 147 | ) 148 | 149 | # Joints loss 150 | q_output = obj_infos_to_tensor(urdf_layer, iter_outputs['obj_infos_output_no_clamp']) 151 | 152 | if cfg.predict_joints: 153 | loss_q_iter = ((q_output - q_gt) ** 2).mean(dim=-1) 154 | meters[f'loss_q-iter={n+1}'].add(loss_q_iter.mean().item()) 155 | losses_q_iter.append(loss_q_iter) 156 | 157 | if model_without_ddp.debug: 158 | from robopose.lib3d.camera_geometry import project_points 159 | model_without_ddp.tmp_debug['pts_proj_gt'] = project_points(anchor_loss_pts, K_crop, TCA_gt) 160 | model_without_ddp.tmp_debug['pts_proj_input'] = project_points(anchor_loss_pts, K_crop, TCA_input) 161 | 162 | meters[f'loss_TCO-iter={n+1}'].add(loss_TCO_iter.mean().item()) 163 | losses_TCO_iter.append(loss_TCO_iter) 164 | 165 | losses_TCO_iter = torch.cat(losses_TCO_iter) 166 | loss_TCO = losses_TCO_iter.mean() 167 | if cfg.predict_joints: 168 | loss_q = torch.cat(losses_q_iter).mean() 169 | loss_q_scaled = loss_q * cfg.loss_q_lambda 170 | meters['loss_q'].add(loss_q.item()) 171 | meters['loss_q_scaled'].add(loss_q_scaled.item()) 172 | loss = loss_TCO + loss_q_scaled 173 | else: 174 | loss = loss_TCO 175 | meters['loss_TCO'].add(loss_TCO.item()) 176 | meters['loss_total'].add(loss.item()) 177 | return loss 178 | --------------------------------------------------------------------------------
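The snippet below is an illustrative sketch, not a file from the repository: it only shows how the DreamDataset and CropResizeToAspectAugmentation classes listed above are meant to be called together. The data directory is a placeholder (it assumes a DREAM image/annotation folder downloaded locally); the class names and call signatures are taken from the listings above.

from robopose.datasets.dream import DreamDataset
from robopose.datasets.augmentations import CropResizeToAspectAugmentation

# Placeholder path: point this at a locally downloaded DREAM sequence.
# image_bbox=True makes the dataset always build a coarse image-centered bbox/mask.
ds = DreamDataset('local_data/dream/panda_synth_test_dr', image_bbox=True)
rgb, mask, state = ds[0]  # HxWx3 uint8 image, uint8 mask, annotation dict

# Crop/resize to a 640x480 aspect ratio; when a crop or resize is applied, the
# intrinsics in state['camera']['K'] and the object bounding boxes are updated,
# and the original camera is stored under state['orig_camera'].
resize_aug = CropResizeToAspectAugmentation(resize=(640, 480))
rgb, mask, state = resize_aug(rgb, mask, state)

VideoPredictionRunner.collate_fn relies on the 'orig_camera' entry written by this augmentation, which is what allows predicted 2D keypoints to be mapped back to the original image resolution through cropresize_backtransform_points2d.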