├── .DS_Store ├── .gitignore ├── LICENSE ├── README.md ├── diffusion_policy ├── .DS_Store ├── codecs │ └── imagecodecs_numcodecs.py ├── common │ ├── checkpoint_util.py │ ├── cv2_util.py │ ├── env_util.py │ ├── json_logger.py │ ├── nested_dict_util.py │ ├── normalize_util.py │ ├── pose_trajectory_interpolator.py │ ├── precise_sleep.py │ ├── pymunk_override.py │ ├── pymunk_util.py │ ├── pytorch_util.py │ ├── replay_buffer.py │ ├── robomimic_config_util.py │ ├── robomimic_util.py │ ├── sampler.py │ └── timestamp_accumulator.py ├── config │ ├── task │ │ ├── blockpush_lowdim_seed.yaml │ │ ├── blockpush_lowdim_seed_abs.yaml │ │ ├── can_image.yaml │ │ ├── can_image_abs.yaml │ │ ├── can_lowdim.yaml │ │ ├── can_lowdim_abs.yaml │ │ ├── kitchen_lowdim.yaml │ │ ├── kitchen_lowdim_abs.yaml │ │ ├── lift_image.yaml │ │ ├── lift_image_abs.yaml │ │ ├── lift_lowdim.yaml │ │ ├── lift_lowdim_abs.yaml │ │ ├── pusht_image.yaml │ │ ├── pusht_lowdim.yaml │ │ ├── real_pusht_image.yaml │ │ ├── square_image.yaml │ │ ├── square_image_abs.yaml │ │ ├── square_lowdim.yaml │ │ ├── square_lowdim_abs.yaml │ │ ├── tool_hang_image.yaml │ │ ├── tool_hang_image_abs.yaml │ │ ├── tool_hang_lowdim.yaml │ │ ├── tool_hang_lowdim_abs.yaml │ │ ├── transport_image.yaml │ │ ├── transport_image_abs.yaml │ │ ├── transport_lowdim.yaml │ │ └── transport_lowdim_abs.yaml │ ├── train_bet_lowdim_workspace.yaml │ ├── train_diffusion_transformer_hybrid_workspace.yaml │ ├── train_diffusion_transformer_lowdim_kitchen_workspace.yaml │ ├── train_diffusion_transformer_lowdim_pusht_workspace.yaml │ ├── train_diffusion_transformer_lowdim_workspace.yaml │ ├── train_diffusion_transformer_real_hybrid_workspace.yaml │ ├── train_diffusion_unet_ddim_hybrid_workspace.yaml │ ├── train_diffusion_unet_ddim_lowdim_workspace.yaml │ ├── train_diffusion_unet_hybrid_workspace.yaml │ ├── train_diffusion_unet_image_pretrained_workspace.yaml │ ├── train_diffusion_unet_image_workspace.yaml │ ├── train_diffusion_unet_lowdim_workspace.yaml │ ├── train_diffusion_unet_real_hybrid_workspace.yaml │ ├── train_diffusion_unet_real_image_workspace.yaml │ ├── train_diffusion_unet_real_pretrained_workspace.yaml │ ├── train_diffusion_unet_video_workspace.yaml │ ├── train_ibc_dfo_hybrid_workspace.yaml │ ├── train_ibc_dfo_lowdim_workspace.yaml │ ├── train_ibc_dfo_real_hybrid_workspace.yaml │ ├── train_robomimic_image_workspace.yaml │ ├── train_robomimic_lowdim_workspace.yaml │ └── train_robomimic_real_image_workspace.yaml ├── dataset │ ├── .DS_Store │ ├── base_dataset.py │ ├── blockpush_lowdim_dataset.py │ ├── gibson_dataset.py │ ├── kitchen_lowdim_dataset.py │ ├── kitchen_mjl_lowdim_dataset.py │ ├── pusht_dataset.py │ ├── pusht_image_dataset.py │ ├── real_pusht_image_dataset.py │ ├── robomimic_replay_image_dataset.py │ ├── robomimic_replay_lowdim_dataset.py │ ├── test_img.tar.gz │ └── test_img │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 10.png │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.png │ │ ├── 14.png │ │ ├── 15.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ ├── 9.png │ │ ├── action.png │ │ └── pose.png ├── env_runner │ ├── base_image_runner.py │ ├── base_lowdim_runner.py │ ├── blockpush_lowdim_runner.py │ ├── kitchen_lowdim_runner.py │ ├── pusht_image_runner.py │ ├── pusht_keypoints_runner.py │ ├── real_pusht_image_runner.py │ ├── robomimic_image_runner.py │ └── robomimic_lowdim_runner.py ├── gym_util │ ├── async_vector_env.py │ ├── multistep_wrapper.py │ ├── sync_vector_env.py │ ├── video_recording_wrapper.py │ └── video_wrapper.py 
├── model │ ├── .DS_Store │ ├── bet │ │ ├── action_ae │ │ │ ├── __init__.py │ │ │ └── discretizers │ │ │ │ └── k_means.py │ │ ├── latent_generators │ │ │ ├── latent_generator.py │ │ │ ├── mingpt.py │ │ │ └── transformer.py │ │ ├── libraries │ │ │ ├── loss_fn.py │ │ │ └── mingpt │ │ │ │ ├── LICENSE │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ └── utils.py │ ├── clip_model │ │ └── clip_model_for_features.py │ ├── common │ │ ├── dict_of_tensor_mixin.py │ │ ├── lr_scheduler.py │ │ ├── module_attr_mixin.py │ │ ├── normalizer.py │ │ ├── rotation_transformer.py │ │ ├── shape_util.py │ │ └── tensor_util.py │ ├── diffusion │ │ ├── conditional_unet1d.py │ │ ├── conv1d_components.py │ │ ├── ema_model.py │ │ ├── mask_generator.py │ │ ├── positional_embedding.py │ │ └── transformer_for_diffusion.py │ └── vision │ │ ├── crop_randomizer.py │ │ ├── model_getter.py │ │ └── multi_image_obs_encoder.py ├── policy │ ├── base_image_policy.py │ ├── base_lowdim_policy.py │ ├── bet_lowdim_policy.py │ ├── diffusion_transformer_hybrid_image_policy.py │ ├── diffusion_transformer_hybrid_image_policy_backup.py │ ├── diffusion_transformer_hybrid_image_policy_for_vis.py │ ├── diffusion_transformer_lowdim_policy.py │ ├── diffusion_unet_hybrid_image_policy copy.py │ ├── diffusion_unet_hybrid_image_policy.py │ ├── diffusion_unet_image_policy.py │ ├── diffusion_unet_lowdim_policy.py │ ├── diffusion_unet_video_policy.py │ ├── ibc_dfo_hybrid_image_policy.py │ ├── ibc_dfo_lowdim_policy.py │ ├── robomimic_image_policy.py │ └── robomimic_lowdim_policy.py ├── real_world │ ├── .DS_Store │ ├── keystroke_counter.py │ ├── multi_camera_visualizer.py │ ├── multi_realsense.py │ ├── real_data_conversion.py │ ├── real_env.py │ ├── real_inference_util.py │ ├── realsense_config │ │ ├── 415_high_accuracy_mode.json │ │ └── 435_high_accuracy_mode.json │ ├── rtde_interpolation_controller.py │ ├── single_realsense.py │ ├── spacemouse.py │ ├── spacemouse_shared_memory.py │ └── video_recorder.py ├── scripts │ ├── bet_blockpush_conversion.py │ ├── blockpush_abs_conversion.py │ ├── episode_lengths.py │ ├── generate_bet_blockpush.py │ ├── real_dataset_conversion.py │ ├── real_pusht_metrics.py │ ├── real_pusht_successrate.py │ ├── robomimic_dataset_action_comparison.py │ └── robomimic_dataset_conversion.py ├── shared_memory │ ├── shared_memory_queue.py │ ├── shared_memory_ring_buffer.py │ ├── shared_memory_util.py │ └── shared_ndarray.py └── workspace │ ├── base_workspace.py │ ├── train_bet_lowdim_workspace.py │ ├── train_diffusion_transformer_hybrid_workspace.py │ ├── train_diffusion_transformer_lowdim_workspace.py │ ├── train_diffusion_unet_hybrid_workspace.py │ ├── train_diffusion_unet_image_workspace.py │ ├── train_diffusion_unet_lowdim_workspace.py │ ├── train_diffusion_unet_video_workspace.py │ ├── train_ibc_dfo_hybrid_workspace.py │ ├── train_ibc_dfo_lowdim_workspace.py │ ├── train_robomimic_image_workspace.py │ └── train_robomimic_lowdim_workspace.py ├── environment.yml ├── experiment_scripts └── gibson │ └── eval_tdiff.sh ├── semexp ├── .DS_Store ├── agents │ └── utils │ │ ├── semantic_prediction.py │ │ └── visualization.py ├── arguments.py ├── configs │ ├── Base-RCNN-FPN.yaml │ └── COCO-InstanceSegmentation │ │ └── mask_rcnn_R_50_FPN_3x.yaml ├── constants.py ├── docs │ ├── legend_gibson.png │ └── legend_mp3d.png ├── envs │ ├── .DS_Store │ ├── __init__.py │ ├── habitat │ │ ├── __init__.py │ │ ├── configs │ │ │ └── tasks │ │ │ │ └── objectnav_gibson.yaml │ │ ├── objectgoal_env.py │ │ ├── 
sem_exp.py │ │ └── utils │ │ │ └── vector_env.py │ └── utils │ │ ├── depth_utils.py │ │ ├── fmm_planner.py │ │ ├── map_builder.py │ │ ├── pose.py │ │ └── rotation_utils.py ├── eval_tdiff.py ├── km_match.py ├── model.py ├── model_pf.py ├── sxz │ ├── data_check.py │ ├── img │ │ ├── circle0.png │ │ ├── circle1.png │ │ ├── circle2.png │ │ ├── circle3.png │ │ ├── dist_circle_test0.png │ │ ├── dist_circle_test1.png │ │ ├── dist_map.png │ │ ├── dist_map_dilate.png │ │ ├── origin_map.png │ │ ├── pbz2_Collierville.png │ │ ├── pbz2_Corozal.png │ │ ├── pbz2_Darden.png │ │ ├── pbz2_Markleeville.png │ │ ├── pbz2_Wiconisco.png │ │ └── test.png │ └── visualize.py ├── util │ ├── crop.py │ ├── cross_attention.py │ ├── datasets.py │ ├── lr_decay.py │ ├── lr_sched.py │ ├── misc.py │ └── pos_embed.py ├── utils │ ├── agent_helper.py │ ├── agent_state.py │ ├── distributions.py │ ├── mapping_module.py │ ├── model.py │ ├── rednet.py │ ├── storage.py │ ├── stubborn_agent.py │ └── visualize_tools.py └── vis_adds.py ├── tdiff ├── constants.py ├── dataset.py ├── default.py ├── fmm_planner.py ├── geometry.py ├── hab_utils.py ├── model.py ├── train_utils.py └── visualize_tools.py ├── train.py └── train_traj ├── environment.yaml ├── train.py ├── train.sh ├── train_diffusion_traj_gibson.yaml └── trajectory_diffusion ├── codecs └── imagecodecs_numcodecs.py ├── common ├── checkpoint_util.py ├── cv2_util.py ├── env_util.py ├── json_logger.py ├── nested_dict_util.py ├── normalize_util.py ├── pose_trajectory_interpolator.py ├── precise_sleep.py ├── pymunk_override.py ├── pymunk_util.py ├── pytorch_util.py ├── replay_buffer.py ├── robomimic_config_util.py ├── robomimic_util.py ├── sampler.py └── timestamp_accumulator.py ├── config └── train_diffusion_transformer_gibson_workspace.yaml ├── dataset ├── base_dataset.py └── gibson_dataset.py ├── env └── objnav │ ├── __init__.py │ ├── objnav_env.py │ ├── objnav_gibson_env.py │ ├── objnav_keypoints_env.py │ ├── pymunk_keypoint_manager.py │ └── pymunk_override.py ├── env_runner ├── base_image_runner.py └── gibson_traj_diff_runner.py ├── gym_util ├── async_vector_env.py ├── multistep_wrapper.py ├── sync_vector_env.py ├── video_recording_wrapper.py └── video_wrapper.py ├── model ├── clip_model │ └── clip_model_for_features.py ├── common │ ├── dict_of_tensor_mixin.py │ ├── lr_scheduler.py │ ├── module_attr_mixin.py │ ├── normalizer.py │ ├── rotation_transformer.py │ ├── shape_util.py │ └── tensor_util.py ├── diffusion │ ├── conditional_unet1d.py │ ├── conv1d_components.py │ ├── ema_model.py │ ├── mask_generator.py │ ├── positional_embedding.py │ └── transformer_for_diffusion.py └── vision │ ├── crop_randomizer.py │ ├── model_getter.py │ └── multi_image_obs_encoder.py ├── policy ├── base_image_policy.py └── trajectory_diffusion_transformer_gibson_policy.py ├── real_world ├── keystroke_counter.py ├── real_data_conversion.py ├── real_env.py ├── real_inference_util.py ├── realsense_config │ ├── 415_high_accuracy_mode.json │ └── 435_high_accuracy_mode.json ├── spacemouse.py ├── spacemouse_shared_memory.py └── video_recorder.py └── workspace ├── base_workspace.py └── train_diffusion_transformer_gibson_workspace.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Trajectory 
Diffusion for ObjectGoal Navigation 2 | 3 | ## Setup 4 | - Clone the repository and move into the top-level directory: `cd T-Diff` 5 | - Create the conda environment. `conda env create -f environment.yml` 6 | - Activate the environment. `conda activate tdiff` 7 | - We provide pre-trained models of [T-Diff](https://drive.google.com/file/d/1AzJEfhy8Sfu_CUiNTwO4RkZEmbX04k42/view?usp=sharing) and [area_prediction](https://drive.google.com/file/d/113hMyZFT5orwfcFlrX_ESRawbrr6UiT7/view?usp=sharing). For evaluation, download them to the top-level directory. 8 | - Download the [t_diff_dataset](https://drive.google.com/file/d/1p5h7wxRwnPZ63cwZK6DWhpJKErhWNuDb/view). 9 | - Download the [semantic maps (gt)](https://drive.google.com/file/d/1lOJlZXWBeCsnPzqgdnvXbEmF2yGxRwY4/view?usp=sharing). 10 | 11 | ## Dataset 12 | We use a modified version of the Gibson ObjectNav evaluation setup from [SemExp](https://github.com/devendrachaplot/Object-Goal-Navigation). 13 | 14 | 1. Download the [Gibson ObjectNav dataset](https://utexas.box.com/s/tss7udt3ralioalb6eskj3z3spuvwz7v) to `$T_Diff_ROOT/data/datasets/objectnav/gibson`. 15 | ``` 16 | cd $T_Diff_ROOT/data/datasets/objectnav 17 | wget -O gibson_objectnav_episodes.tar.gz https://utexas.box.com/shared/static/tss7udt3ralioalb6eskj3z3spuvwz7v.gz 18 | tar -xvzf gibson_objectnav_episodes.tar.gz && rm gibson_objectnav_episodes.tar.gz 19 | ``` 20 | 2. Download the image segmentation model [[URL](https://utexas.box.com/s/sf4prmup4fsiu6taljnt5ht8unev5ikq)] to `$T_Diff_ROOT/pretrained_models`. 21 | 3. To visualize episodes with the semantic map and potential function predictions, add the arguments `--print_images 1 --num_pf_maps 3` to the evaluation script. 22 | 23 | The `data` folder should look like this: 24 | ``` 25 | data/ 26 | ├── datasets/objectnav/gibson/v1.1 27 | ├── train/ 28 | │ ├── content/ 29 | │ ├── train_info.pbz2 30 | │ └── train.json.gz 31 | ├── val/ 32 | │ ├── content/ 33 | │ ├── val_info.pbz2 34 | │ └── val.json.gz 35 | ├── scene_datasets/ 36 | ├── gibson_semantic/ 37 | ├── Allensville_semantic.ply 38 | ├── Allensville.glb 39 | ├── Allensville.ids 40 | ├── Allensville.navmesh 41 | ├── Allensville.scn 42 | ├── ... 43 | ├── semantic_maps/ 44 | ├── gibson/semantic_maps 45 | ├── semmap_GT_info.json 46 | ├── Allensville_0.png 47 | ├── Allensville.h5 48 | ├── ... 49 | ``` 50 | 51 | 52 | 53 | 54 | ## Evaluation 55 | `sh experiment_scripts/gibson/eval_tdiff.sh` 56 | 57 | ## Training 58 | Download the [Gibson Traj dataset](https://drive.google.com/file/d/1p5h7wxRwnPZ63cwZK6DWhpJKErhWNuDb/view?usp=sharing) to `$T_Diff_ROOT/train_traj/data/gibson_traj_32`. 59 | 60 | 1. Create the conda environment. `conda env create -f train_traj/environment.yml` 61 | 2. Activate the environment. `conda activate diff_train` 62 | 3. 
`sh $T_Diff_ROOT/train_traj/train.sh` 63 | -------------------------------------------------------------------------------- /diffusion_policy/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | # self.monitor_key = monitor_key 17 | self.monitor_key = "val_loss" 18 | self.mode = mode 19 | self.k = k 20 | self.format_str = format_str 21 | self.path_value_map = dict() 22 | 23 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 24 | if self.k == 0: 25 | return None 26 | 27 | value = data[self.monitor_key] 28 | ckpt_path = os.path.join( 29 | self.save_dir, self.format_str.format(**data)) 30 | 31 | if len(self.path_value_map) < self.k: 32 | # under-capacity 33 | self.path_value_map[ckpt_path] = value 34 | return ckpt_path 35 | 36 | # at capacity 37 | sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 38 | min_path, min_value = sorted_map[0] 39 | max_path, max_value = sorted_map[-1] 40 | 41 | delete_path = None 42 | if self.mode == 'max': 43 | if value > min_value: 44 | delete_path = min_path 45 | else: 46 | if value < max_value: 47 | delete_path = max_path 48 | 49 | if delete_path is None: 50 | return None 51 | else: 52 | del self.path_value_map[delete_path] 53 | self.path_value_map[ckpt_path] = value 54 | 55 | if not os.path.exists(self.save_dir): 56 | os.mkdir(self.save_dir) 57 | 58 | if os.path.exists(delete_path): 59 | os.remove(delete_path) 60 | return ckpt_path 61 | -------------------------------------------------------------------------------- /diffusion_policy/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if i == 0: 12 | env.set_state(state) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /diffusion_policy/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaf of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 
| if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /diffusion_policy/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest. 7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /diffusion_policy/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /diffusion_policy/common/pytorch_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, List 2 | import collections 3 | import torch 4 | import torch.nn as nn 5 | 6 | def dict_apply( 7 | x: Dict[str, torch.Tensor], 8 | func: Callable[[torch.Tensor], torch.Tensor] 9 | ) -> Dict[str, torch.Tensor]: 10 | result = dict() 11 | for key, value in x.items(): 12 | if 
isinstance(value, dict): 13 | result[key] = dict_apply(value, func) 14 | else: 15 | result[key] = func(value) 16 | return result 17 | 18 | def pad_remaining_dims(x, target): 19 | assert x.shape == target.shape[:len(x.shape)] 20 | return x.reshape(x.shape + (1,)*(len(target.shape) - len(x.shape))) 21 | 22 | def dict_apply_split( 23 | x: Dict[str, torch.Tensor], 24 | split_func: Callable[[torch.Tensor], Dict[str, torch.Tensor]] 25 | ) -> Dict[str, torch.Tensor]: 26 | results = collections.defaultdict(dict) 27 | for key, value in x.items(): 28 | result = split_func(value) 29 | for k, v in result.items(): 30 | results[k][key] = v 31 | return results 32 | 33 | def dict_apply_reduce( 34 | x: List[Dict[str, torch.Tensor]], 35 | reduce_func: Callable[[List[torch.Tensor]], torch.Tensor] 36 | ) -> Dict[str, torch.Tensor]: 37 | result = dict() 38 | for key in x[0].keys(): 39 | result[key] = reduce_func([x_[key] for x_ in x]) 40 | return result 41 | 42 | 43 | def replace_submodules( 44 | root_module: nn.Module, 45 | predicate: Callable[[nn.Module], bool], 46 | func: Callable[[nn.Module], nn.Module]) -> nn.Module: 47 | """ 48 | predicate: Return true if the module is to be replaced. 49 | func: Return new module to use. 50 | """ 51 | if predicate(root_module): 52 | return func(root_module) 53 | 54 | bn_list = [k.split('.') for k, m 55 | in root_module.named_modules(remove_duplicate=True) 56 | if predicate(m)] 57 | for *parent, k in bn_list: 58 | parent_module = root_module 59 | if len(parent) > 0: 60 | parent_module = root_module.get_submodule('.'.join(parent)) 61 | if isinstance(parent_module, nn.Sequential): 62 | src_module = parent_module[int(k)] 63 | else: 64 | src_module = getattr(parent_module, k) 65 | tgt_module = func(src_module) 66 | if isinstance(parent_module, nn.Sequential): 67 | parent_module[int(k)] = tgt_module 68 | else: 69 | setattr(parent_module, k, tgt_module) 70 | # verify that all BN are replaced 71 | bn_list = [k.split('.') for k, m 72 | in root_module.named_modules(remove_duplicate=True) 73 | if predicate(m)] 74 | assert len(bn_list) == 0 75 | return root_module 76 | 77 | def optimizer_to(optimizer, device): 78 | for state in optimizer.state.values(): 79 | for k, v in state.items(): 80 | if isinstance(v, torch.Tensor): 81 | state[k] = v.to(device=device) 82 | return optimizer 83 | -------------------------------------------------------------------------------- /diffusion_policy/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = 
modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: False 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed_abs 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: True 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed_abs.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | 
robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
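# Note on the padding expressions in the dataset block below (values illustrative, not defined
# in this file): n_obs_steps, n_action_steps and n_latency_steps come from the training
# workspace config. With, say, n_obs_steps=2, n_action_steps=8 and n_latency_steps=0,
# pad_before resolves to 2-1+0 = 1 and pad_after to 8-1 = 7, i.e. each sampled training
# window may be padded by one frame at the start and seven at the end of an episode.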
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: 
diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | dataset_dir: &dataset_dir data/kitchen 8 | 9 | env_runner: 10 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 11 | dataset_dir: *dataset_dir 12 | n_train: 6 13 | n_train_vis: 2 14 | train_start_seed: 0 15 | n_test: 50 16 | n_test_vis: 4 17 | test_start_seed: 100000 18 | max_steps: 280 19 | n_obs_steps: ${n_obs_steps} 20 | n_action_steps: ${n_action_steps} 21 | render_hw: [240, 360] 22 | fps: 12.5 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.kitchen_lowdim_dataset.KitchenLowdimDataset 28 | dataset_dir: *dataset_dir 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | abs_action: True 8 | robot_noise_ratio: 0.1 9 | 10 | env_runner: 11 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 12 | dataset_dir: data/kitchen 13 | n_train: 6 14 | n_train_vis: 2 15 | train_start_seed: 0 16 | n_test: 50 17 | n_test_vis: 4 18 | test_start_seed: 100000 19 | max_steps: 280 20 | n_obs_steps: ${n_obs_steps} 21 | n_action_steps: ${n_action_steps} 22 | render_hw: [240, 360] 23 | fps: 12.5 24 | past_action: ${past_action_visible} 25 | abs_action: ${task.abs_action} 26 | robot_noise_ratio: ${task.robot_noise_ratio} 27 | n_envs: null 28 | 29 | dataset: 30 | _target_: diffusion_policy.dataset.kitchen_mjl_lowdim_dataset.KitchenMjlLowdimDataset 31 | dataset_dir: data/kitchen/kitchen_demos_multitask 32 | horizon: ${horizon} 33 | pad_before: ${eval:'${n_obs_steps}-1'} 34 | pad_after: ${eval:'${n_action_steps}-1'} 35 | abs_action: ${task.abs_action} 36 | robot_noise_ratio: ${task.robot_noise_ratio} 37 | seed: 42 38 | val_ratio: 0.02 39 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path 
data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 1 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 3 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | n_train: 6 32 | n_train_vis: 2 33 | train_start_idx: 0 34 | n_test: 50 35 | n_test_vis: 4 36 | test_start_seed: 100000 37 | # use python's eval function as resolver, single-quoted string as argument 38 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'agentview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
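# Note on the 10-dim action above: with abs_action: True the action is presumably the absolute
# end-effector target, 3 position dims + 6 rotation dims (the rotation_6d representation
# selected in the dataset block below) + 1 gripper dim, whereas the relative-action variants
# of these tasks keep the original 7-dim actions.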
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 3 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | 
tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | rotation_rep: rotation_6d 46 | seed: 42 47 | val_ratio: 0.02 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_image 2 | 3 | image_shape: &image_shape [3, 96, 96] 4 | shape_meta: &shape_meta 5 | # acceptable types: rgb, low_dim 6 | obs: 7 | image: 8 | shape: *image_shape 9 | type: rgb 10 | agent_pos: 11 | shape: [2] 12 | type: low_dim 13 | action: 14 | shape: [2] 15 | 16 | env_runner: 17 | _target_: diffusion_policy.env_runner.pusht_image_runner.PushTImageRunner 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_seed: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | legacy_test: True 24 | test_start_seed: 100000 25 | max_steps: 300 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | fps: 10 29 | past_action: ${past_action_visible} 30 | n_envs: null 31 | 32 | dataset: 33 | _target_: diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset 34 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 35 | horizon: ${horizon} 36 | pad_before: ${eval:'${n_obs_steps}-1'} 37 | pad_after: ${eval:'${n_action_steps}-1'} 38 | seed: 42 39 | val_ratio: 0.02 40 | max_train_episodes: 90 41 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_lowdim 2 | 3 | obs_dim: 20 # 9*2 keypoints + 2 state 4 | action_dim: 2 5 | keypoint_dim: 2 6 | 7 | env_runner: 8 | _target_: diffusion_policy.env_runner.pusht_keypoints_runner.PushTKeypointsRunner 9 | keypoint_visible_rate: ${keypoint_visible_rate} 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | legacy_test: True 16 | test_start_seed: 100000 17 | max_steps: 300 18 | n_obs_steps: ${n_obs_steps} 19 | n_action_steps: ${n_action_steps} 20 | n_latency_steps: ${n_latency_steps} 21 | fps: 10 22 | agent_keypoints: False 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.pusht_dataset.PushTLowdimDataset 28 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | max_train_episodes: 90 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/real_pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: real_image 2 | 3 | image_shape: [3, 240, 320] 4 | dataset_path: data/pusht_real/real_pusht_20230105 5 | 6 | shape_meta: &shape_meta 7 | # acceptable types: rgb, low_dim 8 | obs: 9 | # camera_0: 10 | # shape: ${task.image_shape} 11 | # type: rgb 12 | camera_1: 13 | shape: ${task.image_shape} 14 | type: rgb 15 | # camera_2: 16 | # shape: ${task.image_shape} 17 | # type: rgb 18 | camera_3: 19 | shape: 
${task.image_shape} 20 | type: rgb 21 | # camera_4: 22 | # shape: ${task.image_shape} 23 | # type: rgb 24 | robot_eef_pose: 25 | shape: [2] 26 | type: low_dim 27 | action: 28 | shape: [2] 29 | 30 | env_runner: 31 | _target_: diffusion_policy.env_runner.real_pusht_image_runner.RealPushTImageRunner 32 | 33 | dataset: 34 | _target_: diffusion_policy.dataset.real_pusht_image_dataset.RealPushTImageDataset 35 | shape_meta: *shape_meta 36 | dataset_path: ${task.dataset_path} 37 | horizon: ${horizon} 38 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 39 | pad_after: ${eval:'${n_action_steps}-1'} 40 | n_obs_steps: ${dataset_obs_steps} 41 | n_latency_steps: ${n_latency_steps} 42 | use_cache: True 43 | seed: 42 44 | val_ratio: 0.00 45 | max_train_episodes: null 46 | delta_action: False 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
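# The ${eval:'...'} entries above and below are resolved by a custom OmegaConf resolver that
# calls Python's eval; e.g. with the default dataset_type 'ph', max_steps above evaluates to
# 400 (it would be 500 for the 'mh' datasets).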
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
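# use_cache: True in the dataset block below is assumed to build a converted local cache of
# the hdf5 demos on first load so that later runs skip the slow per-frame image decoding
# (behaviour inferred from the dataset class, not stated in this config).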
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | max_train_episodes: null 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | abs_action: &abs_action True 11 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 12 | 13 | 14 | env_runner: 15 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 16 | dataset_path: *dataset_path 17 | obs_keys: *obs_keys 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_idx: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | test_start_seed: 100000 24 | # use python's eval function as resolver, single-quoted string as argument 25 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | n_latency_steps: ${n_latency_steps} 29 | render_hw: [128,128] 30 | fps: 10 31 | crf: 22 32 | past_action: ${past_action_visible} 33 | abs_action: 
*abs_action 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | max_train_episodes: null 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
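# Compared with the other robomimic image tasks in this folder, tool_hang uses larger
# 240x240 observations (sideview_image / robot0_eye_in_hand_image above) and a longer
# rollout budget (max_steps: 700), matching the longer-horizon nature of the task.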
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image_abs 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
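# The absolute-action variant predicts a 10-dim action (presumably 3 eef position + 6 for the
# rotation_6d representation selected in the dataset below + 1 gripper), whereas tool_hang_image
# above keeps the native 7-dim delta actions; the dataset converts rotations according to
# rotation_rep.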
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 
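# The dataset below pads episode starts/ends so that windows anchored near an episode boundary
# can still be sampled: pad_before = n_obs_steps - 1 + n_latency_steps and
# pad_after = n_action_steps - 1 (e.g. n_obs_steps=2, n_latency_steps=0, n_action_steps=8 gives
# pad_before=1, pad_after=7). The quoted expressions rely on the custom ${eval:...} resolver,
# presumably registered by the training entry point.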
34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [14] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 38 | abs_action: &abs_action False 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
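# transport is a two-arm task: shape_meta above lists a shoulder and wrist camera plus eef pose
# and gripper state for each robot, and the 14-dim action is 7 per arm. Entries tagged
# "type: rgb" are routed to the image encoder, while untyped entries default to low_dim.
# A sketch of how such a split can be derived from shape_meta (not the repo's exact code):
#   rgb_keys = [k for k, v in shape_meta['obs'].items() if v.get('type', 'low_dim') == 'rgb']
#   lowdim_keys = [k for k, v in shape_meta['obs'].items() if v.get('type', 'low_dim') == 'low_dim']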
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [20] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 38 | abs_action: &abs_action True 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 14 # 7*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 15 | abs_action: &abs_action False 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 5 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 
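# Blocks with a _target_ key (env_runner above, dataset below) are built via
# hydra.utils.instantiate. A minimal sketch of composing this task into a workspace and
# building the dataset (illustrative only -- assumes the robomimic hdf5 has been downloaded to
# dataset_path and that the ${eval:...} resolver is registered, as the training entry point
# presumably does):
#   from hydra import initialize, compose
#   from hydra.utils import instantiate
#   from omegaconf import OmegaConf
#   OmegaConf.register_new_resolver("eval", eval, replace=True)
#   with initialize(config_path="diffusion_policy/config"):  # path relative to the calling file
#       cfg = compose(config_name="train_robomimic_lowdim_workspace",
#                     overrides=["task=transport_lowdim"])
#   dataset = instantiate(cfg.task.dataset)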
38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 20 # 10*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 15 | abs_action: &abs_action True 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 4 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_bet_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: blockpush_lowdim_seed 4 | 5 | name: train_bet_lowdim 6 | _target_: diffusion_policy.workspace.train_bet_lowdim_workspace.TrainBETLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | keypoint_dim: ${task.keypoint_dim} 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 3 15 | n_obs_steps: 3 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | obs_as_local_cond: False 21 | obs_as_global_cond: False 22 | pred_action_steps_only: False 23 | 24 | policy: 25 | _target_: diffusion_policy.policy.bet_lowdim_policy.BETLowdimPolicy 26 | 27 | action_ae: 28 | _target_: diffusion_policy.model.bet.action_ae.discretizers.k_means.KMeansDiscretizer 29 | num_bins: 24 30 | action_dim: ${action_dim} 31 | predict_offsets: True 32 | 33 | obs_encoding_net: 34 | _target_: torch.nn.Identity 35 | output_dim: ${obs_dim} 36 | 37 | state_prior: 38 | _target_: diffusion_policy.model.bet.latent_generators.mingpt.MinGPT 39 | 40 | discrete_input: false 41 | input_dim: ${obs_dim} 42 | 43 | vocab_size: 
${policy.action_ae.num_bins} 44 | 45 | # Architecture details 46 | n_layer: 4 47 | n_head: 4 48 | n_embd: 72 49 | 50 | block_size: ${horizon} # Length of history/context 51 | predict_offsets: True 52 | offset_loss_scale: 1000.0 # actions are very small 53 | focal_loss_gamma: 2.0 54 | action_dim: ${action_dim} 55 | 56 | horizon: ${horizon} 57 | n_obs_steps: ${n_obs_steps} 58 | n_action_steps: ${n_action_steps} 59 | 60 | dataloader: 61 | batch_size: 256 62 | num_workers: 1 63 | shuffle: True 64 | pin_memory: True 65 | persistent_workers: False 66 | 67 | val_dataloader: 68 | batch_size: 256 69 | num_workers: 1 70 | shuffle: False 71 | pin_memory: True 72 | persistent_workers: False 73 | 74 | optimizer: 75 | learning_rate: 0.0001 # 1e-4 76 | weight_decay: 0.1 77 | betas: [0.9, 0.95] 78 | 79 | training: 80 | device: "cuda:0" 81 | seed: 42 82 | debug: False 83 | resume: True 84 | # optimization 85 | lr_scheduler: cosine 86 | lr_warmup_steps: 500 87 | num_epochs: 5000 88 | gradient_accumulate_every: 1 89 | grad_norm_clip: 1.0 90 | enable_normalizer: True 91 | # training loop control 92 | # in epochs 93 | rollout_every: 50 94 | checkpoint_every: 50 95 | val_every: 1 96 | sample_every: 5 97 | # steps per epoch 98 | max_train_steps: null 99 | max_val_steps: null 100 | # misc 101 | tqdm_interval_sec: 1.0 102 | 103 | logging: 104 | project: diffusion_policy_debug 105 | resume: True 106 | mode: online 107 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 108 | tags: ["${name}", "${task_name}", "${exp_name}"] 109 | id: null 110 | group: null 111 | 112 | checkpoint: 113 | topk: 114 | monitor_key: test_mean_score 115 | mode: max 116 | k: 5 117 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 118 | save_last_ckpt: True 119 | save_last_snapshot: False 120 | 121 | multi_run: 122 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 123 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 124 | 125 | hydra: 126 | job: 127 | override_dirname: ${name} 128 | run: 129 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 130 | sweep: 131 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 132 | subdir: ${hydra.job.num} 133 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_ibc_dfo_hybrid_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: pusht_image 4 | 5 | name: train_ibc_dfo_hybrid 6 | _target_: diffusion_policy.workspace.train_ibc_dfo_hybrid_workspace.TrainIbcDfoHybridWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: 2 13 | n_obs_steps: 2 14 | n_action_steps: 1 15 | n_latency_steps: 0 16 | dataset_obs_steps: ${n_obs_steps} 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.ibc_dfo_hybrid_image_policy.IbcDfoHybridImagePolicy 22 | 23 | shape_meta: ${shape_meta} 24 | 25 | horizon: ${horizon} 26 | n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} 27 | n_obs_steps: ${n_obs_steps} 28 | dropout: 0.1 29 | train_n_neg: 1024 30 | pred_n_iter: 5 31 | pred_n_samples: 1024 32 | kevin_inference: False 33 | andy_train: False 34 | obs_encoder_group_norm: True 35 | eval_fixed_crop: True 36 | crop_shape: [84, 84] 37 | 38 | dataloader: 39 | batch_size: 128 40 | num_workers: 8 41 | shuffle: True 42 | pin_memory: True 43 | 
persistent_workers: False 44 | 45 | val_dataloader: 46 | batch_size: 128 47 | num_workers: 8 48 | shuffle: False 49 | pin_memory: True 50 | persistent_workers: False 51 | 52 | optimizer: 53 | _target_: torch.optim.AdamW 54 | lr: 1.0e-4 55 | betas: [0.95, 0.999] 56 | eps: 1.0e-8 57 | weight_decay: 1.0e-6 58 | 59 | training: 60 | device: "cuda:0" 61 | seed: 42 62 | debug: False 63 | resume: True 64 | # optimization 65 | lr_scheduler: cosine 66 | lr_warmup_steps: 500 67 | num_epochs: 3050 68 | gradient_accumulate_every: 1 69 | # training loop control 70 | # in epochs 71 | rollout_every: 50 72 | checkpoint_every: 50 73 | val_every: 1 74 | sample_every: 5 75 | sample_max_batch: 128 76 | # steps per epoch 77 | max_train_steps: null 78 | max_val_steps: null 79 | # misc 80 | tqdm_interval_sec: 1.0 81 | 82 | logging: 83 | project: diffusion_policy_debug 84 | resume: True 85 | mode: online 86 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 87 | tags: ["${name}", "${task_name}", "${exp_name}"] 88 | id: null 89 | group: null 90 | 91 | checkpoint: 92 | topk: 93 | monitor_key: test_mean_score 94 | mode: max 95 | k: 5 96 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 97 | save_last_ckpt: True 98 | save_last_snapshot: False 99 | 100 | multi_run: 101 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 102 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 103 | 104 | hydra: 105 | job: 106 | override_dirname: ${name} 107 | run: 108 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 109 | sweep: 110 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 111 | subdir: ${hydra.job.num} 112 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_ibc_dfo_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: pusht_lowdim 4 | 5 | name: train_ibc_dfo_lowdim 6 | _target_: diffusion_policy.workspace.train_ibc_dfo_lowdim_workspace.TrainIbcDfoLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | keypoint_dim: ${task.keypoint_dim} 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 2 15 | n_obs_steps: 2 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | 21 | policy: 22 | _target_: diffusion_policy.policy.ibc_dfo_lowdim_policy.IbcDfoLowdimPolicy 23 | 24 | horizon: ${horizon} 25 | obs_dim: ${obs_dim} 26 | action_dim: ${action_dim} 27 | n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} 28 | n_obs_steps: ${n_obs_steps} 29 | dropout: 0.1 30 | train_n_neg: 1024 31 | pred_n_iter: 5 32 | pred_n_samples: 1024 33 | kevin_inference: False 34 | andy_train: False 35 | 36 | dataloader: 37 | batch_size: 256 38 | num_workers: 1 39 | shuffle: True 40 | pin_memory: True 41 | persistent_workers: False 42 | 43 | val_dataloader: 44 | batch_size: 256 45 | num_workers: 1 46 | shuffle: False 47 | pin_memory: True 48 | persistent_workers: False 49 | 50 | optimizer: 51 | _target_: torch.optim.AdamW 52 | lr: 1.0e-4 53 | betas: [0.95, 0.999] 54 | eps: 1.0e-8 55 | weight_decay: 1.0e-6 56 | 57 | training: 58 | device: "cuda:0" 59 | seed: 42 60 | debug: False 61 | resume: True 62 | # optimization 63 | lr_scheduler: cosine 64 | lr_warmup_steps: 500 65 | num_epochs: 5000 66 | gradient_accumulate_every: 1 67 | # training loop control 68 | # in epochs 69 | 
rollout_every: 50 70 | checkpoint_every: 50 71 | val_every: 1 72 | sample_every: 5 73 | sample_max_batch: 128 74 | # steps per epoch 75 | max_train_steps: null 76 | max_val_steps: null 77 | # misc 78 | tqdm_interval_sec: 1.0 79 | 80 | logging: 81 | project: diffusion_policy_debug 82 | resume: True 83 | mode: online 84 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 85 | tags: ["${name}", "${task_name}", "${exp_name}"] 86 | id: null 87 | group: null 88 | 89 | checkpoint: 90 | topk: 91 | monitor_key: test_mean_score 92 | mode: max 93 | k: 5 94 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 95 | save_last_ckpt: True 96 | save_last_snapshot: False 97 | 98 | multi_run: 99 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 100 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 101 | 102 | hydra: 103 | job: 104 | override_dirname: ${name} 105 | run: 106 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 107 | sweep: 108 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 109 | subdir: ${hydra.job.num} 110 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_ibc_dfo_real_hybrid_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: real_pusht_image 4 | 5 | name: train_ibc_dfo_hybrid 6 | _target_: diffusion_policy.workspace.train_ibc_dfo_hybrid_workspace.TrainIbcDfoHybridWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: 2 13 | n_obs_steps: 2 14 | n_action_steps: 1 15 | n_latency_steps: 1 16 | dataset_obs_steps: ${n_obs_steps} 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.ibc_dfo_hybrid_image_policy.IbcDfoHybridImagePolicy 22 | 23 | shape_meta: ${shape_meta} 24 | 25 | horizon: ${horizon} 26 | n_action_steps: ${n_action_steps} 27 | n_obs_steps: ${n_obs_steps} 28 | dropout: 0.1 29 | train_n_neg: 256 30 | pred_n_iter: 3 31 | pred_n_samples: 1024 32 | kevin_inference: False 33 | andy_train: False 34 | obs_encoder_group_norm: True 35 | eval_fixed_crop: True 36 | crop_shape: [216, 288] # ch, cw 320x240 90% 37 | 38 | dataloader: 39 | batch_size: 128 40 | num_workers: 8 41 | shuffle: True 42 | pin_memory: True 43 | persistent_workers: False 44 | 45 | val_dataloader: 46 | batch_size: 128 47 | num_workers: 1 48 | shuffle: False 49 | pin_memory: True 50 | persistent_workers: False 51 | 52 | optimizer: 53 | _target_: torch.optim.AdamW 54 | lr: 1.0e-4 55 | betas: [0.95, 0.999] 56 | eps: 1.0e-8 57 | weight_decay: 1.0e-6 58 | 59 | training: 60 | device: "cuda:0" 61 | seed: 42 62 | debug: False 63 | resume: True 64 | # optimization 65 | lr_scheduler: cosine 66 | lr_warmup_steps: 500 67 | num_epochs: 1000 68 | gradient_accumulate_every: 1 69 | # training loop control 70 | # in epochs 71 | rollout_every: 50 72 | checkpoint_every: 5 73 | val_every: 1 74 | sample_every: 5 75 | sample_max_batch: 128 76 | # steps per epoch 77 | max_train_steps: null 78 | max_val_steps: null 79 | # misc 80 | tqdm_interval_sec: 1.0 81 | 82 | logging: 83 | project: diffusion_policy_debug 84 | resume: True 85 | mode: online 86 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 87 | tags: ["${name}", "${task_name}", "${exp_name}"] 88 | id: null 89 | group: null 90 | 91 | checkpoint: 92 | topk: 93 | monitor_key: train_action_mse_error 94 | 
mode: min 95 | k: 5 96 | format_str: 'epoch={epoch:04d}-train_action_mse_error={train_action_mse_error:.3f}.ckpt' 97 | save_last_ckpt: True 98 | save_last_snapshot: False 99 | 100 | multi_run: 101 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 102 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 103 | 104 | hydra: 105 | job: 106 | override_dirname: ${name} 107 | run: 108 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 109 | sweep: 110 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 111 | subdir: ${hydra.job.num} 112 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_image_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: lift_image 4 | 5 | name: train_robomimic_image 6 | _target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: &horizon 10 13 | n_obs_steps: 1 14 | n_action_steps: 1 15 | n_latency_steps: 0 16 | dataset_obs_steps: *horizon 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy 22 | shape_meta: ${shape_meta} 23 | algo_name: bc_rnn 24 | obs_type: image 25 | # oc.select resolver: key, default 26 | task_name: ${oc.select:task.task_name,lift} 27 | dataset_type: ${oc.select:task.dataset_type,ph} 28 | crop_shape: [76,76] 29 | 30 | dataloader: 31 | batch_size: 64 32 | num_workers: 16 33 | shuffle: True 34 | pin_memory: True 35 | persistent_workers: False 36 | 37 | val_dataloader: 38 | batch_size: 64 39 | num_workers: 16 40 | shuffle: False 41 | pin_memory: True 42 | persistent_workers: False 43 | 44 | training: 45 | device: "cuda:0" 46 | seed: 42 47 | debug: False 48 | resume: True 49 | # optimization 50 | num_epochs: 3050 51 | # training loop control 52 | # in epochs 53 | rollout_every: 50 54 | checkpoint_every: 50 55 | val_every: 1 56 | sample_every: 5 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: test_mean_score 75 | mode: max 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: pusht_lowdim 4 | 5 | name: train_robomimic_lowdim 6 | _target_: 
diffusion_policy.workspace.train_robomimic_lowdim_workspace.TrainRobomimicLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | transition_dim: "${eval: ${task.obs_dim} + ${task.action_dim}}" 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 10 15 | n_obs_steps: 1 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | 21 | policy: 22 | _target_: diffusion_policy.policy.robomimic_lowdim_policy.RobomimicLowdimPolicy 23 | action_dim: ${action_dim} 24 | obs_dim: ${obs_dim} 25 | algo_name: bc_rnn 26 | obs_type: low_dim 27 | # oc.select resolver: key, default 28 | task_name: ${oc.select:task.task_name,lift} 29 | dataset_type: ${oc.select:task.dataset_type,ph} 30 | 31 | dataloader: 32 | batch_size: 256 33 | num_workers: 1 34 | shuffle: True 35 | pin_memory: True 36 | persistent_workers: False 37 | 38 | val_dataloader: 39 | batch_size: 256 40 | num_workers: 1 41 | shuffle: False 42 | pin_memory: True 43 | persistent_workers: False 44 | 45 | training: 46 | device: "cuda:0" 47 | seed: 42 48 | debug: False 49 | resume: True 50 | # optimization 51 | num_epochs: 5000 52 | # training loop control 53 | # in epochs 54 | rollout_every: 50 55 | checkpoint_every: 50 56 | val_every: 1 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: test_mean_score 75 | mode: max 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_real_image_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: real_pusht_image 4 | 5 | name: train_robomimic_image 6 | _target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: &horizon 10 13 | n_obs_steps: 1 14 | n_action_steps: 1 15 | n_latency_steps: 1 16 | dataset_obs_steps: *horizon 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy 22 | shape_meta: ${shape_meta} 23 | algo_name: bc_rnn 24 | obs_type: image 25 | # oc.select resolver: key, default 26 | task_name: ${oc.select:task.task_name,tool_hang} 27 | dataset_type: ${oc.select:task.dataset_type,ph} 28 | crop_shape: [216, 288] # ch, cw 320x240 90% 29 | 30 | dataloader: 31 | batch_size: 32 32 | num_workers: 8 33 | shuffle: True 34 | pin_memory: True 35 | persistent_workers: True 36 | 37 | 
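# dataloader/val_dataloader mirror torch.utils.data.DataLoader keyword arguments; the workspace
# builds the loaders roughly like this (sketch, not the exact workspace code):
#   from torch.utils.data import DataLoader
#   train_loader = DataLoader(dataset, **cfg.dataloader)
#   val_loader = DataLoader(dataset.get_validation_dataset(), **cfg.val_dataloader)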
val_dataloader: 38 | batch_size: 32 39 | num_workers: 1 40 | shuffle: False 41 | pin_memory: True 42 | persistent_workers: False 43 | 44 | training: 45 | device: "cuda:0" 46 | seed: 42 47 | debug: False 48 | resume: True 49 | # optimization 50 | num_epochs: 1000 51 | # training loop control 52 | # in epochs 53 | rollout_every: 50 54 | checkpoint_every: 50 55 | val_every: 1 56 | sample_every: 5 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: train_loss 75 | mode: min 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/dataset/base_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/gibson_dataset.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import cv2 3 | import bz2 4 | import math 5 | 
import json 6 | import tqdm 7 | import h5py 8 | import glob 9 | import torch 10 | import random 11 | import numpy as np 12 | import os.path as osp 13 | import _pickle as cPickle 14 | import skimage.morphology as skmp 15 | from torch.utils.data import Dataset 16 | import os 17 | import clip 18 | from diffusion_policy.common.pytorch_util import dict_apply 19 | from torch.utils.data import DataLoader 20 | from diffusion_policy.model.common.normalizer import LinearNormalizer 21 | from diffusion_policy.common.normalize_util import get_image_range_normalizer 22 | from typing import Dict 23 | 24 | def count_file_in_folder(path): 25 | count = 0 26 | for _, _, files in os.walk(path): 27 | count += len(files) 28 | return count 29 | 30 | class TrajectoryDataset(Dataset): 31 | def __init__(self, train_idx): 32 | self.train_idx = train_idx 33 | 34 | def get_normalizer(self, mode='limits', **kwargs): 35 | with bz2.BZ2File("..diffusion/data/sample_h16/{}.pbz2".format(str(0)), 'rb') as fp: 36 | tmp_data = cPickle.load(fp) 37 | 38 | data = { 39 | 'clip_feature': tmp_data['obs']['clip_feature'].numpy(), 40 | 'action': tmp_data['action'].numpy() 41 | } 42 | 43 | normalizer = LinearNormalizer() 44 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 45 | return normalizer 46 | 47 | def __len__(self): 48 | return len(self.train_idx) 49 | 50 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 51 | tmp_idx = self.train_idx[idx] 52 | with bz2.BZ2File("..diffusion/data/sample_h16/{}.pbz2".format(str(tmp_idx)), 'rb') as fp: 53 | data = cPickle.load(fp) 54 | return data 55 | 56 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/kitchen_lowdim_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import numpy as np 4 | import copy 5 | import pathlib 6 | from diffusion_policy.common.pytorch_util import dict_apply 7 | from diffusion_policy.common.replay_buffer import ReplayBuffer 8 | from diffusion_policy.common.sampler import SequenceSampler, get_val_mask 9 | from diffusion_policy.model.common.normalizer import LinearNormalizer, SingleFieldLinearNormalizer 10 | from diffusion_policy.dataset.base_dataset import BaseLowdimDataset 11 | 12 | class KitchenLowdimDataset(BaseLowdimDataset): 13 | def __init__(self, 14 | dataset_dir, 15 | horizon=1, 16 | pad_before=0, 17 | pad_after=0, 18 | seed=42, 19 | val_ratio=0.0 20 | ): 21 | super().__init__() 22 | 23 | data_directory = pathlib.Path(dataset_dir) 24 | observations = np.load(data_directory / "observations_seq.npy") 25 | actions = np.load(data_directory / "actions_seq.npy") 26 | masks = np.load(data_directory / "existence_mask.npy") 27 | 28 | self.replay_buffer = ReplayBuffer.create_empty_numpy() 29 | for i in range(len(masks)): 30 | eps_len = int(masks[i].sum()) 31 | obs = observations[i,:eps_len].astype(np.float32) 32 | action = actions[i,:eps_len].astype(np.float32) 33 | data = { 34 | 'obs': obs, 35 | 'action': action 36 | } 37 | self.replay_buffer.add_episode(data) 38 | 39 | val_mask = get_val_mask( 40 | n_episodes=self.replay_buffer.n_episodes, 41 | val_ratio=val_ratio, 42 | seed=seed) 43 | train_mask = ~val_mask 44 | self.sampler = SequenceSampler( 45 | replay_buffer=self.replay_buffer, 46 | sequence_length=horizon, 47 | pad_before=pad_before, 48 | pad_after=pad_after, 49 | episode_mask=train_mask) 50 | 51 | self.train_mask = train_mask 52 | self.horizon = horizon 53 | self.pad_before = pad_before 54 | 
self.pad_after = pad_after 55 | 56 | def get_validation_dataset(self): 57 | val_set = copy.copy(self) 58 | val_set.sampler = SequenceSampler( 59 | replay_buffer=self.replay_buffer, 60 | sequence_length=self.horizon, 61 | pad_before=self.pad_before, 62 | pad_after=self.pad_after, 63 | episode_mask=~self.train_mask 64 | ) 65 | val_set.train_mask = ~self.train_mask 66 | return val_set 67 | 68 | def get_normalizer(self, mode='limits', **kwargs): 69 | data = { 70 | 'obs': self.replay_buffer['obs'], 71 | 'action': self.replay_buffer['action'] 72 | } 73 | if 'range_eps' not in kwargs: 74 | # to prevent blowing up dims that barely change 75 | kwargs['range_eps'] = 5e-2 76 | normalizer = LinearNormalizer() 77 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 78 | return normalizer 79 | 80 | def get_all_actions(self) -> torch.Tensor: 81 | return torch.from_numpy(self.replay_buffer['action']) 82 | 83 | def __len__(self) -> int: 84 | return len(self.sampler) 85 | 86 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 87 | sample = self.sampler.sample_sequence(idx) 88 | data = sample 89 | 90 | torch_data = dict_apply(data, torch.from_numpy) 91 | return torch_data 92 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/pusht_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import numpy as np 4 | import copy 5 | from diffusion_policy.common.pytorch_util import dict_apply 6 | from diffusion_policy.common.replay_buffer import ReplayBuffer 7 | from diffusion_policy.common.sampler import ( 8 | SequenceSampler, get_val_mask, downsample_mask) 9 | from diffusion_policy.model.common.normalizer import LinearNormalizer 10 | from diffusion_policy.dataset.base_dataset import BaseLowdimDataset 11 | 12 | class PushTLowdimDataset(BaseLowdimDataset): 13 | def __init__(self, 14 | zarr_path, 15 | horizon=1, 16 | pad_before=0, 17 | pad_after=0, 18 | obs_key='keypoint', 19 | state_key='state', 20 | action_key='action', 21 | seed=42, 22 | val_ratio=0.0, 23 | max_train_episodes=None 24 | ): 25 | super().__init__() 26 | self.replay_buffer = ReplayBuffer.copy_from_path( 27 | zarr_path, keys=[obs_key, state_key, action_key]) 28 | 29 | val_mask = get_val_mask( 30 | n_episodes=self.replay_buffer.n_episodes, 31 | val_ratio=val_ratio, 32 | seed=seed) 33 | train_mask = ~val_mask 34 | train_mask = downsample_mask( 35 | mask=train_mask, 36 | max_n=max_train_episodes, 37 | seed=seed) 38 | 39 | self.sampler = SequenceSampler( 40 | replay_buffer=self.replay_buffer, 41 | sequence_length=horizon, 42 | pad_before=pad_before, 43 | pad_after=pad_after, 44 | episode_mask=train_mask 45 | ) 46 | self.obs_key = obs_key 47 | self.state_key = state_key 48 | self.action_key = action_key 49 | self.train_mask = train_mask 50 | self.horizon = horizon 51 | self.pad_before = pad_before 52 | self.pad_after = pad_after 53 | 54 | def get_validation_dataset(self): 55 | val_set = copy.copy(self) 56 | val_set.sampler = SequenceSampler( 57 | replay_buffer=self.replay_buffer, 58 | sequence_length=self.horizon, 59 | pad_before=self.pad_before, 60 | pad_after=self.pad_after, 61 | episode_mask=~self.train_mask 62 | ) 63 | val_set.train_mask = ~self.train_mask 64 | return val_set 65 | 66 | def get_normalizer(self, mode='limits', **kwargs): 67 | data = self._sample_to_data(self.replay_buffer) 68 | normalizer = LinearNormalizer() 69 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 70 
| return normalizer 71 | 72 | def get_all_actions(self) -> torch.Tensor: 73 | return torch.from_numpy(self.replay_buffer[self.action_key]) 74 | 75 | def __len__(self) -> int: 76 | return len(self.sampler) 77 | 78 | def _sample_to_data(self, sample): 79 | keypoint = sample[self.obs_key] 80 | state = sample[self.state_key] 81 | agent_pos = state[:,:2] 82 | obs = np.concatenate([ 83 | keypoint.reshape(keypoint.shape[0], -1), 84 | agent_pos], axis=-1) 85 | 86 | data = { 87 | 'obs': obs, # T, D_o 88 | 'action': sample[self.action_key], # T, D_a 89 | } 90 | return data 91 | 92 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 93 | sample = self.sampler.sample_sequence(idx) 94 | data = self._sample_to_data(sample) 95 | 96 | torch_data = dict_apply(data, torch.from_numpy) 97 | return torch_data 98 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/pusht_image_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import numpy as np 4 | import copy 5 | from diffusion_policy.common.pytorch_util import dict_apply 6 | from diffusion_policy.common.replay_buffer import ReplayBuffer 7 | from diffusion_policy.common.sampler import ( 8 | SequenceSampler, get_val_mask, downsample_mask) 9 | from diffusion_policy.model.common.normalizer import LinearNormalizer 10 | from diffusion_policy.dataset.base_dataset import BaseImageDataset 11 | from diffusion_policy.common.normalize_util import get_image_range_normalizer 12 | 13 | class PushTImageDataset(BaseImageDataset): 14 | def __init__(self, 15 | zarr_path, 16 | horizon=1, 17 | pad_before=0, 18 | pad_after=0, 19 | seed=42, 20 | val_ratio=0.0, 21 | max_train_episodes=None 22 | ): 23 | 24 | super().__init__() 25 | self.replay_buffer = ReplayBuffer.copy_from_path( 26 | zarr_path, keys=['img', 'state', 'action']) 27 | val_mask = get_val_mask( 28 | n_episodes=self.replay_buffer.n_episodes, 29 | val_ratio=val_ratio, 30 | seed=seed) 31 | train_mask = ~val_mask 32 | train_mask = downsample_mask( 33 | mask=train_mask, 34 | max_n=max_train_episodes, 35 | seed=seed) 36 | 37 | self.sampler = SequenceSampler( 38 | replay_buffer=self.replay_buffer, 39 | sequence_length=horizon, 40 | pad_before=pad_before, 41 | pad_after=pad_after, 42 | episode_mask=train_mask) 43 | self.train_mask = train_mask 44 | self.horizon = horizon 45 | self.pad_before = pad_before 46 | self.pad_after = pad_after 47 | 48 | def get_validation_dataset(self): 49 | val_set = copy.copy(self) 50 | val_set.sampler = SequenceSampler( 51 | replay_buffer=self.replay_buffer, 52 | sequence_length=self.horizon, 53 | pad_before=self.pad_before, 54 | pad_after=self.pad_after, 55 | episode_mask=~self.train_mask 56 | ) 57 | val_set.train_mask = ~self.train_mask 58 | return val_set 59 | 60 | def get_normalizer(self, mode='limits', **kwargs): 61 | data = { 62 | 'action': self.replay_buffer['action'], 63 | 'agent_pos': self.replay_buffer['state'][...,:2] 64 | } 65 | normalizer = LinearNormalizer() 66 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 67 | normalizer['image'] = get_image_range_normalizer() 68 | return normalizer 69 | 70 | def __len__(self) -> int: 71 | return len(self.sampler) 72 | 73 | def _sample_to_data(self, sample): 74 | agent_pos = sample['state'][:,:2].astype(np.float32) # (agent_posx2, block_posex3) 75 | image = np.moveaxis(sample['img'],-1,1)/255 76 | 77 | data = { 78 | 'obs': { 79 | 'image': image, # T, 3, 96, 96 80 | 'agent_pos': 
agent_pos, # T, 2 81 | }, 82 | 'action': sample['action'].astype(np.float32) # T, 2 83 | } 84 | return data 85 | 86 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 87 | sample = self.sampler.sample_sequence(idx) 88 | data = self._sample_to_data(sample) 89 | torch_data = dict_apply(data, torch.from_numpy) # change to tensor 90 | return torch_data 91 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img.tar.gz -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/0.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/1.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/10.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/11.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/12.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/13.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/14.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/15.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/2.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/3.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/4.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/5.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/6.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/7.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/8.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/9.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/action.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/action.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/pose.png -------------------------------------------------------------------------------- /diffusion_policy/env_runner/base_image_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from diffusion_policy.policy.base_image_policy import 
BaseImagePolicy 3 | 4 | class BaseImageRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | def run(self, policy: BaseImagePolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /diffusion_policy/env_runner/base_lowdim_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy 3 | 4 | class BaseLowdimRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | def run(self, policy: BaseLowdimPolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /diffusion_policy/env_runner/real_pusht_image_runner.py: -------------------------------------------------------------------------------- 1 | from diffusion_policy.policy.base_image_policy import BaseImagePolicy 2 | from diffusion_policy.env_runner.base_image_runner import BaseImageRunner 3 | 4 | class RealPushTImageRunner(BaseImageRunner): 5 | def __init__(self, 6 | output_dir): 7 | super().__init__(output_dir) 8 | 9 | def run(self, policy: BaseImagePolicy): 10 | return dict() 11 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_recording_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from diffusion_policy.real_world.video_recorder import VideoRecorder 4 | 5 | class VideoRecordingWrapper(gym.Wrapper): 6 | def __init__(self, 7 | env, 8 | video_recoder: VideoRecorder, 9 | mode='rgb_array', 10 | file_path=None, 11 | steps_per_render=1, 12 | **kwargs 13 | ): 14 | """ 15 | When file_path is None, don't record. 
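        When recording, a frame is captured via env.render(mode) every steps_per_render
        environment steps and written to the output video.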
16 | """ 17 | super().__init__(env) 18 | 19 | self.mode = mode 20 | self.render_kwargs = kwargs 21 | self.steps_per_render = steps_per_render 22 | self.file_path = file_path 23 | self.video_recoder = video_recoder 24 | 25 | self.step_count = 0 26 | 27 | def reset(self, **kwargs): 28 | obs = super().reset(**kwargs) 29 | self.frames = list() 30 | self.step_count = 1 31 | self.video_recoder.stop() 32 | return obs 33 | 34 | def step(self, action): 35 | result = super().step(action) 36 | self.step_count += 1 37 | if self.file_path is not None \ 38 | and ((self.step_count % self.steps_per_render) == 0): 39 | if not self.video_recoder.is_ready(): 40 | self.video_recoder.start(self.file_path) 41 | 42 | frame = self.env.render( 43 | mode=self.mode, **self.render_kwargs) 44 | assert frame.dtype == np.uint8 45 | self.video_recoder.write_frame(frame) 46 | return result 47 | 48 | def render(self, mode='rgb_array', **kwargs): 49 | if self.video_recoder.is_ready(): 50 | self.video_recoder.stop() 51 | return self.file_path 52 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class VideoWrapper(gym.Wrapper): 5 | def __init__(self, 6 | env, 7 | mode='rgb_array', 8 | enabled=True, 9 | steps_per_render=1, 10 | **kwargs 11 | ): 12 | super().__init__(env) 13 | 14 | self.mode = mode 15 | self.enabled = enabled 16 | self.render_kwargs = kwargs 17 | self.steps_per_render = steps_per_render 18 | 19 | self.frames = list() 20 | self.step_count = 0 21 | 22 | def reset(self, **kwargs): 23 | obs = super().reset(**kwargs) 24 | self.frames = list() 25 | self.step_count = 1 26 | if self.enabled: 27 | frame = self.env.render( 28 | mode=self.mode, **self.render_kwargs) 29 | assert frame.dtype == np.uint8 30 | self.frames.append(frame) 31 | return obs 32 | 33 | def step(self, action): 34 | result = super().step(action) 35 | self.step_count += 1 36 | if self.enabled and ((self.step_count % self.steps_per_render) == 0): 37 | frame = self.env.render( 38 | mode=self.mode, **self.render_kwargs) 39 | assert frame.dtype == np.uint8 40 | self.frames.append(frame) 41 | return result 42 | 43 | def render(self, mode='rgb_array', **kwargs): 44 | return self.frames 45 | -------------------------------------------------------------------------------- /diffusion_policy/model/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/model/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/model/bet/action_ae/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | import abc 5 | 6 | from typing import Optional, Union 7 | 8 | import diffusion_policy.model.bet.utils as utils 9 | 10 | 11 | class AbstractActionAE(utils.SaveModule, abc.ABC): 12 | @abc.abstractmethod 13 | def fit_model( 14 | self, 15 | input_dataloader: DataLoader, 16 | eval_dataloader: DataLoader, 17 | obs_encoding_net: Optional[nn.Module] = None, 18 | ) -> None: 19 | pass 20 | 21 | @abc.abstractmethod 22 | def encode_into_latent( 23 | self, 24 | input_action: torch.Tensor, 25 | input_rep: Optional[torch.Tensor], 26 | ) -> torch.Tensor: 27 | """ 28 | Given 
the input action, discretize it. 29 | 30 | Inputs: 31 | input_action (shape: ... x action_dim): The input action to discretize. This can be in a batch, 32 | and is generally assumed that the last dimnesion is the action dimension. 33 | 34 | Outputs: 35 | discretized_action (shape: ... x num_tokens): The discretized action. 36 | """ 37 | raise NotImplementedError 38 | 39 | @abc.abstractmethod 40 | def decode_actions( 41 | self, 42 | latent_action_batch: Optional[torch.Tensor], 43 | input_rep_batch: Optional[torch.Tensor] = None, 44 | ) -> torch.Tensor: 45 | """ 46 | Given a discretized action, convert it to a continuous action. 47 | 48 | Inputs: 49 | latent_action_batch (shape: ... x num_tokens): The discretized action 50 | generated by the discretizer. 51 | 52 | Outputs: 53 | continuous_action (shape: ... x action_dim): The continuous action. 54 | """ 55 | raise NotImplementedError 56 | 57 | @property 58 | @abc.abstractmethod 59 | def num_latents(self) -> Union[int, float]: 60 | """ 61 | Number of possible latents for this generator, useful for state priors that use softmax. 62 | """ 63 | return float("inf") 64 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/latent_generators/latent_generator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from typing import Tuple, Optional 4 | 5 | import diffusion_policy.model.bet.utils as utils 6 | 7 | 8 | class AbstractLatentGenerator(abc.ABC, utils.SaveModule): 9 | """ 10 | Abstract class for a generative model that can generate latents given observation representations. 11 | 12 | In the probabilisitc sense, this model fits and samples from P(latent|observation) given some observation. 13 | """ 14 | 15 | @abc.abstractmethod 16 | def get_latent_and_loss( 17 | self, 18 | obs_rep: torch.Tensor, 19 | target_latents: torch.Tensor, 20 | seq_masks: Optional[torch.Tensor] = None, 21 | ) -> Tuple[torch.Tensor, torch.Tensor]: 22 | """ 23 | Given a set of observation representation and generated latents, get the encoded latent and the loss. 24 | 25 | Inputs: 26 | input_action: Batch of the actions taken in the multimodal demonstrations. 27 | target_latents: Batch of the latents that the generator should learn to generate the actions from. 28 | seq_masks: Batch of masks that indicate which timesteps are valid. 29 | 30 | Outputs: 31 | latent: The sampled latent from the observation. 32 | loss: The loss of the latent generator. 33 | """ 34 | pass 35 | 36 | @abc.abstractmethod 37 | def generate_latents( 38 | self, seq_obses: torch.Tensor, seq_masks: torch.Tensor 39 | ) -> torch.Tensor: 40 | """ 41 | Given a batch of sequences of observations, generate a batch of sequences of latents. 42 | 43 | Inputs: 44 | seq_obses: Batch of sequences of observations, of shape seq x batch x dim, following the transformer convention. 45 | seq_masks: Batch of sequences of masks, of shape seq x batch, following the transformer convention. 46 | 47 | Outputs: 48 | seq_latents: Batch of sequences of latents of shape seq x batch x latent_dim. 49 | """ 50 | pass 51 | 52 | def get_optimizer( 53 | self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] 54 | ) -> torch.optim.Optimizer: 55 | """ 56 | Default optimizer class. Override this if you want to use a different optimizer. 
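As a minimal sketch of such an override (AdamW is chosen purely for illustration and is an assumption, not something this class prescribes):

    def get_optimizer(self, weight_decay, learning_rate, betas):
        # swap the default Adam below for AdamW with the same hyperparameters
        return torch.optim.AdamW(
            self.parameters(), lr=learning_rate,
            weight_decay=weight_decay, betas=betas
        )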
57 | """ 58 | return torch.optim.Adam( 59 | self.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=betas 60 | ) 61 | 62 | 63 | class LatentGeneratorDataParallel(torch.nn.DataParallel): 64 | def get_latent_and_loss(self, *args, **kwargs): 65 | return self.module.get_latent_and_loss(*args, **kwargs) # type: ignore 66 | 67 | def generate_latents(self, *args, **kwargs): 68 | return self.module.generate_latents(*args, **kwargs) # type: ignore 69 | 70 | def get_optimizer(self, *args, **kwargs): 71 | return self.module.get_optimizer(*args, **kwargs) # type: ignore 72 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/libraries/mingpt/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) Copyright (c) 2020 Andrej Karpathy 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | 9 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/libraries/mingpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/model/bet/libraries/mingpt/__init__.py -------------------------------------------------------------------------------- /diffusion_policy/model/bet/libraries/mingpt/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | 8 | def set_seed(seed): 9 | random.seed(seed) 10 | np.random.seed(seed) 11 | torch.manual_seed(seed) 12 | torch.cuda.manual_seed_all(seed) 13 | 14 | 15 | def top_k_logits(logits, k): 16 | v, ix = torch.topk(logits, k) 17 | out = logits.clone() 18 | out[out < v[:, [-1]]] = -float("Inf") 19 | return out 20 | 21 | 22 | @torch.no_grad() 23 | def sample(model, x, steps, temperature=1.0, sample=False, top_k=None): 24 | """ 25 | take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in 26 | the sequence, feeding the predictions back into the model each time. Clearly the sampling 27 | has quadratic complexity unlike an RNN that is only linear, and has a finite context window 28 | of block_size, unlike an RNN that has an infinite context window. 
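A typical call might look like the following sketch; the model name gpt and the start-token index are illustrative assumptions, and any minGPT-style model exposing get_block_size() should work:

    context = torch.zeros((1, 1), dtype=torch.long)  # (b, t) batch with a single conditioning token
    out = sample(gpt, context, steps=10, temperature=1.0, sample=True, top_k=5)
    # out has shape (1, 11): the original context followed by 10 newly generated token indices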
29 | """ 30 | block_size = model.get_block_size() 31 | model.eval() 32 | for k in range(steps): 33 | x_cond = ( 34 | x if x.size(1) <= block_size else x[:, -block_size:] 35 | ) # crop context if needed 36 | logits, _ = model(x_cond) 37 | # pluck the logits at the final step and scale by temperature 38 | logits = logits[:, -1, :] / temperature 39 | # optionally crop probabilities to only the top k options 40 | if top_k is not None: 41 | logits = top_k_logits(logits, top_k) 42 | # apply softmax to convert to probabilities 43 | probs = F.softmax(logits, dim=-1) 44 | # sample from the distribution or take the most likely 45 | if sample: 46 | ix = torch.multinomial(probs, num_samples=1) 47 | else: 48 | _, ix = torch.topk(probs, k=1, dim=-1) 49 | # append to the sequence and continue 50 | x = torch.cat((x, ix), dim=1) 51 | 52 | return x 53 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | # if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from diffusers.optimization import ( 2 | Union, SchedulerType, Optional, 3 | Optimizer, TYPE_TO_SCHEDULER_FUNCTION 4 | ) 5 | 6 | def get_scheduler( 7 | name: Union[str, SchedulerType], 8 | optimizer: Optimizer, 9 | num_warmup_steps: Optional[int] = None, 10 | num_training_steps: Optional[int] = None, 11 | **kwargs 12 | ): 13 | """ 14 | Added kwargs vs diffuser's original implementation 15 | 16 | Unified API to get any scheduler from its name. 17 | 18 | Args: 19 | name (`str` or `SchedulerType`): 20 | The name of the scheduler to use. 21 | optimizer (`torch.optim.Optimizer`): 22 | The optimizer that will be used during training. 23 | num_warmup_steps (`int`, *optional*): 24 | The number of warmup steps to do. This is not required by all schedulers (hence the argument being 25 | optional), the function will raise an error if it's unset and the scheduler type requires it. 26 | num_training_steps (`int``, *optional*): 27 | The number of training steps to do. 
This is not required by all schedulers (hence the argument being 28 | optional), the function will raise an error if it's unset and the scheduler type requires it. 29 | """ 30 | name = SchedulerType(name) 31 | schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] 32 | if name == SchedulerType.CONSTANT: 33 | return schedule_func(optimizer, **kwargs) 34 | 35 | # All other schedulers require `num_warmup_steps` 36 | if num_warmup_steps is None: 37 | raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.") 38 | 39 | if name == SchedulerType.CONSTANT_WITH_WARMUP: 40 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs) 41 | 42 | # All other schedulers require `num_training_steps` 43 | if num_training_steps is None: 44 | raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") 45 | 46 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 11 | pass 12 | return device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/conv1d_components.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 
horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/ema_model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from torch.nn.modules.batchnorm import _BatchNorm 4 | 5 | class EMAModel: 6 | """ 7 | Exponential Moving Average of models weights 8 | """ 9 | 10 | def __init__( 11 | self, 12 | model, 13 | update_after_step=0, 14 | inv_gamma=1.0, 15 | power=2 / 3, 16 | min_value=0.0, 17 | max_value=0.9999 18 | ): 19 | """ 20 | @crowsonkb's notes on EMA Warmup: 21 | If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan 22 | to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), 23 | gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 24 | at 215.4k steps). 25 | Args: 26 | inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. 27 | power (float): Exponential factor of EMA warmup. Default: 2/3. 28 | min_value (float): The minimum EMA decay rate. Default: 0. 29 | """ 30 | 31 | self.averaged_model = model 32 | self.averaged_model.eval() 33 | self.averaged_model.requires_grad_(False) 34 | 35 | self.update_after_step = update_after_step 36 | self.inv_gamma = inv_gamma 37 | self.power = power 38 | self.min_value = min_value 39 | self.max_value = max_value 40 | 41 | self.decay = 0.0 42 | self.optimization_step = 0 43 | 44 | def get_decay(self, optimization_step): 45 | """ 46 | Compute the decay factor for the exponential moving average. 47 | """ 48 | step = max(0, optimization_step - self.update_after_step - 1) 49 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 50 | 51 | if step <= 0: 52 | return 0.0 53 | 54 | return max(self.min_value, min(value, self.max_value)) 55 | 56 | @torch.no_grad() 57 | def step(self, new_model): 58 | self.decay = self.get_decay(self.optimization_step) 59 | 60 | # old_all_dataptrs = set() 61 | # for param in new_model.parameters(): 62 | # data_ptr = param.data_ptr() 63 | # if data_ptr != 0: 64 | # old_all_dataptrs.add(data_ptr) 65 | 66 | all_dataptrs = set() 67 | for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): 68 | for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): 69 | # iterative over immediate parameters only. 70 | if isinstance(param, dict): 71 | raise RuntimeError('Dict parameter not supported') 72 | 73 | # data_ptr = param.data_ptr() 74 | # if data_ptr != 0: 75 | # all_dataptrs.add(data_ptr) 76 | 77 | if isinstance(module, _BatchNorm): 78 | # skip batchnorms 79 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 80 | elif not param.requires_grad: 81 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 82 | else: 83 | ema_param.mul_(self.decay) 84 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 85 | 86 | # verify that iterating over module and then parameters is identical to parameters recursively. 
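# For intuition on the warmup schedule (an illustrative check, not part of the original code):
# with the defaults inv_gamma=1.0 and power=2/3, get_decay at roughly 31.6K steps gives
# 1 - (1 + 31_600) ** (-2 / 3) ~= 0.999, matching the notes in __init__;
# the value is then clamped to [min_value, max_value].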
87 | # assert old_all_dataptrs == all_dataptrs 88 | self.optimization_step += 1 89 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /diffusion_policy/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | return resnet_model 29 | -------------------------------------------------------------------------------- /diffusion_policy/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /diffusion_policy/policy/base_lowdim_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimPolicy(ModuleAttrMixin): 8 | # ========= inference ============ 9 | # also as self.device and self.dtype for inference device transfer 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | obs: B,To,Do 14 | return: 15 | 
action: B,Ta,Da 16 | To = 3 17 | Ta = 4 18 | T = 6 19 | |o|o|o| 20 | | | |a|a|a|a| 21 | |o|o| 22 | | |a|a|a|a|a| 23 | | | | | |a|a| 24 | """ 25 | raise NotImplementedError() 26 | 27 | # reset state for stateful policies 28 | def reset(self): 29 | pass 30 | 31 | # ========== training =========== 32 | # no standard training interface except setting normalizer 33 | def set_normalizer(self, normalizer: LinearNormalizer): 34 | raise NotImplementedError() 35 | 36 | -------------------------------------------------------------------------------- /diffusion_policy/policy/robomimic_lowdim_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | from diffusion_policy.model.common.normalizer import LinearNormalizer 4 | from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy 5 | 6 | from robomimic.algo import algo_factory 7 | from robomimic.algo.algo import PolicyAlgo 8 | import robomimic.utils.obs_utils as ObsUtils 9 | from diffusion_policy.common.robomimic_config_util import get_robomimic_config 10 | 11 | class RobomimicLowdimPolicy(BaseLowdimPolicy): 12 | def __init__(self, 13 | action_dim, 14 | obs_dim, 15 | algo_name='bc_rnn', 16 | obs_type='low_dim', 17 | task_name='square', 18 | dataset_type='ph', 19 | ): 20 | super().__init__() 21 | # key for robomimic obs input 22 | # previously this is 'object', 'robot0_eef_pos' etc 23 | obs_key = 'obs' 24 | 25 | config = get_robomimic_config( 26 | algo_name=algo_name, 27 | hdf5_type=obs_type, 28 | task_name=task_name, 29 | dataset_type=dataset_type) 30 | with config.unlocked(): 31 | config.observation.modalities.obs.low_dim = [obs_key] 32 | 33 | ObsUtils.initialize_obs_utils_with_config(config) 34 | model: PolicyAlgo = algo_factory( 35 | algo_name=config.algo_name, 36 | config=config, 37 | obs_key_shapes={obs_key: [obs_dim]}, 38 | ac_dim=action_dim, 39 | device='cpu', 40 | ) 41 | self.model = model 42 | self.nets = model.nets 43 | self.normalizer = LinearNormalizer() 44 | self.obs_key = obs_key 45 | self.config = config 46 | 47 | def to(self,*args,**kwargs): 48 | device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs) 49 | if device is not None: 50 | self.model.device = device 51 | super().to(*args,**kwargs) 52 | 53 | # =========== inference ============= 54 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 55 | obs = self.normalizer['obs'].normalize(obs_dict['obs']) 56 | assert obs.shape[1] == 1 57 | robomimic_obs_dict = {self.obs_key: obs[:,0,:]} 58 | naction = self.model.get_action(robomimic_obs_dict) 59 | action = self.normalizer['action'].unnormalize(naction) 60 | # (B, Da) 61 | result = { 62 | 'action': action[:,None,:] # (B, 1, Da) 63 | } 64 | return result 65 | 66 | def reset(self): 67 | self.model.reset() 68 | 69 | # =========== training ============== 70 | def set_normalizer(self, normalizer: LinearNormalizer): 71 | self.normalizer.load_state_dict(normalizer.state_dict()) 72 | 73 | def train_on_batch(self, batch, epoch, validate=False): 74 | nbatch = self.normalizer.normalize(batch) 75 | robomimic_batch = { 76 | 'obs': {self.obs_key: nbatch['obs']}, 77 | 'actions': nbatch['action'] 78 | } 79 | input_batch = self.model.process_batch_for_training( 80 | robomimic_batch) 81 | info = self.model.train_on_batch( 82 | batch=input_batch, epoch=epoch, validate=validate) 83 | # keys: losses, predictions 84 | return info 85 | 86 | def get_optimizer(self): 87 | return 
self.model.optimizers['policy'] 88 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/real_world/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/multi_camera_visualizer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import multiprocessing as mp 3 | import numpy as np 4 | import cv2 5 | from threadpoolctl import threadpool_limits 6 | from diffusion_policy.real_world.multi_realsense import MultiRealsense 7 | 8 | class MultiCameraVisualizer(mp.Process): 9 | def __init__(self, 10 | realsense: MultiRealsense, 11 | row, col, 12 | window_name='Multi Cam Vis', 13 | vis_fps=60, 14 | fill_value=0, 15 | rgb_to_bgr=True 16 | ): 17 | super().__init__() 18 | self.row = row 19 | self.col = col 20 | self.window_name = window_name 21 | self.vis_fps = vis_fps 22 | self.fill_value = fill_value 23 | self.rgb_to_bgr=rgb_to_bgr 24 | self.realsense = realsense 25 | # shared variables 26 | self.stop_event = mp.Event() 27 | 28 | def start(self, wait=False): 29 | super().start() 30 | 31 | def stop(self, wait=False): 32 | self.stop_event.set() 33 | if wait: 34 | self.stop_wait() 35 | 36 | def start_wait(self): 37 | pass 38 | 39 | def stop_wait(self): 40 | self.join() 41 | 42 | def run(self): 43 | cv2.setNumThreads(1) 44 | threadpool_limits(1) 45 | channel_slice = slice(None) 46 | if self.rgb_to_bgr: 47 | channel_slice = slice(None,None,-1) 48 | 49 | vis_data = None 50 | vis_img = None 51 | while not self.stop_event.is_set(): 52 | vis_data = self.realsense.get_vis(out=vis_data) 53 | color = vis_data['color'] 54 | N, H, W, C = color.shape 55 | assert C == 3 56 | oh = H * self.row 57 | ow = W * self.col 58 | if vis_img is None: 59 | vis_img = np.full((oh, ow, 3), 60 | fill_value=self.fill_value, 
dtype=np.uint8) 61 | for row in range(self.row): 62 | for col in range(self.col): 63 | idx = col + row * self.col 64 | h_start = H * row 65 | h_end = h_start + H 66 | w_start = W * col 67 | w_end = w_start + W 68 | if idx < N: 69 | # opencv uses bgr 70 | vis_img[h_start:h_end,w_start:w_end 71 | ] = color[idx,:,:,channel_slice] 72 | cv2.imshow(self.window_name, vis_img) 73 | cv2.pollKey() 74 | time.sleep(1 / self.vis_fps) 75 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from diffusion_policy.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/bet_blockpush_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | 10 | import os 11 | import click 12 | import pathlib 13 | import numpy as np 14 | from diffusion_policy.common.replay_buffer import ReplayBuffer 15 | 16 | @click.command() 17 | @click.option('-i', '--input', required=True, help='input dir contains npy files') 18 | @click.option('-o', '--output', required=True, help='output zarr path') 19 | @click.option('--abs_action', is_flag=True, default=False) 20 | def main(input, output, abs_action): 21 | data_directory = pathlib.Path(input) 22 | observations = np.load( 23 | data_directory / "multimodal_push_observations.npy" 24 | ) 25 | actions = np.load(data_directory / "multimodal_push_actions.npy") 26 | masks = np.load(data_directory / "multimodal_push_masks.npy") 27 | 28 | buffer = ReplayBuffer.create_empty_numpy() 29 | for i in range(len(masks)): 30 | eps_len = int(masks[i].sum()) 31 | obs = 
observations[i,:eps_len].astype(np.float32) 32 | action = actions[i,:eps_len].astype(np.float32) 33 | if abs_action: 34 | prev_eef_target = obs[:,8:10] 35 | next_eef_target = prev_eef_target + action 36 | action = next_eef_target 37 | data = { 38 | 'obs': obs, 39 | 'action': action 40 | } 41 | buffer.add_episode(data) 42 | 43 | buffer.save_to_path(zarr_path=output, chunk_length=-1) 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/blockpush_abs_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | from diffusion_policy.common.replay_buffer import ReplayBuffer 13 | 14 | 15 | @click.command() 16 | @click.option('-i', '--input', required=True) 17 | @click.option('-o', '--output', required=True) 18 | @click.option('-t', '--target_eef_idx', default=8, type=int) 19 | def main(input, output, target_eef_idx): 20 | buffer = ReplayBuffer.copy_from_path(input) 21 | obs = buffer['obs'] 22 | action = buffer['action'] 23 | prev_eef_target = obs[:,target_eef_idx:target_eef_idx+action.shape[1]] 24 | next_eef_target = prev_eef_target + action 25 | action[:] = next_eef_target 26 | buffer.save_to_path(zarr_path=output, chunk_length=-1) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/episode_lengths.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import click 10 | import numpy as np 11 | import json 12 | from diffusion_policy.common.replay_buffer import ReplayBuffer 13 | 14 | @click.command() 15 | @click.option('--input', '-i', required=True) 16 | @click.option('--dt', default=0.1, type=float) 17 | def main(input, dt): 18 | buffer = ReplayBuffer.create_from_path(input) 19 | lengths = buffer.episode_lengths 20 | durations = lengths * dt 21 | result = { 22 | 'duration/mean': np.mean(durations) 23 | } 24 | 25 | text = json.dumps(result, indent=2) 26 | print(text) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/generate_bet_blockpush.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | 10 | import os 11 | import click 12 | import pathlib 13 | import numpy as np 14 | from tqdm import tqdm 15 | from diffusion_policy.common.replay_buffer import ReplayBuffer 16 | from tf_agents.environments.wrappers import TimeLimit 17 | from tf_agents.environments.gym_wrapper import GymWrapper 18 | from tf_agents.trajectories.time_step import StepType 19 | from diffusion_policy.env.block_pushing.block_pushing_multimodal import BlockPushMultimodal 20 | from diffusion_policy.env.block_pushing.block_pushing import BlockPush 21 | from diffusion_policy.env.block_pushing.oracles.multimodal_push_oracle 
import MultimodalOrientedPushOracle 22 | 23 | @click.command() 24 | @click.option('-o', '--output', required=True) 25 | @click.option('-n', '--n_episodes', default=1000) 26 | @click.option('-c', '--chunk_length', default=-1) 27 | def main(output, n_episodes, chunk_length): 28 | 29 | buffer = ReplayBuffer.create_empty_numpy() 30 | env = TimeLimit(GymWrapper(BlockPushMultimodal()), duration=350) 31 | for i in tqdm(range(n_episodes)): 32 | print(i) 33 | obs_history = list() 34 | action_history = list() 35 | 36 | env.seed(i) 37 | policy = MultimodalOrientedPushOracle(env) 38 | time_step = env.reset() 39 | policy_state = policy.get_initial_state(1) 40 | while True: 41 | action_step = policy.action(time_step, policy_state) 42 | obs = np.concatenate(list(time_step.observation.values()), axis=-1) 43 | action = action_step.action 44 | obs_history.append(obs) 45 | action_history.append(action) 46 | 47 | if time_step.step_type == 2: 48 | break 49 | 50 | # state = env.wrapped_env().gym.get_pybullet_state() 51 | time_step = env.step(action) 52 | obs_history = np.array(obs_history) 53 | action_history = np.array(action_history) 54 | 55 | episode = { 56 | 'obs': obs_history, 57 | 'action': action_history 58 | } 59 | buffer.add_episode(episode) 60 | 61 | buffer.save_to_path(output, chunk_length=chunk_length) 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/real_dataset_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | import zarr 13 | import cv2 14 | import threadpoolctl 15 | from diffusion_policy.real_world.real_data_conversion import real_data_to_replay_buffer 16 | 17 | @click.command() 18 | @click.option('--input', '-i', required=True) 19 | @click.option('--output', '-o', default=None) 20 | @click.option('--resolution', '-r', default='640x480') 21 | @click.option('--n_decoding_threads', '-nd', default=-1, type=int) 22 | @click.option('--n_encoding_threads', '-ne', default=-1, type=int) 23 | def main(input, output, resolution, n_decoding_threads, n_encoding_threads): 24 | out_resolution = tuple(int(x) for x in resolution.split('x')) 25 | input = pathlib.Path(os.path.expanduser(input)) 26 | in_zarr_path = input.joinpath('replay_buffer.zarr') 27 | in_video_dir = input.joinpath('videos') 28 | assert in_zarr_path.is_dir() 29 | assert in_video_dir.is_dir() 30 | if output is None: 31 | output = input.joinpath(resolution + '.zarr.zip') 32 | else: 33 | output = pathlib.Path(os.path.expanduser(output)) 34 | 35 | if output.exists(): 36 | click.confirm('Output path already exists! 
Overrite?', abort=True) 37 | 38 | cv2.setNumThreads(1) 39 | with threadpoolctl.threadpool_limits(1): 40 | replay_buffer = real_data_to_replay_buffer( 41 | dataset_path=str(input), 42 | out_resolutions=out_resolution, 43 | n_decoding_threads=n_decoding_threads, 44 | n_encoding_threads=n_encoding_threads 45 | ) 46 | 47 | print('Saving to disk') 48 | if output.suffix == '.zip': 49 | with zarr.ZipStore(output) as zip_store: 50 | replay_buffer.save_to_store( 51 | store=zip_store 52 | ) 53 | else: 54 | with zarr.DirectoryStore(output) as store: 55 | replay_buffer.save_to_store( 56 | store=store 57 | ) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/real_pusht_successrate.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import collections 12 | import numpy as np 13 | from tqdm import tqdm 14 | import json 15 | 16 | @click.command() 17 | @click.option( 18 | '--reference', '-r', required=True, 19 | help='Reference metrics_raw.json from demonstration dataset.' 20 | ) 21 | @click.option( 22 | '--input', '-i', required=True, 23 | help='Data search path' 24 | ) 25 | def main(reference, input): 26 | # compute the min last metric for demo metrics 27 | demo_metrics = json.load(open(reference, 'r')) 28 | demo_min_metrics = collections.defaultdict(lambda:float('inf')) 29 | for episode_idx, metrics in demo_metrics.items(): 30 | for key, value in metrics.items(): 31 | last_value = value[-1] 32 | demo_min_metrics[key] = min(demo_min_metrics[key], last_value) 33 | print(demo_min_metrics) 34 | 35 | # find all metric 36 | name = 'metrics_raw.json' 37 | search_dir = pathlib.Path(input) 38 | success_rate_map = dict() 39 | for json_path in search_dir.glob('**/'+name): 40 | rel_path = json_path.relative_to(search_dir) 41 | rel_name = str(rel_path.parent) 42 | this_metrics = json.load(json_path.open('r')) 43 | metric_success_idxs = collections.defaultdict(list) 44 | metric_failure_idxs = collections.defaultdict(list) 45 | for episode_idx, metrics in this_metrics.items(): 46 | for key, value in metrics.items(): 47 | last_value = value[-1] 48 | # print(episode_idx, key, last_value) 49 | demo_min = demo_min_metrics[key] 50 | if last_value >= demo_min: 51 | # success 52 | metric_success_idxs[key].append(episode_idx) 53 | else: 54 | metric_failure_idxs[key].append(episode_idx) 55 | # in case of no success 56 | _ = metric_success_idxs[key] 57 | _ = metric_failure_idxs[key] 58 | metric_success_rate = dict() 59 | n_episodes = len(this_metrics) 60 | for key, value in metric_success_idxs.items(): 61 | metric_success_rate[key] = len(value) / n_episodes 62 | # metric_success_rate['failured_idxs'] = metric_failure_idxs 63 | success_rate_map[rel_name] = metric_success_rate 64 | 65 | text = json.dumps(success_rate_map, indent=2) 66 | print(text) 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/robomimic_dataset_action_comparison.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | 
sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | import h5py 13 | import numpy as np 14 | from tqdm import tqdm 15 | from scipy.spatial.transform import Rotation 16 | 17 | def read_all_actions(hdf5_file, metric_skip_steps=1): 18 | n_demos = len(hdf5_file['data']) 19 | all_actions = list() 20 | for i in tqdm(range(n_demos)): 21 | actions = hdf5_file[f'data/demo_{i}/actions'][:] 22 | all_actions.append(actions[metric_skip_steps:]) 23 | all_actions = np.concatenate(all_actions, axis=0) 24 | return all_actions 25 | 26 | 27 | @click.command() 28 | @click.option('-i', '--input', required=True, help='input hdf5 path') 29 | @click.option('-o', '--output', required=True, help='output hdf5 path. Parent directory must exist') 30 | def main(input, output): 31 | # process inputs 32 | input = pathlib.Path(input).expanduser() 33 | assert input.is_file() 34 | output = pathlib.Path(output).expanduser() 35 | assert output.is_file() 36 | 37 | input_file = h5py.File(str(input), 'r') 38 | output_file = h5py.File(str(output), 'r') 39 | 40 | input_all_actions = read_all_actions(input_file) 41 | output_all_actions = read_all_actions(output_file) 42 | pos_dist = np.linalg.norm(input_all_actions[:,:3] - output_all_actions[:,:3], axis=-1) 43 | rot_dist = (Rotation.from_rotvec(input_all_actions[:,3:6] 44 | ) * Rotation.from_rotvec(output_all_actions[:,3:6]).inv() 45 | ).magnitude() 46 | 47 | print(f'max pos dist: {pos_dist.max()}') 48 | print(f'max rot dist: {rot_dist.max()}') 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /diffusion_policy/shared_memory/shared_memory_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from dataclasses import dataclass 3 | import numpy as np 4 | from multiprocessing.managers import SharedMemoryManager 5 | from atomics import atomicview, MemoryOrder, UINT 6 | 7 | @dataclass 8 | class ArraySpec: 9 | name: str 10 | shape: Tuple[int] 11 | dtype: np.dtype 12 | 13 | 14 | class SharedAtomicCounter: 15 | def __init__(self, 16 | shm_manager: SharedMemoryManager, 17 | size :int=8 # 64bit int 18 | ): 19 | shm = shm_manager.SharedMemory(size=size) 20 | self.shm = shm 21 | self.size = size 22 | self.store(0) # initialize 23 | 24 | @property 25 | def buf(self): 26 | return self.shm.buf[:self.size] 27 | 28 | def load(self) -> int: 29 | with atomicview(buffer=self.buf, atype=UINT) as a: 30 | value = a.load(order=MemoryOrder.ACQUIRE) 31 | return value 32 | 33 | def store(self, value: int): 34 | with atomicview(buffer=self.buf, atype=UINT) as a: 35 | a.store(value, order=MemoryOrder.RELEASE) 36 | 37 | def add(self, value: int): 38 | with atomicview(buffer=self.buf, atype=UINT) as a: 39 | a.add(value, order=MemoryOrder.ACQ_REL) 40 | -------------------------------------------------------------------------------- /experiment_scripts/gibson/eval_tdiff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PYTHONPATH=..T-Diff 4 | export CUDA_VISIBLE_DEVICES=0,1 5 | cd ..T-Diff/semexp 6 | 7 | # conda activate tdiff 8 | 9 | python eval_tdiff.py \ 10 | --split val \ 11 | --seed 345 \ 12 | --eval 1 \ 13 | --pf_model_path "models_ckpt/area_model.ckpt" \ 14 | --diff_model_path "models_ckpt/diff_model.ckpt" \ 15 | -d ..experiments \ 16 | --num_local_steps 1 \ 17 | --exp_name "debug" \ 18 | --global_downscaling 1 \ 19 | --mask_nearest_locations \ 20 | 
--pf_masking_opt 'unexplored' \ 21 | --use_nearest_frontier \ 22 | --total_num_scenes "5" \ 23 | --select_diff_step 27 \ 24 | --horizon 32 \ -------------------------------------------------------------------------------- /semexp/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/.DS_Store -------------------------------------------------------------------------------- /semexp/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /semexp/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /semexp/docs/legend_gibson.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/docs/legend_gibson.png -------------------------------------------------------------------------------- /semexp/docs/legend_mp3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/docs/legend_mp3d.png -------------------------------------------------------------------------------- /semexp/envs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/envs/.DS_Store -------------------------------------------------------------------------------- /semexp/envs/__init__.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .habitat import construct_envs 4 | 5 | 6 | def make_vec_envs(args, workers_ignore_signals: bool = False, **kwargs): 7 | envs = construct_envs(args, workers_ignore_signals=workers_ignore_signals, **kwargs) 8 | envs = VecPyTorch(envs, args.device) 9 | return envs 10 | 11 | 12 | # Adapted from 13 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/envs.py#L159 14 | class VecPyTorch: 15 | def __init__(self, venv, device): 16 | self.venv = venv 17 | self.num_envs = venv.num_envs 18 | self.observation_space = venv.observation_space 19 | self.action_space = venv.action_space 20 | self.device = device 21 | 22 | def reset(self): 23 | obs, info = self.venv.reset() 24 | obs = torch.from_numpy(obs).float().to(self.device) 25 | return obs, info 26 | 27 | def step_async(self, actions): 28 | actions = actions.cpu().numpy() 29 | self.venv.step_async(actions) 30 | 31 | def step_wait(self): 32 | obs, reward, done, info = self.venv.step_wait() 33 | obs = torch.from_numpy(obs).float().to(self.device) 34 | reward = torch.from_numpy(reward).float() 35 | return obs, reward, done, info 36 | 37 | def step(self, actions): 38 | actions = actions.cpu().numpy() 39 | obs, reward, done, info = self.venv.step(actions) 40 | obs = torch.from_numpy(obs).float().to(self.device) 41 | reward = torch.from_numpy(reward).float() 42 | return obs, reward, done, info 43 | 44 | def get_rewards(self, inputs): 45 | reward = self.venv.get_rewards(inputs) 46 | reward = torch.from_numpy(reward).float() 47 | return reward 48 | 49 | def plan_act_and_preprocess(self, inputs): 50 | obs, reward, done, info = self.venv.plan_act_and_preprocess(inputs) 51 | obs = torch.from_numpy(obs).float().to(self.device) 52 | reward = torch.from_numpy(reward).float() 53 | return obs, reward, done, info 54 | 55 | def get_reachability_map(self, inputs): 56 | reachability_maps, fmm_dists = self.venv.get_reachability_map(inputs) 57 | reachability_maps = torch.from_numpy(reachability_maps).float().to(self.device) 58 | fmm_dists = torch.from_numpy(fmm_dists).float().to(self.device) 59 | return reachability_maps, fmm_dists 60 | 61 | def get_frontier_map(self, inputs): 62 | frontier_maps = self.venv.get_frontier_map(inputs) 63 | frontier_maps = torch.from_numpy(frontier_maps).to(self.device) 64 | return frontier_maps 65 | 66 | def get_fmm_dists(self, inputs): 67 | fmm_dists = self.venv.get_fmm_dists(inputs) 68 | fmm_dists = torch.from_numpy(fmm_dists).to(self.device) 69 | return fmm_dists 70 | 71 | def current_episodes(self): 72 | curr_eps = self.venv.current_episodes() 73 | return curr_eps 74 | 75 | def get_current_episodes(self): 76 | curr_eps = self.venv.get_current_episodes() 77 | return curr_eps 78 | 79 | def close(self): 80 | return self.venv.close() 81 | -------------------------------------------------------------------------------- /semexp/envs/habitat/configs/tasks/objectnav_gibson.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | TURN_ANGLE: 30 5 | TILT_ANGLE: 30 6 | ACTION_SPACE_CONFIG: "v1" 7 | AGENT_0: 8 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'SEMANTIC_SENSOR'] 9 | HEIGHT: 0.88 10 | RADIUS: 0.18 11 | HABITAT_SIM_V0: 12 | GPU_DEVICE_ID: 0 13 | ALLOW_SLIDING: True 14 | SEMANTIC_SENSOR: 15 | WIDTH: 640 16 | HEIGHT: 480 17 | HFOV: 79 18 | POSITION: [0, 0.88, 0] 19 | RGB_SENSOR: 20 | WIDTH: 640 21 | HEIGHT: 480 22 
| HFOV: 79 23 | POSITION: [0, 0.88, 0] 24 | DEPTH_SENSOR: 25 | WIDTH: 640 26 | HEIGHT: 480 27 | HFOV: 79 28 | MIN_DEPTH: 0.5 29 | MAX_DEPTH: 5.0 30 | POSITION: [0, 0.88, 0] 31 | TASK: 32 | TYPE: ObjectNav-v1 33 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] 34 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] 35 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL'] 36 | SUCCESS: 37 | SUCCESS_DISTANCE: 0.2 38 | 39 | DATASET: 40 | TYPE: PointNav-v1 41 | SPLIT: train 42 | DATA_PATH: "../data/datasets/objectnav/gibson/v1/{split}/{split}.json.gz" 43 | EPISODES_DIR: "../data/datasets/objectnav/gibson/v1/{split}/" 44 | SCENES_DIR: "../data/scene_datasets/" 45 | -------------------------------------------------------------------------------- /semexp/envs/utils/pose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_l2_distance(x1, x2, y1, y2): 5 | """ 6 | Computes the L2 distance between two points. 7 | """ 8 | return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 9 | 10 | 11 | def get_rel_pose_change(pos2, pos1): 12 | x1, y1, o1 = pos1 13 | x2, y2, o2 = pos2 14 | 15 | theta = np.arctan2(y2 - y1, x2 - x1) - o1 16 | dist = get_l2_distance(x1, x2, y1, y2) 17 | dx = dist * np.cos(theta) 18 | dy = dist * np.sin(theta) 19 | do = o2 - o1 20 | 21 | return dx, dy, do 22 | 23 | 24 | def get_new_pose(pose, rel_pose_change): 25 | x, y, o = pose 26 | dx, dy, do = rel_pose_change 27 | 28 | global_dx = dx * np.sin(np.deg2rad(o)) + dy * np.cos(np.deg2rad(o)) 29 | global_dy = dx * np.cos(np.deg2rad(o)) - dy * np.sin(np.deg2rad(o)) 30 | x += global_dy 31 | y += global_dx 32 | o += np.rad2deg(do) 33 | if o > 180.0: 34 | o -= 360.0 35 | 36 | return x, y, o 37 | 38 | 39 | def threshold_poses(coords, shape): 40 | coords[0] = min(max(0, coords[0]), shape[0] - 1) 41 | coords[1] = min(max(0, coords[1]), shape[1] - 1) 42 | return coords 43 | -------------------------------------------------------------------------------- /semexp/envs/utils/rotation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for generating and applying rotation matrices. 
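As an illustrative sanity check (an added example, not part of the original module): rotating the x-axis by 90 degrees about the z-axis with get_r_matrix should map it onto the y-axis,

    R = get_r_matrix([0.0, 0.0, 1.0], np.pi / 2)
    np.allclose(R @ np.array([1.0, 0.0, 0.0]), [0.0, 1.0, 0.0])  # True up to float tolerance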
17 | """ 18 | import numpy as np 19 | 20 | ANGLE_EPS = 0.001 21 | 22 | 23 | def normalize(v): 24 | return v / np.linalg.norm(v) 25 | 26 | 27 | def get_r_matrix(ax_, angle): 28 | ax = normalize(ax_) 29 | if np.abs(angle) > ANGLE_EPS: 30 | S_hat = np.array( 31 | [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]], 32 | dtype=np.float32, 33 | ) 34 | R = ( 35 | np.eye(3) 36 | + np.sin(angle) * S_hat 37 | + (1 - np.cos(angle)) * (np.linalg.matrix_power(S_hat, 2)) 38 | ) 39 | else: 40 | R = np.eye(3) 41 | return R 42 | 43 | 44 | def r_between(v_from_, v_to_): 45 | v_from = normalize(v_from_) 46 | v_to = normalize(v_to_) 47 | ax = normalize(np.cross(v_from, v_to)) 48 | angle = np.arccos(np.dot(v_from, v_to)) 49 | return get_r_matrix(ax, angle) 50 | 51 | 52 | def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to): 53 | inputs = [up_from, lookat_from, up_to, lookat_to] 54 | for i in range(4): 55 | inputs[i] = normalize(np.array(inputs[i]).reshape((-1,))) 56 | up_from, lookat_from, up_to, lookat_to = inputs 57 | r1 = r_between(lookat_from, lookat_to) 58 | 59 | new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1)) 60 | to_x = normalize(np.cross(lookat_to, up_to)) 61 | angle = np.arccos(np.dot(new_x, to_x)) 62 | if angle > ANGLE_EPS: 63 | if angle < np.pi - ANGLE_EPS: 64 | ax = normalize(np.cross(new_x, to_x)) 65 | flip = np.dot(lookat_to, ax) 66 | if flip > 0: 67 | r2 = get_r_matrix(lookat_to, angle) 68 | elif flip < 0: 69 | r2 = get_r_matrix(lookat_to, -1.0 * angle) 70 | else: 71 | # Angle of rotation is too close to 180 degrees, direction of 72 | # rotation does not matter. 73 | r2 = get_r_matrix(lookat_to, angle) 74 | else: 75 | r2 = np.eye(3) 76 | return np.dot(r2, r1) 77 | -------------------------------------------------------------------------------- /semexp/sxz/img/circle0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle0.png -------------------------------------------------------------------------------- /semexp/sxz/img/circle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle1.png -------------------------------------------------------------------------------- /semexp/sxz/img/circle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle2.png -------------------------------------------------------------------------------- /semexp/sxz/img/circle3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle3.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_circle_test0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_circle_test0.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_circle_test1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_circle_test1.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_map.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_map_dilate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_map_dilate.png -------------------------------------------------------------------------------- /semexp/sxz/img/origin_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/origin_map.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Collierville.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Collierville.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Corozal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Corozal.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Darden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Darden.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Markleeville.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Markleeville.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Wiconisco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Wiconisco.png -------------------------------------------------------------------------------- /semexp/sxz/img/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/test.png -------------------------------------------------------------------------------- /semexp/sxz/visualize.py: -------------------------------------------------------------------------------- 1 | import _pickle as cPickle 2 | import bz2 3 | 4 | import gzip 5 | import json 6 | 7 | import cv2 8 | import numpy as np 9 | from PIL import Image, ImageDraw, ImageFont 10 | import os 11 | 12 | GIBSON_OBJECT_COLORS = [ 13 | (0.9400000000000001, 0.7818, 0.66), 14 | (0.9400000000000001, 0.8868, 0.66), 15 | (0.8882000000000001, 0.9400000000000001, 0.66), 16 | 
(0.7832000000000001, 0.9400000000000001, 0.66), 17 | (0.6782000000000001, 0.9400000000000001, 0.66), 18 | (0.66, 0.9400000000000001, 0.7468000000000001), 19 | (0.66, 0.9400000000000001, 0.8518000000000001), 20 | (0.66, 0.9232, 0.9400000000000001), 21 | (0.66, 0.8182, 0.9400000000000001), 22 | (0.66, 0.7132, 0.9400000000000001), 23 | (0.7117999999999999, 0.66, 0.9400000000000001), 24 | (0.8168, 0.66, 0.9400000000000001), 25 | (0.9218, 0.66, 0.9400000000000001), 26 | (0.9400000000000001, 0.66, 0.8531999999999998), 27 | (0.9400000000000001, 0.66, 0.748199999999999), 28 | ] 29 | 30 | COLOR_PALETTE = [ 31 | 1.0, 32 | 1.0, 33 | 1.0, # Out-of-bounds 34 | 0.9, 35 | 0.9, 36 | 0.9, # Floor 37 | *[oci for oc in GIBSON_OBJECT_COLORS for oci in oc], 38 | ] 39 | 40 | val_rooms = ['Collierville', 'Corozal', 'Darden', 'Markleeville', 'Wiconisco'] 41 | episodes_file = '../data/datasets/objectnav/gibson/v1.1/val/content/Darden_episodes.json.gz' 42 | dataset_info_file = '../data/datasets/objectnav/gibson/v1.1/val/val_info.pbz2' 43 | 44 | def visualize_sem_map(sem_map): 45 | c_map = sem_map.astype(np.int32) 46 | color_palette = [int(x * 255.0) for x in COLOR_PALETTE] 47 | semantic_img = Image.new("P", (c_map.shape[1], c_map.shape[0])) 48 | semantic_img.putpalette(color_palette) 49 | semantic_img.putdata((c_map.flatten() % 40).astype(np.uint8)) 50 | semantic_img = semantic_img.convert("RGB") 51 | semantic_img = np.array(semantic_img) 52 | 53 | return semantic_img 54 | 55 | def projection_img(sem_mp): 56 | semantic_img = np.zeros((sem_mp.shape[1], sem_mp.shape[2])) 57 | for i in range(sem_mp.shape[0]): 58 | semantic_img[sem_mp[i].astype(bool)] = i+1 59 | return semantic_img.transpose() 60 | 61 | with bz2.BZ2File(dataset_info_file, "rb") as f: 62 | dataset_info = cPickle.load(f) 63 | 64 | for scene_name in val_rooms: 65 | print(scene_name) 66 | episodes_file = '../data/datasets/objectnav/gibson/v1.1/val/content/{}_episodes.json.gz'.format(scene_name) 67 | with gzip.open(episodes_file, "r") as f: 68 | eps_data = json.loads(f.read().decode("utf-8"))["episodes"] 69 | all_floor_id = [] 70 | for eps in eps_data: 71 | floor_id = eps['floor_id'] 72 | all_floor_id.append(floor_id) 73 | all_floor_id = list(set(all_floor_id)) 74 | print(all_floor_id) 75 | -------------------------------------------------------------------------------- /semexp/util/crop.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 3 | # -------------------------------------------------------- 4 | # References: 5 | # MAE: https://github.com/facebookresearch/mae 6 | # -------------------------------------------------------- 7 | 8 | import math 9 | 10 | import torch 11 | 12 | from torchvision import transforms 13 | from torchvision.transforms import functional as F 14 | 15 | 16 | class RandomResizedCrop(transforms.RandomResizedCrop): 17 | """ 18 | RandomResizedCrop for matching TF/TPU implementation: no for-loop is used. 19 | This may lead to results different from torchvision's version.
20 | Following BYOL's TF code: 21 | https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206 22 | """ 23 | @staticmethod 24 | def get_params(img, scale, ratio): 25 | width, height = F._get_image_size(img) 26 | area = height * width 27 | 28 | target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() 29 | log_ratio = torch.log(torch.tensor(ratio)) 30 | aspect_ratio = torch.exp( 31 | torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) 32 | ).item() 33 | 34 | w = int(round(math.sqrt(target_area * aspect_ratio))) 35 | h = int(round(math.sqrt(target_area / aspect_ratio))) 36 | 37 | w = min(w, width) 38 | h = min(h, height) 39 | 40 | i = torch.randint(0, height - h + 1, size=(1,)).item() 41 | j = torch.randint(0, width - w + 1, size=(1,)).item() 42 | 43 | return i, j, h, w -------------------------------------------------------------------------------- /semexp/util/datasets.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 3 | # -------------------------------------------------------- 4 | # References: 5 | # DeiT: https://github.com/facebookresearch/deit 6 | # MAE: https://github.com/facebookresearch/mae 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | import PIL 11 | 12 | from torchvision import datasets, transforms 13 | from torchvision.datasets.folder import default_loader 14 | 15 | from timm.data import create_transform 16 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 17 | 18 | 19 | class ImageListFolder(datasets.ImageFolder): 20 | def __init__(self, root, transform=None, target_transform=None, 21 | ann_file=None, loader=default_loader): 22 | self.root = root 23 | self.transform = transform 24 | self.loader = loader 25 | self.target_transform = target_transform 26 | self.nb_classes = 1000 27 | 28 | assert ann_file is not None 29 | print('load info from', ann_file) 30 | 31 | self.samples = [] 32 | ann = open(ann_file) 33 | for elem in ann.readlines(): 34 | cut = elem.split(' ') 35 | path_current = os.path.join(root, cut[0]) 36 | target_current = int(cut[1]) 37 | self.samples.append((path_current, target_current)) 38 | ann.close() 39 | 40 | print('load finish') 41 | 42 | 43 | def build_dataset(is_train, args): 44 | transform = build_transform(is_train, args) 45 | 46 | # TODO modify your own dataset here 47 | folder = os.path.join(args.data_path, 'train' if is_train else 'val') 48 | ann_file = os.path.join(args.data_path, 'train.txt' if is_train else 'val.txt') 49 | dataset = ImageListFolder(folder, transform=transform, ann_file=ann_file) 50 | 51 | print(dataset) 52 | 53 | return dataset 54 | 55 | 56 | def build_transform(is_train, args): 57 | mean = IMAGENET_DEFAULT_MEAN 58 | std = IMAGENET_DEFAULT_STD 59 | # train transform 60 | if is_train: 61 | # this should always dispatch to transforms_imagenet_train 62 | transform = create_transform( 63 | input_size=args.input_size, 64 | is_training=True, 65 | color_jitter=args.color_jitter, 66 | auto_augment=args.aa, 67 | interpolation='bicubic', 68 | re_prob=args.reprob, 69 | re_mode=args.remode, 70 | re_count=args.recount, 71 | mean=mean, 72 | std=std, 73 | ) 74 | return transform 75 | 76 | # eval transform 77 | t = [] 78 | if args.input_size <= 224: 79 | crop_pct = 224 / 256 80 | else: 81 | crop_pct = 1.0 82 | size = int(args.input_size / crop_pct) 83 | t.append( 84 | 
transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 85 | ) 86 | t.append(transforms.CenterCrop(args.input_size)) 87 | 88 | t.append(transforms.ToTensor()) 89 | t.append(transforms.Normalize(mean, std)) 90 | return transforms.Compose(t) 91 | -------------------------------------------------------------------------------- /semexp/util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 3 | # -------------------------------------------------------- 4 | # References: 5 | # ELECTRA https://github.com/google-research/electra 6 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 7 | # MAE: https://github.com/facebookresearch/mae 8 | # -------------------------------------------------------- 9 | 10 | import json 11 | 12 | 13 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 14 | """ 15 | Parameter groups for layer-wise lr decay 16 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 17 | """ 18 | param_group_names = {} 19 | param_groups = {} 20 | 21 | num_layers = len(model.blocks) + 1 22 | 23 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 24 | 25 | for n, p in model.named_parameters(): 26 | if not p.requires_grad: 27 | continue 28 | 29 | # no decay: all 1D parameters and model specific ones 30 | if p.ndim == 1 or n in no_weight_decay_list: 31 | g_decay = "no_decay" 32 | this_decay = 0. 33 | else: 34 | g_decay = "decay" 35 | this_decay = weight_decay 36 | 37 | layer_id = get_layer_id_for_vit(n, num_layers) 38 | group_name = "layer_%d_%s" % (layer_id, g_decay) 39 | 40 | if group_name not in param_group_names: 41 | this_scale = layer_scales[layer_id] 42 | 43 | param_group_names[group_name] = { 44 | "lr_scale": this_scale, 45 | "weight_decay": this_decay, 46 | "params": [], 47 | } 48 | param_groups[group_name] = { 49 | "lr_scale": this_scale, 50 | "weight_decay": this_decay, 51 | "params": [], 52 | } 53 | 54 | param_group_names[group_name]["params"].append(n) 55 | param_groups[group_name]["params"].append(p) 56 | 57 | print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 58 | 59 | return list(param_groups.values()) 60 | 61 | 62 | def get_layer_id_for_vit(name, num_layers): 63 | """ 64 | Assign a parameter with its layer id 65 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 66 | """ 67 | if name in ['cls_token', 'pos_embed']: 68 | return 0 69 | elif name.startswith('patch_embed'): 70 | return 0 71 | elif name.startswith('blocks'): 72 | return int(name.split('.')[1]) + 1 73 | else: 74 | return num_layers -------------------------------------------------------------------------------- /semexp/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 
3 | # -------------------------------------------------------- 4 | # References: 5 | # MAE: https://github.com/facebookresearch/mae 6 | # -------------------------------------------------------- 7 | 8 | import math 9 | 10 | def adjust_learning_rate(optimizer, epoch, args): 11 | """Decay the learning rate with half-cycle cosine after warmup""" 12 | if epoch < args.warmup_epochs: 13 | lr = args.lr * epoch / args.warmup_epochs 14 | else: 15 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 16 | (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 17 | for param_group in optimizer.param_groups: 18 | if "lr_scale" in param_group: 19 | param_group["lr"] = lr * param_group["lr_scale"] 20 | else: 21 | param_group["lr"] = lr 22 | return lr 23 | -------------------------------------------------------------------------------- /semexp/utils/distributions.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from: 2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .model import AddBias 8 | 9 | """ 10 | Modify standard PyTorch distributions so they are compatible with this code. 11 | """ 12 | 13 | FixedCategorical = torch.distributions.Categorical 14 | 15 | old_sample = FixedCategorical.sample 16 | FixedCategorical.sample = lambda self: old_sample(self) 17 | 18 | log_prob_cat = FixedCategorical.log_prob 19 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat( 20 | self, actions.squeeze(-1) 21 | ) 22 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) 23 | 24 | FixedNormal = torch.distributions.Normal 25 | log_prob_normal = FixedNormal.log_prob 26 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum( 27 | -1, keepdim=False 28 | ) 29 | 30 | entropy = FixedNormal.entropy 31 | FixedNormal.entropy = lambda self: entropy(self).sum(-1) 32 | 33 | FixedNormal.mode = lambda self: self.mean 34 | 35 | 36 | class Categorical(nn.Module): 37 | def __init__(self, num_inputs, num_outputs): 38 | super(Categorical, self).__init__() 39 | self.linear = nn.Linear(num_inputs, num_outputs) 40 | 41 | def forward(self, x): 42 | x = self.linear(x) 43 | return FixedCategorical(logits=x) 44 | 45 | 46 | class DiagGaussian(nn.Module): 47 | def __init__(self, num_inputs, num_outputs): 48 | super(DiagGaussian, self).__init__() 49 | 50 | self.fc_mean = nn.Linear(num_inputs, num_outputs) 51 | self.logstd = AddBias(torch.zeros(num_outputs)) 52 | 53 | def forward(self, x): 54 | action_mean = self.fc_mean(x) 55 | 56 | zeros = torch.zeros(action_mean.size()) 57 | if x.is_cuda: 58 | zeros = zeros.cuda() 59 | 60 | action_logstd = self.logstd(zeros) 61 | return FixedNormal(action_mean, action_logstd.exp()) 62 | -------------------------------------------------------------------------------- /tdiff/train_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.utils.data._utils.collate import ( 6 | default_collate_err_msg_format, 7 | np_str_obj_array_pattern, 8 | string_classes, 9 | ) 10 | 11 | 12 | def get_loss_fn(loss_type): 13 | assert loss_type in ["bce", "l2", "l1", "xent"] 14 | loss_fn = None 15 | if loss_type == "bce": 16 | loss_fn = nn.BCELoss(reduction="none") 17 | elif loss_type == "l2": 18 | loss_fn = 
nn.MSELoss(reduction="none") 19 | elif loss_type == "l1": 20 | loss_fn = nn.L1Loss(reduction="none") 21 | elif loss_type == "xent": 22 | loss_fn = nn.CrossEntropyLoss(reduction="none") 23 | return loss_fn 24 | 25 | 26 | def get_activation_fn(activation_type): 27 | assert activation_type in ["none", "sigmoid", "relu"] 28 | activation = nn.Identity() 29 | if activation_type == "sigmoid": 30 | activation = nn.Sigmoid() 31 | elif activation_type == "relu": 32 | activation = nn.ReLU() 33 | return activation 34 | 35 | 36 | def collate_fn(batch): 37 | r"""Puts each data field into a tensor with outer dimension batch size. 38 | Modified version of default_collate which returns the batch as it has lists 39 | of varying length sizes. 40 | """ 41 | 42 | elem = batch[0] 43 | elem_type = type(elem) 44 | if isinstance(elem, torch.Tensor): 45 | out = None 46 | if torch.utils.data.get_worker_info() is not None: 47 | # If we're in a background process, concatenate directly into a 48 | # shared memory tensor to avoid an extra copy 49 | numel = sum(x.numel() for x in batch) 50 | storage = elem.storage()._new_shared(numel) 51 | out = elem.new(storage) 52 | return torch.stack(batch, 0, out=out) 53 | elif ( 54 | elem_type.__module__ == "numpy" 55 | and elem_type.__name__ != "str_" 56 | and elem_type.__name__ != "string_" 57 | ): 58 | if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap": 59 | # array of string classes and object 60 | if np_str_obj_array_pattern.search(elem.dtype.str) is not None: 61 | raise TypeError(default_collate_err_msg_format.format(elem.dtype)) 62 | 63 | return collate_fn([torch.as_tensor(b) for b in batch]) 64 | elif elem.shape == (): # scalars 65 | return torch.as_tensor(batch) 66 | elif isinstance(elem, float): 67 | return torch.tensor(batch, dtype=torch.float64) 68 | elif isinstance(elem, int): 69 | return torch.tensor(batch) 70 | elif isinstance(elem, string_classes): 71 | return batch 72 | elif isinstance(elem, collections.abc.Mapping): 73 | return {key: collate_fn([d[key] for d in batch]) for key in elem} 74 | elif isinstance(elem, tuple) and hasattr(elem, "_fields"): # namedtuple 75 | return elem_type(*(collate_fn(samples) for samples in zip(*batch))) 76 | elif isinstance(elem, collections.abc.Sequence): 77 | # check to make sure that the elements in batch have consistent size 78 | it = iter(batch) 79 | elem_size = len(next(it)) 80 | if not all(len(elem) == elem_size for elem in it): 81 | return batch 82 | transposed = zip(*batch) 83 | return [collate_fn(samples) for samples in transposed] 84 | 85 | raise TypeError(default_collate_err_msg_format.format(elem_type)) 86 | -------------------------------------------------------------------------------- /train_traj/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | Training: 4 | python train.py --config-name=train_diffusion_lowdim_workspace 5 | """ 6 | 7 | import sys 8 | # use line-buffering for both stdout and stderr 9 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 10 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 11 | 12 | import hydra 13 | from omegaconf import OmegaConf 14 | import pathlib 15 | from trajectory_diffusion.workspace.base_workspace import BaseWorkspace 16 | 17 | # allows arbitrary python code execution in configs using the ${eval:''} resolver 18 | OmegaConf.register_new_resolver("eval", eval, replace=True) 19 | 20 | @hydra.main( 21 | version_base=None, 22 | 
config_path=str(pathlib.Path(__file__).parent.joinpath( 23 | 'trajectory_diffusion','config')) 24 | ) 25 | def main(cfg: OmegaConf): 26 | # resolve immediately so all the ${now:} resolvers 27 | # will use the same time. 28 | OmegaConf.resolve(cfg) 29 | 30 | cls = hydra.utils.get_class(cfg._target_) 31 | workspace: BaseWorkspace = cls(cfg) 32 | workspace.run() 33 | 34 | if __name__ == "__main__": 35 | main() 36 | 37 | # python train.py --config-dir=. --config-name=image_pusht_diffusion_policy_cnn.yaml training.seed=42 training.device=cuda:0 hydra.run.dir='data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}' -------------------------------------------------------------------------------- /train_traj/train.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=diffusion_traj 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | conda activate diff_train 5 | 6 | python train.py --config-dir=. --config-name=train_diffusion_traj_gibson.yaml training.seed=42 training.device=cuda:0 hydra.run.dir='data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_train_traj_diff_gibson' 7 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | # self.monitor_key = monitor_key 17 | self.monitor_key = "val_loss" 18 | self.mode = mode 19 | self.k = k 20 | self.format_str = format_str 21 | self.path_value_map = dict() 22 | 23 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 24 | 25 | # ckpt_path = os.path.join( 26 | # self.save_dir, "1111.ckpt") 27 | # return ckpt_path 28 | 29 | if self.k == 0: 30 | return None 31 | 32 | value = data[self.monitor_key] 33 | ckpt_path = os.path.join( 34 | self.save_dir, self.format_str.format(**data)) 35 | return ckpt_path 36 | # if len(self.path_value_map) < self.k: 37 | # # under-capacity 38 | # self.path_value_map[ckpt_path] = value 39 | # return ckpt_path 40 | 41 | # # at capacity 42 | # sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 43 | # min_path, min_value = sorted_map[0] 44 | # max_path, max_value = sorted_map[-1] 45 | 46 | # delete_path = None 47 | # if self.mode == 'max': 48 | # if value > min_value: 49 | # delete_path = min_path 50 | # else: 51 | # if value < max_value: 52 | # delete_path = max_path 53 | 54 | # if delete_path is None: 55 | # return None 56 | # else: 57 | # del self.path_value_map[delete_path] 58 | # self.path_value_map[ckpt_path] = value 59 | 60 | # if not os.path.exists(self.save_dir): 61 | # os.mkdir(self.save_dir) 62 | 63 | # if os.path.exists(delete_path): 64 | # os.remove(delete_path) 65 | # return ckpt_path 66 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if 
i == 0: 12 | env.set_state(state) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaf of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 | if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest. 
7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/pytorch_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, List 2 | import collections 3 | import torch 4 | import torch.nn as nn 5 | 6 | def dict_apply( 7 | x: Dict[str, torch.Tensor], 8 | func: Callable[[torch.Tensor], torch.Tensor] 9 | ) -> Dict[str, torch.Tensor]: 10 | result = dict() 11 | for key, value in x.items(): 12 | if isinstance(value, dict): 13 | result[key] = dict_apply(value, func) 14 | else: 15 | result[key] = func(value) 16 | return result 17 | 18 | def pad_remaining_dims(x, target): 19 | assert x.shape == target.shape[:len(x.shape)] 20 | return x.reshape(x.shape + (1,)*(len(target.shape) - len(x.shape))) 21 | 22 | def dict_apply_split( 23 | x: Dict[str, torch.Tensor], 24 | split_func: Callable[[torch.Tensor], Dict[str, torch.Tensor]] 25 | ) -> Dict[str, torch.Tensor]: 26 | results = collections.defaultdict(dict) 27 | for key, value in x.items(): 28 | result = split_func(value) 29 | for k, v in result.items(): 30 | results[k][key] = v 31 | return results 32 | 33 | def dict_apply_reduce( 34 | x: List[Dict[str, torch.Tensor]], 35 | reduce_func: Callable[[List[torch.Tensor]], torch.Tensor] 36 | ) -> Dict[str, 
torch.Tensor]: 37 | result = dict() 38 | for key in x[0].keys(): 39 | result[key] = reduce_func([x_[key] for x_ in x]) 40 | return result 41 | 42 | 43 | def replace_submodules( 44 | root_module: nn.Module, 45 | predicate: Callable[[nn.Module], bool], 46 | func: Callable[[nn.Module], nn.Module]) -> nn.Module: 47 | """ 48 | predicate: Return true if the module is to be replaced. 49 | func: Return new module to use. 50 | """ 51 | if predicate(root_module): 52 | return func(root_module) 53 | 54 | bn_list = [k.split('.') for k, m 55 | in root_module.named_modules(remove_duplicate=True) 56 | if predicate(m)] 57 | for *parent, k in bn_list: 58 | parent_module = root_module 59 | if len(parent) > 0: 60 | parent_module = root_module.get_submodule('.'.join(parent)) 61 | if isinstance(parent_module, nn.Sequential): 62 | src_module = parent_module[int(k)] 63 | else: 64 | src_module = getattr(parent_module, k) 65 | tgt_module = func(src_module) 66 | if isinstance(parent_module, nn.Sequential): 67 | parent_module[int(k)] = tgt_module 68 | else: 69 | setattr(parent_module, k, tgt_module) 70 | # verify that all BN are replaced 71 | bn_list = [k.split('.') for k, m 72 | in root_module.named_modules(remove_duplicate=True) 73 | if predicate(m)] 74 | assert len(bn_list) == 0 75 | return root_module 76 | 77 | def optimizer_to(optimizer, device): 78 | for state in optimizer.state.values(): 79 | for k, v in state.items(): 80 | if isinstance(v, torch.Tensor): 81 | state[k] = v.to(device=device) 82 | return optimizer 83 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/dataset/base_dataset.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from trajectory_diffusion.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/dataset/gibson_dataset.py: -------------------------------------------------------------------------------- 1 | import bz2 2 | import torch 3 | import _pickle as cPickle 4 | from torch.utils.data import Dataset 5 | import os 6 | from typing import Dict 7 | 8 | def count_file_in_folder(path): 9 | count = 0 10 | for _, _, files in os.walk(path): 11 | count += len(files) 12 | return count 13 | 14 | class GibsonMapDataset(Dataset): 15 | def __init__(self, path, train_idx): 16 | self.train_idx = train_idx 17 | self.path = path 18 | 19 | def __len__(self): 20 | return len(self.train_idx) 21 | 22 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 23 | tmp_idx = self.train_idx[idx] 24 | with bz2.BZ2File("{}/{}.pbz2".format(self.path, str(tmp_idx)), 'rb') as fp: 25 | data = cPickle.load(fp) 26 | 27 | tmp = torch.zeros(32,2) 28 | tmp[:, 0] = 1-data['action'][:, 1] 29 | tmp[:, 1] = data['action'][:, 0] 30 | o_data = { 31 | 'obs':{ 32 | 'sem_map': data['obs']['sem_map'], 33 | 'target': data['obs']['target'], 34 | 'loc': torch.Tensor([1-data['obs']['loc'][1],data['obs']['loc'][0]]), 35 | }, 36 | 'action': tmp[:28, :], 37 | } 38 | 39 | return o_data 40 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/env/objnav/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | import trajectory_diffusion.env.objnav 3 | 4 | register( 5 | id='objnav-traj-diff-v0', 6 | entry_point='envs.objnav.objnav_keypoints_env:ObjNavKeypointsEnv', 7 | max_episode_steps=200, 8 | reward_threshold=1.0 9 | ) -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/env/objnav/objnav_gibson_env.py: -------------------------------------------------------------------------------- 1 | from gym import spaces 2 | from trajectory_diffusion.env.objnav.objnav_env import ObjNavEnv 3 | import numpy 
as np 4 | import cv2 5 | 6 | class ObjNavGibsonEnv(ObjNavEnv): 7 | metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10} 8 | 9 | def __init__(self, 10 | legacy=False, 11 | block_cog=None, 12 | damping=None, 13 | render_size=96): 14 | super().__init__( 15 | legacy=legacy, 16 | block_cog=block_cog, 17 | damping=damping, 18 | render_size=render_size, 19 | render_action=False) 20 | ws = self.window_size 21 | self.observation_space = spaces.Dict({ 22 | 'image': spaces.Box( 23 | low=0, 24 | high=1, 25 | shape=(3,render_size,render_size), 26 | dtype=np.float32 27 | ), 28 | 'agent_pos': spaces.Box( 29 | low=0, 30 | high=ws, 31 | shape=(2,), 32 | dtype=np.float32 33 | ) 34 | }) 35 | self.render_cache = None 36 | 37 | def _get_obs(self): 38 | img = super()._render_frame(mode='rgb_array') 39 | 40 | agent_pos = np.array(self.agent.position) 41 | img_obs = np.moveaxis(img.astype(np.float32) / 255, -1, 0) 42 | obs = { 43 | 'image': img_obs, 44 | 'agent_pos': agent_pos 45 | } 46 | 47 | # draw action 48 | if self.latest_action is not None: 49 | action = np.array(self.latest_action) 50 | coord = (action / 512 * 96).astype(np.int32) 51 | marker_size = int(8/96*self.render_size) 52 | thickness = int(1/96*self.render_size) 53 | cv2.drawMarker(img, coord, 54 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 55 | markerSize=marker_size, thickness=thickness) 56 | self.render_cache = img 57 | 58 | return obs 59 | 60 | def render(self, mode): 61 | assert mode == 'rgb_array' 62 | 63 | if self.render_cache is None: 64 | self._get_obs() 65 | 66 | return self.render_cache 67 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/env_runner/base_image_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from trajectory_diffusion.policy.base_image_policy import BaseImagePolicy 3 | 4 | class BaseImageRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | def run(self, policy: BaseImagePolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/gym_util/video_recording_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from trajectory_diffusion.real_world.video_recorder import VideoRecorder 4 | 5 | class VideoRecordingWrapper(gym.Wrapper): 6 | def __init__(self, 7 | env, 8 | video_recoder: VideoRecorder, 9 | mode='rgb_array', 10 | file_path=None, 11 | steps_per_render=1, 12 | **kwargs 13 | ): 14 | """ 15 | When file_path is None, don't record. 
16 | """ 17 | super().__init__(env) 18 | 19 | self.mode = mode 20 | self.render_kwargs = kwargs 21 | self.steps_per_render = steps_per_render 22 | self.file_path = file_path 23 | self.video_recoder = video_recoder 24 | 25 | self.step_count = 0 26 | 27 | def reset(self, **kwargs): 28 | obs = super().reset(**kwargs) 29 | self.frames = list() 30 | self.step_count = 1 31 | self.video_recoder.stop() 32 | return obs 33 | 34 | def step(self, action): 35 | result = super().step(action) 36 | self.step_count += 1 37 | if self.file_path is not None \ 38 | and ((self.step_count % self.steps_per_render) == 0): 39 | if not self.video_recoder.is_ready(): 40 | self.video_recoder.start(self.file_path) 41 | 42 | frame = self.env.render( 43 | mode=self.mode, **self.render_kwargs) 44 | assert frame.dtype == np.uint8 45 | self.video_recoder.write_frame(frame) 46 | return result 47 | 48 | def render(self, mode='rgb_array', **kwargs): 49 | if self.video_recoder.is_ready(): 50 | self.video_recoder.stop() 51 | return self.file_path 52 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/gym_util/video_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class VideoWrapper(gym.Wrapper): 5 | def __init__(self, 6 | env, 7 | mode='rgb_array', 8 | enabled=True, 9 | steps_per_render=1, 10 | **kwargs 11 | ): 12 | super().__init__(env) 13 | 14 | self.mode = mode 15 | self.enabled = enabled 16 | self.render_kwargs = kwargs 17 | self.steps_per_render = steps_per_render 18 | 19 | self.frames = list() 20 | self.step_count = 0 21 | 22 | def reset(self, **kwargs): 23 | obs = super().reset(**kwargs) 24 | self.frames = list() 25 | self.step_count = 1 26 | if self.enabled: 27 | frame = self.env.render( 28 | mode=self.mode, **self.render_kwargs) 29 | assert frame.dtype == np.uint8 30 | self.frames.append(frame) 31 | return obs 32 | 33 | def step(self, action): 34 | result = super().step(action) 35 | self.step_count += 1 36 | if self.enabled and ((self.step_count % self.steps_per_render) == 0): 37 | frame = self.env.render( 38 | mode=self.mode, **self.render_kwargs) 39 | assert frame.dtype == np.uint8 40 | self.frames.append(frame) 41 | return result 42 | 43 | def render(self, mode='rgb_array', **kwargs): 44 | return self.frames 45 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | 
# if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from diffusers.optimization import ( 2 | Union, SchedulerType, Optional, 3 | Optimizer, TYPE_TO_SCHEDULER_FUNCTION 4 | ) 5 | 6 | def get_scheduler( 7 | name: Union[str, SchedulerType], 8 | optimizer: Optimizer, 9 | num_warmup_steps: Optional[int] = None, 10 | num_training_steps: Optional[int] = None, 11 | **kwargs 12 | ): 13 | """ 14 | Added kwargs vs diffuser's original implementation 15 | 16 | Unified API to get any scheduler from its name. 17 | 18 | Args: 19 | name (`str` or `SchedulerType`): 20 | The name of the scheduler to use. 21 | optimizer (`torch.optim.Optimizer`): 22 | The optimizer that will be used during training. 23 | num_warmup_steps (`int`, *optional*): 24 | The number of warmup steps to do. This is not required by all schedulers (hence the argument being 25 | optional), the function will raise an error if it's unset and the scheduler type requires it. 26 | num_training_steps (`int``, *optional*): 27 | The number of training steps to do. This is not required by all schedulers (hence the argument being 28 | optional), the function will raise an error if it's unset and the scheduler type requires it. 29 | """ 30 | name = SchedulerType(name) 31 | schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] 32 | if name == SchedulerType.CONSTANT: 33 | return schedule_func(optimizer, **kwargs) 34 | 35 | # All other schedulers require `num_warmup_steps` 36 | if num_warmup_steps is None: 37 | raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.") 38 | 39 | if name == SchedulerType.CONSTANT_WITH_WARMUP: 40 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs) 41 | 42 | # All other schedulers require `num_training_steps` 43 | if num_training_steps is None: 44 | raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") 45 | 46 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs) 47 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 11 | pass 12 | return 
device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/diffusion/conv1d_components.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | return resnet_model 29 | 
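A minimal usage sketch of the two helpers above (get_resnet from model/vision/model_getter.py and get_output_shape from model/common/shape_util.py), assuming the package is importable as trajectory_diffusion; the 3x96x96 input resolution is an illustrative assumption, not a value taken from the configs.

from trajectory_diffusion.model.common.shape_util import get_output_shape
from trajectory_diffusion.model.vision.model_getter import get_resnet

# Build a ResNet-18 trunk; get_resnet replaces the classification head with Identity.
encoder = get_resnet('resnet18', weights=None)

# Probe the flattened feature size for an assumed 3x96x96 RGB observation.
feat_shape = get_output_shape((3, 96, 96), encoder)  # (512,) for resnet18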
-------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from trajectory_diffusion.model.common.module_attr_mixin import ModuleAttrMixin 5 | from trajectory_diffusion.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from trajectory_diffusion.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 
25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | --------------------------------------------------------------------------------
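A minimal usage sketch for the observation helpers in real_inference_util.py, assuming a hypothetical shape_meta with one RGB stream named 'camera_0' and one low-dim key 'agent_pos'; the key names, resolutions, and dummy arrays are illustrative assumptions only.

import numpy as np
from trajectory_diffusion.real_world.real_inference_util import (
    get_real_obs_dict,
    get_real_obs_resolution,
)

# Hypothetical shape_meta; keys and shapes are assumptions for illustration.
shape_meta = {
    'obs': {
        'camera_0': {'type': 'rgb', 'shape': (3, 96, 96)},
        'agent_pos': {'type': 'low_dim', 'shape': (2,)},
    }
}

# Target (width, height) the policy expects, read from the rgb entries: (96, 96).
w, h = get_real_obs_resolution(shape_meta)

# Dummy environment output: T frames in H,W,C uint8 plus a low-dim state history.
env_obs = {
    'camera_0': np.zeros((2, 480, 640, 3), dtype=np.uint8),
    'agent_pos': np.zeros((2, 2), dtype=np.float32),
}

# Frames are resized, scaled to [0, 1] float32 and transposed to T,C,H,W.
obs_dict = get_real_obs_dict(env_obs, shape_meta)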