├── .DS_Store ├── .gitignore ├── LICENSE ├── README.md ├── diffusion_policy ├── .DS_Store ├── codecs │ └── imagecodecs_numcodecs.py ├── common │ ├── checkpoint_util.py │ ├── cv2_util.py │ ├── env_util.py │ ├── json_logger.py │ ├── nested_dict_util.py │ ├── normalize_util.py │ ├── pose_trajectory_interpolator.py │ ├── precise_sleep.py │ ├── pymunk_override.py │ ├── pymunk_util.py │ ├── pytorch_util.py │ ├── replay_buffer.py │ ├── robomimic_config_util.py │ ├── robomimic_util.py │ ├── sampler.py │ └── timestamp_accumulator.py ├── config │ ├── task │ │ ├── blockpush_lowdim_seed.yaml │ │ ├── blockpush_lowdim_seed_abs.yaml │ │ ├── can_image.yaml │ │ ├── can_image_abs.yaml │ │ ├── can_lowdim.yaml │ │ ├── can_lowdim_abs.yaml │ │ ├── kitchen_lowdim.yaml │ │ ├── kitchen_lowdim_abs.yaml │ │ ├── lift_image.yaml │ │ ├── lift_image_abs.yaml │ │ ├── lift_lowdim.yaml │ │ ├── lift_lowdim_abs.yaml │ │ ├── pusht_image.yaml │ │ ├── pusht_lowdim.yaml │ │ ├── real_pusht_image.yaml │ │ ├── square_image.yaml │ │ ├── square_image_abs.yaml │ │ ├── square_lowdim.yaml │ │ ├── square_lowdim_abs.yaml │ │ ├── tool_hang_image.yaml │ │ ├── tool_hang_image_abs.yaml │ │ ├── tool_hang_lowdim.yaml │ │ ├── tool_hang_lowdim_abs.yaml │ │ ├── transport_image.yaml │ │ ├── transport_image_abs.yaml │ │ ├── transport_lowdim.yaml │ │ └── transport_lowdim_abs.yaml │ ├── train_bet_lowdim_workspace.yaml │ ├── train_diffusion_transformer_hybrid_workspace.yaml │ ├── train_diffusion_transformer_lowdim_kitchen_workspace.yaml │ ├── train_diffusion_transformer_lowdim_pusht_workspace.yaml │ ├── train_diffusion_transformer_lowdim_workspace.yaml │ ├── train_diffusion_transformer_real_hybrid_workspace.yaml │ ├── train_diffusion_unet_ddim_hybrid_workspace.yaml │ ├── train_diffusion_unet_ddim_lowdim_workspace.yaml │ ├── train_diffusion_unet_hybrid_workspace.yaml │ ├── train_diffusion_unet_image_pretrained_workspace.yaml │ ├── train_diffusion_unet_image_workspace.yaml │ ├── train_diffusion_unet_lowdim_workspace.yaml │ ├── train_diffusion_unet_real_hybrid_workspace.yaml │ ├── train_diffusion_unet_real_image_workspace.yaml │ ├── train_diffusion_unet_real_pretrained_workspace.yaml │ ├── train_diffusion_unet_video_workspace.yaml │ ├── train_ibc_dfo_hybrid_workspace.yaml │ ├── train_ibc_dfo_lowdim_workspace.yaml │ ├── train_ibc_dfo_real_hybrid_workspace.yaml │ ├── train_robomimic_image_workspace.yaml │ ├── train_robomimic_lowdim_workspace.yaml │ └── train_robomimic_real_image_workspace.yaml ├── dataset │ ├── .DS_Store │ ├── base_dataset.py │ ├── blockpush_lowdim_dataset.py │ ├── gibson_dataset.py │ ├── kitchen_lowdim_dataset.py │ ├── kitchen_mjl_lowdim_dataset.py │ ├── pusht_dataset.py │ ├── pusht_image_dataset.py │ ├── real_pusht_image_dataset.py │ ├── robomimic_replay_image_dataset.py │ ├── robomimic_replay_lowdim_dataset.py │ ├── test_img.tar.gz │ └── test_img │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 10.png │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.png │ │ ├── 14.png │ │ ├── 15.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ ├── 9.png │ │ ├── action.png │ │ └── pose.png ├── env_runner │ ├── base_image_runner.py │ ├── base_lowdim_runner.py │ ├── blockpush_lowdim_runner.py │ ├── kitchen_lowdim_runner.py │ ├── pusht_image_runner.py │ ├── pusht_keypoints_runner.py │ ├── real_pusht_image_runner.py │ ├── robomimic_image_runner.py │ └── robomimic_lowdim_runner.py ├── gym_util │ ├── async_vector_env.py │ ├── multistep_wrapper.py │ ├── sync_vector_env.py │ ├── video_recording_wrapper.py │ └── video_wrapper.py 
├── model │ ├── .DS_Store │ ├── bet │ │ ├── action_ae │ │ │ ├── __init__.py │ │ │ └── discretizers │ │ │ │ └── k_means.py │ │ ├── latent_generators │ │ │ ├── latent_generator.py │ │ │ ├── mingpt.py │ │ │ └── transformer.py │ │ ├── libraries │ │ │ ├── loss_fn.py │ │ │ └── mingpt │ │ │ │ ├── LICENSE │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── trainer.py │ │ │ │ └── utils.py │ │ └── utils.py │ ├── clip_model │ │ └── clip_model_for_features.py │ ├── common │ │ ├── dict_of_tensor_mixin.py │ │ ├── lr_scheduler.py │ │ ├── module_attr_mixin.py │ │ ├── normalizer.py │ │ ├── rotation_transformer.py │ │ ├── shape_util.py │ │ └── tensor_util.py │ ├── diffusion │ │ ├── conditional_unet1d.py │ │ ├── conv1d_components.py │ │ ├── ema_model.py │ │ ├── mask_generator.py │ │ ├── positional_embedding.py │ │ └── transformer_for_diffusion.py │ └── vision │ │ ├── crop_randomizer.py │ │ ├── model_getter.py │ │ └── multi_image_obs_encoder.py ├── policy │ ├── base_image_policy.py │ ├── base_lowdim_policy.py │ ├── bet_lowdim_policy.py │ ├── diffusion_transformer_hybrid_image_policy.py │ ├── diffusion_transformer_hybrid_image_policy_backup.py │ ├── diffusion_transformer_hybrid_image_policy_for_vis.py │ ├── diffusion_transformer_lowdim_policy.py │ ├── diffusion_unet_hybrid_image_policy copy.py │ ├── diffusion_unet_hybrid_image_policy.py │ ├── diffusion_unet_image_policy.py │ ├── diffusion_unet_lowdim_policy.py │ ├── diffusion_unet_video_policy.py │ ├── ibc_dfo_hybrid_image_policy.py │ ├── ibc_dfo_lowdim_policy.py │ ├── robomimic_image_policy.py │ └── robomimic_lowdim_policy.py ├── real_world │ ├── .DS_Store │ ├── keystroke_counter.py │ ├── multi_camera_visualizer.py │ ├── multi_realsense.py │ ├── real_data_conversion.py │ ├── real_env.py │ ├── real_inference_util.py │ ├── realsense_config │ │ ├── 415_high_accuracy_mode.json │ │ └── 435_high_accuracy_mode.json │ ├── rtde_interpolation_controller.py │ ├── single_realsense.py │ ├── spacemouse.py │ ├── spacemouse_shared_memory.py │ └── video_recorder.py ├── scripts │ ├── bet_blockpush_conversion.py │ ├── blockpush_abs_conversion.py │ ├── episode_lengths.py │ ├── generate_bet_blockpush.py │ ├── real_dataset_conversion.py │ ├── real_pusht_metrics.py │ ├── real_pusht_successrate.py │ ├── robomimic_dataset_action_comparison.py │ └── robomimic_dataset_conversion.py ├── shared_memory │ ├── shared_memory_queue.py │ ├── shared_memory_ring_buffer.py │ ├── shared_memory_util.py │ └── shared_ndarray.py └── workspace │ ├── base_workspace.py │ ├── train_bet_lowdim_workspace.py │ ├── train_diffusion_transformer_hybrid_workspace.py │ ├── train_diffusion_transformer_lowdim_workspace.py │ ├── train_diffusion_unet_hybrid_workspace.py │ ├── train_diffusion_unet_image_workspace.py │ ├── train_diffusion_unet_lowdim_workspace.py │ ├── train_diffusion_unet_video_workspace.py │ ├── train_ibc_dfo_hybrid_workspace.py │ ├── train_ibc_dfo_lowdim_workspace.py │ ├── train_robomimic_image_workspace.py │ └── train_robomimic_lowdim_workspace.py ├── environment.yml ├── experiment_scripts └── gibson │ └── eval_tdiff.sh ├── semexp ├── .DS_Store ├── agents │ └── utils │ │ ├── semantic_prediction.py │ │ └── visualization.py ├── arguments.py ├── configs │ ├── Base-RCNN-FPN.yaml │ └── COCO-InstanceSegmentation │ │ └── mask_rcnn_R_50_FPN_3x.yaml ├── constants.py ├── docs │ ├── legend_gibson.png │ └── legend_mp3d.png ├── envs │ ├── .DS_Store │ ├── __init__.py │ ├── habitat │ │ ├── __init__.py │ │ ├── configs │ │ │ └── tasks │ │ │ │ └── objectnav_gibson.yaml │ │ ├── objectgoal_env.py │ │ ├── 
sem_exp.py │ │ └── utils │ │ │ └── vector_env.py │ └── utils │ │ ├── depth_utils.py │ │ ├── fmm_planner.py │ │ ├── map_builder.py │ │ ├── pose.py │ │ └── rotation_utils.py ├── eval_tdiff.py ├── km_match.py ├── model.py ├── model_pf.py ├── sxz │ ├── data_check.py │ ├── img │ │ ├── circle0.png │ │ ├── circle1.png │ │ ├── circle2.png │ │ ├── circle3.png │ │ ├── dist_circle_test0.png │ │ ├── dist_circle_test1.png │ │ ├── dist_map.png │ │ ├── dist_map_dilate.png │ │ ├── origin_map.png │ │ ├── pbz2_Collierville.png │ │ ├── pbz2_Corozal.png │ │ ├── pbz2_Darden.png │ │ ├── pbz2_Markleeville.png │ │ ├── pbz2_Wiconisco.png │ │ └── test.png │ └── visualize.py ├── util │ ├── crop.py │ ├── cross_attention.py │ ├── datasets.py │ ├── lr_decay.py │ ├── lr_sched.py │ ├── misc.py │ └── pos_embed.py ├── utils │ ├── agent_helper.py │ ├── agent_state.py │ ├── distributions.py │ ├── mapping_module.py │ ├── model.py │ ├── rednet.py │ ├── storage.py │ ├── stubborn_agent.py │ └── visualize_tools.py └── vis_adds.py ├── tdiff ├── constants.py ├── dataset.py ├── default.py ├── fmm_planner.py ├── geometry.py ├── hab_utils.py ├── model.py ├── train_utils.py └── visualize_tools.py ├── train.py └── train_traj ├── environment.yaml ├── train.py ├── train.sh ├── train_diffusion_traj_gibson.yaml └── trajectory_diffusion ├── codecs └── imagecodecs_numcodecs.py ├── common ├── checkpoint_util.py ├── cv2_util.py ├── env_util.py ├── json_logger.py ├── nested_dict_util.py ├── normalize_util.py ├── pose_trajectory_interpolator.py ├── precise_sleep.py ├── pymunk_override.py ├── pymunk_util.py ├── pytorch_util.py ├── replay_buffer.py ├── robomimic_config_util.py ├── robomimic_util.py ├── sampler.py └── timestamp_accumulator.py ├── config └── train_diffusion_transformer_gibson_workspace.yaml ├── dataset ├── base_dataset.py └── gibson_dataset.py ├── env └── objnav │ ├── __init__.py │ ├── objnav_env.py │ ├── objnav_gibson_env.py │ ├── objnav_keypoints_env.py │ ├── pymunk_keypoint_manager.py │ └── pymunk_override.py ├── env_runner ├── base_image_runner.py └── gibson_traj_diff_runner.py ├── gym_util ├── async_vector_env.py ├── multistep_wrapper.py ├── sync_vector_env.py ├── video_recording_wrapper.py └── video_wrapper.py ├── model ├── clip_model │ └── clip_model_for_features.py ├── common │ ├── dict_of_tensor_mixin.py │ ├── lr_scheduler.py │ ├── module_attr_mixin.py │ ├── normalizer.py │ ├── rotation_transformer.py │ ├── shape_util.py │ └── tensor_util.py ├── diffusion │ ├── conditional_unet1d.py │ ├── conv1d_components.py │ ├── ema_model.py │ ├── mask_generator.py │ ├── positional_embedding.py │ └── transformer_for_diffusion.py └── vision │ ├── crop_randomizer.py │ ├── model_getter.py │ └── multi_image_obs_encoder.py ├── policy ├── base_image_policy.py └── trajectory_diffusion_transformer_gibson_policy.py ├── real_world ├── keystroke_counter.py ├── real_data_conversion.py ├── real_env.py ├── real_inference_util.py ├── realsense_config │ ├── 415_high_accuracy_mode.json │ └── 435_high_accuracy_mode.json ├── spacemouse.py ├── spacemouse_shared_memory.py └── video_recorder.py └── workspace ├── base_workspace.py └── train_diffusion_transformer_gibson_workspace.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Trajectory 
Diffusion for ObjectGoal Navigation 2 | 3 | ## Setup 4 | - Clone the repository and move into the top-level directory: `cd T-Diff` 5 | - Create the conda environment. `conda env create -f environment.yml` 6 | - Activate the environment. `conda activate tdiff` 7 | - We provide pre-trained models of [T-Diff](https://drive.google.com/file/d/1AzJEfhy8Sfu_CUiNTwO4RkZEmbX04k42/view?usp=sharing) and [area_prediction](https://drive.google.com/file/d/113hMyZFT5orwfcFlrX_ESRawbrr6UiT7/view?usp=sharing). For evaluation, download them to the top-level directory. 8 | - Download the [t_diff_dataset](https://drive.google.com/file/d/1p5h7wxRwnPZ63cwZK6DWhpJKErhWNuDb/view). 9 | - Download the [semantic maps (gt)](https://drive.google.com/file/d/1lOJlZXWBeCsnPzqgdnvXbEmF2yGxRwY4/view?usp=sharing). 10 | 11 | ## Dataset 12 | We use a modified version of the Gibson ObjectNav evaluation setup from [SemExp](https://github.com/devendrachaplot/Object-Goal-Navigation). 13 | 14 | 1. Download the [Gibson ObjectNav dataset](https://utexas.box.com/s/tss7udt3ralioalb6eskj3z3spuvwz7v) to `$T_Diff_ROOT/data/datasets/objectnav/gibson`. 15 | ``` 16 | cd $T_Diff_ROOT/data/datasets/objectnav 17 | wget -O gibson_objectnav_episodes.tar.gz https://utexas.box.com/shared/static/tss7udt3ralioalb6eskj3z3spuvwz7v.gz 18 | tar -xvzf gibson_objectnav_episodes.tar.gz && rm gibson_objectnav_episodes.tar.gz 19 | ``` 20 | 2. Download the image segmentation model [[URL](https://utexas.box.com/s/sf4prmup4fsiu6taljnt5ht8unev5ikq)] to `$T_Diff_ROOT/pretrained_models`. 21 | 3. To visualize episodes with the semantic map and potential function predictions, add the arguments `--print_images 1 --num_pf_maps 3` to the evaluation script. 22 | 23 | The `data` folder should look like this: 24 | ``` 25 | data/ 26 | ├── datasets/objectnav/gibson/v1.1 27 | ├── train/ 28 | │ ├── content/ 29 | │ ├── train_info.pbz2 30 | │ └── train.json.gz 31 | ├── val/ 32 | │ ├── content/ 33 | │ ├── val_info.pbz2 34 | │ └── val.json.gz 35 | ├── scene_datasets/ 36 | ├── gibson_semantic/ 37 | ├── Allensville_semantic.ply 38 | ├── Allensville.glb 39 | ├── Allensville.ids 40 | ├── Allensville.navmesh 41 | ├── Allensville.scn 42 | ├── ... 43 | ├── semantic_maps/ 44 | ├── gibson/semantic_maps 45 | ├── semmap_GT_info.json 46 | ├── Allensville_0.png 47 | ├── Allensville.h5 48 | ├── ... 49 | ``` 50 | 51 | 52 | 53 | 54 | ## Evaluation 55 | `sh experiment_scripts/gibson/eval_tdiff.sh` 56 | 57 | ## Training 58 | Download the [Gibson Traj dataset](https://drive.google.com/file/d/1p5h7wxRwnPZ63cwZK6DWhpJKErhWNuDb/view?usp=sharing) to `$T_Diff_ROOT/train_traj/data/gibson_traj_32`. 59 | 60 | 1. Create the conda environment. `conda env create -f train_traj/environment.yml` 61 | 2. Activate the environment. `conda activate diff_train` 62 | 3. 
`sh $T_Diff_ROOT/train_traj/train.sh` 63 | -------------------------------------------------------------------------------- /diffusion_policy/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | # self.monitor_key = monitor_key 17 | self.monitor_key = "val_loss" 18 | self.mode = mode 19 | self.k = k 20 | self.format_str = format_str 21 | self.path_value_map = dict() 22 | 23 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 24 | if self.k == 0: 25 | return None 26 | 27 | value = data[self.monitor_key] 28 | ckpt_path = os.path.join( 29 | self.save_dir, self.format_str.format(**data)) 30 | 31 | if len(self.path_value_map) < self.k: 32 | # under-capacity 33 | self.path_value_map[ckpt_path] = value 34 | return ckpt_path 35 | 36 | # at capacity 37 | sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 38 | min_path, min_value = sorted_map[0] 39 | max_path, max_value = sorted_map[-1] 40 | 41 | delete_path = None 42 | if self.mode == 'max': 43 | if value > min_value: 44 | delete_path = min_path 45 | else: 46 | if value < max_value: 47 | delete_path = max_path 48 | 49 | if delete_path is None: 50 | return None 51 | else: 52 | del self.path_value_map[delete_path] 53 | self.path_value_map[ckpt_path] = value 54 | 55 | if not os.path.exists(self.save_dir): 56 | os.mkdir(self.save_dir) 57 | 58 | if os.path.exists(delete_path): 59 | os.remove(delete_path) 60 | return ckpt_path 61 | -------------------------------------------------------------------------------- /diffusion_policy/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if i == 0: 12 | env.set_state(state) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /diffusion_policy/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaf of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 
| if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /diffusion_policy/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest. 7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /diffusion_policy/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /diffusion_policy/common/pytorch_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, List 2 | import collections 3 | import torch 4 | import torch.nn as nn 5 | 6 | def dict_apply( 7 | x: Dict[str, torch.Tensor], 8 | func: Callable[[torch.Tensor], torch.Tensor] 9 | ) -> Dict[str, torch.Tensor]: 10 | result = dict() 11 | for key, value in x.items(): 12 | if 
isinstance(value, dict): 13 | result[key] = dict_apply(value, func) 14 | else: 15 | result[key] = func(value) 16 | return result 17 | 18 | def pad_remaining_dims(x, target): 19 | assert x.shape == target.shape[:len(x.shape)] 20 | return x.reshape(x.shape + (1,)*(len(target.shape) - len(x.shape))) 21 | 22 | def dict_apply_split( 23 | x: Dict[str, torch.Tensor], 24 | split_func: Callable[[torch.Tensor], Dict[str, torch.Tensor]] 25 | ) -> Dict[str, torch.Tensor]: 26 | results = collections.defaultdict(dict) 27 | for key, value in x.items(): 28 | result = split_func(value) 29 | for k, v in result.items(): 30 | results[k][key] = v 31 | return results 32 | 33 | def dict_apply_reduce( 34 | x: List[Dict[str, torch.Tensor]], 35 | reduce_func: Callable[[List[torch.Tensor]], torch.Tensor] 36 | ) -> Dict[str, torch.Tensor]: 37 | result = dict() 38 | for key in x[0].keys(): 39 | result[key] = reduce_func([x_[key] for x_ in x]) 40 | return result 41 | 42 | 43 | def replace_submodules( 44 | root_module: nn.Module, 45 | predicate: Callable[[nn.Module], bool], 46 | func: Callable[[nn.Module], nn.Module]) -> nn.Module: 47 | """ 48 | predicate: Return true if the module is to be replaced. 49 | func: Return new module to use. 50 | """ 51 | if predicate(root_module): 52 | return func(root_module) 53 | 54 | bn_list = [k.split('.') for k, m 55 | in root_module.named_modules(remove_duplicate=True) 56 | if predicate(m)] 57 | for *parent, k in bn_list: 58 | parent_module = root_module 59 | if len(parent) > 0: 60 | parent_module = root_module.get_submodule('.'.join(parent)) 61 | if isinstance(parent_module, nn.Sequential): 62 | src_module = parent_module[int(k)] 63 | else: 64 | src_module = getattr(parent_module, k) 65 | tgt_module = func(src_module) 66 | if isinstance(parent_module, nn.Sequential): 67 | parent_module[int(k)] = tgt_module 68 | else: 69 | setattr(parent_module, k, tgt_module) 70 | # verify that all BN are replaced 71 | bn_list = [k.split('.') for k, m 72 | in root_module.named_modules(remove_duplicate=True) 73 | if predicate(m)] 74 | assert len(bn_list) == 0 75 | return root_module 76 | 77 | def optimizer_to(optimizer, device): 78 | for state in optimizer.state.values(): 79 | for k, v in state.items(): 80 | if isinstance(v, torch.Tensor): 81 | state[k] = v.to(device=device) 82 | return optimizer 83 | -------------------------------------------------------------------------------- /diffusion_policy/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = 
modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: False 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed_abs 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: True 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed_abs.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | 
robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
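# Note on the padding expressions in the dataset block below (values illustrative, not defined
# in this file): n_obs_steps, n_action_steps and n_latency_steps come from the training
# workspace config. With, say, n_obs_steps=2, n_action_steps=8 and n_latency_steps=0,
# pad_before resolves to 2-1+0 = 1 and pad_after to 8-1 = 7, i.e. each sampled training
# window may be padded by one frame at the start and seven at the end of an episode.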
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: 
diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | dataset_dir: &dataset_dir data/kitchen 8 | 9 | env_runner: 10 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 11 | dataset_dir: *dataset_dir 12 | n_train: 6 13 | n_train_vis: 2 14 | train_start_seed: 0 15 | n_test: 50 16 | n_test_vis: 4 17 | test_start_seed: 100000 18 | max_steps: 280 19 | n_obs_steps: ${n_obs_steps} 20 | n_action_steps: ${n_action_steps} 21 | render_hw: [240, 360] 22 | fps: 12.5 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.kitchen_lowdim_dataset.KitchenLowdimDataset 28 | dataset_dir: *dataset_dir 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | abs_action: True 8 | robot_noise_ratio: 0.1 9 | 10 | env_runner: 11 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 12 | dataset_dir: data/kitchen 13 | n_train: 6 14 | n_train_vis: 2 15 | train_start_seed: 0 16 | n_test: 50 17 | n_test_vis: 4 18 | test_start_seed: 100000 19 | max_steps: 280 20 | n_obs_steps: ${n_obs_steps} 21 | n_action_steps: ${n_action_steps} 22 | render_hw: [240, 360] 23 | fps: 12.5 24 | past_action: ${past_action_visible} 25 | abs_action: ${task.abs_action} 26 | robot_noise_ratio: ${task.robot_noise_ratio} 27 | n_envs: null 28 | 29 | dataset: 30 | _target_: diffusion_policy.dataset.kitchen_mjl_lowdim_dataset.KitchenMjlLowdimDataset 31 | dataset_dir: data/kitchen/kitchen_demos_multitask 32 | horizon: ${horizon} 33 | pad_before: ${eval:'${n_obs_steps}-1'} 34 | pad_after: ${eval:'${n_action_steps}-1'} 35 | abs_action: ${task.abs_action} 36 | robot_noise_ratio: ${task.robot_noise_ratio} 37 | seed: 42 38 | val_ratio: 0.02 39 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path 
data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 1 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 3 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | n_train: 6 32 | n_train_vis: 2 33 | train_start_idx: 0 34 | n_test: 50 35 | n_test_vis: 4 36 | test_start_seed: 100000 37 | # use python's eval function as resolver, single-quoted string as argument 38 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'agentview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
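# Note on the 10-dim action above: with abs_action: True the action is presumably the absolute
# end-effector target, 3 position dims + 6 rotation dims (the rotation_6d representation
# selected in the dataset block below) + 1 gripper dim, whereas the relative-action variants
# of these tasks keep the original 7-dim actions.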
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 3 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | 
tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | rotation_rep: rotation_6d 46 | seed: 42 47 | val_ratio: 0.02 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_image 2 | 3 | image_shape: &image_shape [3, 96, 96] 4 | shape_meta: &shape_meta 5 | # acceptable types: rgb, low_dim 6 | obs: 7 | image: 8 | shape: *image_shape 9 | type: rgb 10 | agent_pos: 11 | shape: [2] 12 | type: low_dim 13 | action: 14 | shape: [2] 15 | 16 | env_runner: 17 | _target_: diffusion_policy.env_runner.pusht_image_runner.PushTImageRunner 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_seed: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | legacy_test: True 24 | test_start_seed: 100000 25 | max_steps: 300 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | fps: 10 29 | past_action: ${past_action_visible} 30 | n_envs: null 31 | 32 | dataset: 33 | _target_: diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset 34 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 35 | horizon: ${horizon} 36 | pad_before: ${eval:'${n_obs_steps}-1'} 37 | pad_after: ${eval:'${n_action_steps}-1'} 38 | seed: 42 39 | val_ratio: 0.02 40 | max_train_episodes: 90 41 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_lowdim 2 | 3 | obs_dim: 20 # 9*2 keypoints + 2 state 4 | action_dim: 2 5 | keypoint_dim: 2 6 | 7 | env_runner: 8 | _target_: diffusion_policy.env_runner.pusht_keypoints_runner.PushTKeypointsRunner 9 | keypoint_visible_rate: ${keypoint_visible_rate} 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | legacy_test: True 16 | test_start_seed: 100000 17 | max_steps: 300 18 | n_obs_steps: ${n_obs_steps} 19 | n_action_steps: ${n_action_steps} 20 | n_latency_steps: ${n_latency_steps} 21 | fps: 10 22 | agent_keypoints: False 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.pusht_dataset.PushTLowdimDataset 28 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | max_train_episodes: 90 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/real_pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: real_image 2 | 3 | image_shape: [3, 240, 320] 4 | dataset_path: data/pusht_real/real_pusht_20230105 5 | 6 | shape_meta: &shape_meta 7 | # acceptable types: rgb, low_dim 8 | obs: 9 | # camera_0: 10 | # shape: ${task.image_shape} 11 | # type: rgb 12 | camera_1: 13 | shape: ${task.image_shape} 14 | type: rgb 15 | # camera_2: 16 | # shape: ${task.image_shape} 17 | # type: rgb 18 | camera_3: 19 | shape: 
${task.image_shape} 20 | type: rgb 21 | # camera_4: 22 | # shape: ${task.image_shape} 23 | # type: rgb 24 | robot_eef_pose: 25 | shape: [2] 26 | type: low_dim 27 | action: 28 | shape: [2] 29 | 30 | env_runner: 31 | _target_: diffusion_policy.env_runner.real_pusht_image_runner.RealPushTImageRunner 32 | 33 | dataset: 34 | _target_: diffusion_policy.dataset.real_pusht_image_dataset.RealPushTImageDataset 35 | shape_meta: *shape_meta 36 | dataset_path: ${task.dataset_path} 37 | horizon: ${horizon} 38 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 39 | pad_after: ${eval:'${n_action_steps}-1'} 40 | n_obs_steps: ${dataset_obs_steps} 41 | n_latency_steps: ${n_latency_steps} 42 | use_cache: True 43 | seed: 42 44 | val_ratio: 0.00 45 | max_train_episodes: null 46 | delta_action: False 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
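# The ${eval:'...'} entries above and below are resolved by a custom OmegaConf resolver that
# calls Python's eval; e.g. with the default dataset_type 'ph', max_steps above evaluates to
# 400 (it would be 500 for the 'mh' datasets).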
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
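# use_cache: True in the dataset block below is assumed to build a converted local cache of
# the hdf5 demos on first load so that later runs skip the slow per-frame image decoding
# (behaviour inferred from the dataset class, not stated in this config).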
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | max_train_episodes: null 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | abs_action: &abs_action True 11 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 12 | 13 | 14 | env_runner: 15 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 16 | dataset_path: *dataset_path 17 | obs_keys: *obs_keys 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_idx: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | test_start_seed: 100000 24 | # use python's eval function as resolver, single-quoted string as argument 25 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | n_latency_steps: ${n_latency_steps} 29 | render_hw: [128,128] 30 | fps: 10 31 | crf: 22 32 | past_action: ${past_action_visible} 33 | abs_action: 
*abs_action 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | max_train_episodes: null 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
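# Compared with the other robomimic image tasks in this folder, tool_hang uses larger
# 240x240 observations (sideview_image / robot0_eye_in_hand_image above) and a longer
# rollout budget (max_steps: 700), matching the longer-horizon nature of the task.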
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image_abs 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
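# The absolute-action variant predicts a 10-dim action (presumably 3 eef position + 6 for the
# rotation_6d representation selected in the dataset below + 1 gripper), whereas tool_hang_image
# above keeps the native 7-dim delta actions; the dataset converts rotations according to
# rotation_rep.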
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 
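# The dataset below pads episode starts/ends so that windows anchored near an episode boundary
# can still be sampled: pad_before = n_obs_steps - 1 + n_latency_steps and
# pad_after = n_action_steps - 1 (e.g. n_obs_steps=2, n_latency_steps=0, n_action_steps=8 gives
# pad_before=1, pad_after=7). The quoted expressions rely on the custom ${eval:...} resolver,
# presumably registered by the training entry point.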
34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [14] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 38 | abs_action: &abs_action False 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
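# transport is a two-arm task: shape_meta above lists a shoulder and wrist camera plus eef pose
# and gripper state for each robot, and the 14-dim action is 7 per arm. Entries tagged
# "type: rgb" are routed to the image encoder, while untyped entries default to low_dim.
# A sketch of how such a split can be derived from shape_meta (not the repo's exact code):
#   rgb_keys = [k for k, v in shape_meta['obs'].items() if v.get('type', 'low_dim') == 'rgb']
#   lowdim_keys = [k for k, v in shape_meta['obs'].items() if v.get('type', 'low_dim') == 'low_dim']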
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [20] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 38 | abs_action: &abs_action True 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 14 # 7*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 15 | abs_action: &abs_action False 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 5 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 
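# Blocks with a _target_ key (env_runner above, dataset below) are built via
# hydra.utils.instantiate. A minimal sketch of composing this task into a workspace and
# building the dataset (illustrative only -- assumes the robomimic hdf5 has been downloaded to
# dataset_path and that the ${eval:...} resolver is registered, as the training entry point
# presumably does):
#   from hydra import initialize, compose
#   from hydra.utils import instantiate
#   from omegaconf import OmegaConf
#   OmegaConf.register_new_resolver("eval", eval, replace=True)
#   with initialize(config_path="diffusion_policy/config"):  # path relative to the calling file
#       cfg = compose(config_name="train_robomimic_lowdim_workspace",
#                     overrides=["task=transport_lowdim"])
#   dataset = instantiate(cfg.task.dataset)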
38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 20 # 10*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 15 | abs_action: &abs_action True 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 4 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_bet_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: blockpush_lowdim_seed 4 | 5 | name: train_bet_lowdim 6 | _target_: diffusion_policy.workspace.train_bet_lowdim_workspace.TrainBETLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | keypoint_dim: ${task.keypoint_dim} 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 3 15 | n_obs_steps: 3 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | obs_as_local_cond: False 21 | obs_as_global_cond: False 22 | pred_action_steps_only: False 23 | 24 | policy: 25 | _target_: diffusion_policy.policy.bet_lowdim_policy.BETLowdimPolicy 26 | 27 | action_ae: 28 | _target_: diffusion_policy.model.bet.action_ae.discretizers.k_means.KMeansDiscretizer 29 | num_bins: 24 30 | action_dim: ${action_dim} 31 | predict_offsets: True 32 | 33 | obs_encoding_net: 34 | _target_: torch.nn.Identity 35 | output_dim: ${obs_dim} 36 | 37 | state_prior: 38 | _target_: diffusion_policy.model.bet.latent_generators.mingpt.MinGPT 39 | 40 | discrete_input: false 41 | input_dim: ${obs_dim} 42 | 43 | vocab_size: 
${policy.action_ae.num_bins} 44 | 45 | # Architecture details 46 | n_layer: 4 47 | n_head: 4 48 | n_embd: 72 49 | 50 | block_size: ${horizon} # Length of history/context 51 | predict_offsets: True 52 | offset_loss_scale: 1000.0 # actions are very small 53 | focal_loss_gamma: 2.0 54 | action_dim: ${action_dim} 55 | 56 | horizon: ${horizon} 57 | n_obs_steps: ${n_obs_steps} 58 | n_action_steps: ${n_action_steps} 59 | 60 | dataloader: 61 | batch_size: 256 62 | num_workers: 1 63 | shuffle: True 64 | pin_memory: True 65 | persistent_workers: False 66 | 67 | val_dataloader: 68 | batch_size: 256 69 | num_workers: 1 70 | shuffle: False 71 | pin_memory: True 72 | persistent_workers: False 73 | 74 | optimizer: 75 | learning_rate: 0.0001 # 1e-4 76 | weight_decay: 0.1 77 | betas: [0.9, 0.95] 78 | 79 | training: 80 | device: "cuda:0" 81 | seed: 42 82 | debug: False 83 | resume: True 84 | # optimization 85 | lr_scheduler: cosine 86 | lr_warmup_steps: 500 87 | num_epochs: 5000 88 | gradient_accumulate_every: 1 89 | grad_norm_clip: 1.0 90 | enable_normalizer: True 91 | # training loop control 92 | # in epochs 93 | rollout_every: 50 94 | checkpoint_every: 50 95 | val_every: 1 96 | sample_every: 5 97 | # steps per epoch 98 | max_train_steps: null 99 | max_val_steps: null 100 | # misc 101 | tqdm_interval_sec: 1.0 102 | 103 | logging: 104 | project: diffusion_policy_debug 105 | resume: True 106 | mode: online 107 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 108 | tags: ["${name}", "${task_name}", "${exp_name}"] 109 | id: null 110 | group: null 111 | 112 | checkpoint: 113 | topk: 114 | monitor_key: test_mean_score 115 | mode: max 116 | k: 5 117 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 118 | save_last_ckpt: True 119 | save_last_snapshot: False 120 | 121 | multi_run: 122 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 123 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 124 | 125 | hydra: 126 | job: 127 | override_dirname: ${name} 128 | run: 129 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 130 | sweep: 131 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 132 | subdir: ${hydra.job.num} 133 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_ibc_dfo_hybrid_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: pusht_image 4 | 5 | name: train_ibc_dfo_hybrid 6 | _target_: diffusion_policy.workspace.train_ibc_dfo_hybrid_workspace.TrainIbcDfoHybridWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: 2 13 | n_obs_steps: 2 14 | n_action_steps: 1 15 | n_latency_steps: 0 16 | dataset_obs_steps: ${n_obs_steps} 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.ibc_dfo_hybrid_image_policy.IbcDfoHybridImagePolicy 22 | 23 | shape_meta: ${shape_meta} 24 | 25 | horizon: ${horizon} 26 | n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} 27 | n_obs_steps: ${n_obs_steps} 28 | dropout: 0.1 29 | train_n_neg: 1024 30 | pred_n_iter: 5 31 | pred_n_samples: 1024 32 | kevin_inference: False 33 | andy_train: False 34 | obs_encoder_group_norm: True 35 | eval_fixed_crop: True 36 | crop_shape: [84, 84] 37 | 38 | dataloader: 39 | batch_size: 128 40 | num_workers: 8 41 | shuffle: True 42 | pin_memory: True 43 | 
persistent_workers: False 44 | 45 | val_dataloader: 46 | batch_size: 128 47 | num_workers: 8 48 | shuffle: False 49 | pin_memory: True 50 | persistent_workers: False 51 | 52 | optimizer: 53 | _target_: torch.optim.AdamW 54 | lr: 1.0e-4 55 | betas: [0.95, 0.999] 56 | eps: 1.0e-8 57 | weight_decay: 1.0e-6 58 | 59 | training: 60 | device: "cuda:0" 61 | seed: 42 62 | debug: False 63 | resume: True 64 | # optimization 65 | lr_scheduler: cosine 66 | lr_warmup_steps: 500 67 | num_epochs: 3050 68 | gradient_accumulate_every: 1 69 | # training loop control 70 | # in epochs 71 | rollout_every: 50 72 | checkpoint_every: 50 73 | val_every: 1 74 | sample_every: 5 75 | sample_max_batch: 128 76 | # steps per epoch 77 | max_train_steps: null 78 | max_val_steps: null 79 | # misc 80 | tqdm_interval_sec: 1.0 81 | 82 | logging: 83 | project: diffusion_policy_debug 84 | resume: True 85 | mode: online 86 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 87 | tags: ["${name}", "${task_name}", "${exp_name}"] 88 | id: null 89 | group: null 90 | 91 | checkpoint: 92 | topk: 93 | monitor_key: test_mean_score 94 | mode: max 95 | k: 5 96 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 97 | save_last_ckpt: True 98 | save_last_snapshot: False 99 | 100 | multi_run: 101 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 102 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 103 | 104 | hydra: 105 | job: 106 | override_dirname: ${name} 107 | run: 108 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 109 | sweep: 110 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 111 | subdir: ${hydra.job.num} 112 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_ibc_dfo_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: pusht_lowdim 4 | 5 | name: train_ibc_dfo_lowdim 6 | _target_: diffusion_policy.workspace.train_ibc_dfo_lowdim_workspace.TrainIbcDfoLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | keypoint_dim: ${task.keypoint_dim} 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 2 15 | n_obs_steps: 2 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | 21 | policy: 22 | _target_: diffusion_policy.policy.ibc_dfo_lowdim_policy.IbcDfoLowdimPolicy 23 | 24 | horizon: ${horizon} 25 | obs_dim: ${obs_dim} 26 | action_dim: ${action_dim} 27 | n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} 28 | n_obs_steps: ${n_obs_steps} 29 | dropout: 0.1 30 | train_n_neg: 1024 31 | pred_n_iter: 5 32 | pred_n_samples: 1024 33 | kevin_inference: False 34 | andy_train: False 35 | 36 | dataloader: 37 | batch_size: 256 38 | num_workers: 1 39 | shuffle: True 40 | pin_memory: True 41 | persistent_workers: False 42 | 43 | val_dataloader: 44 | batch_size: 256 45 | num_workers: 1 46 | shuffle: False 47 | pin_memory: True 48 | persistent_workers: False 49 | 50 | optimizer: 51 | _target_: torch.optim.AdamW 52 | lr: 1.0e-4 53 | betas: [0.95, 0.999] 54 | eps: 1.0e-8 55 | weight_decay: 1.0e-6 56 | 57 | training: 58 | device: "cuda:0" 59 | seed: 42 60 | debug: False 61 | resume: True 62 | # optimization 63 | lr_scheduler: cosine 64 | lr_warmup_steps: 500 65 | num_epochs: 5000 66 | gradient_accumulate_every: 1 67 | # training loop control 68 | # in epochs 69 | 
rollout_every: 50 70 | checkpoint_every: 50 71 | val_every: 1 72 | sample_every: 5 73 | sample_max_batch: 128 74 | # steps per epoch 75 | max_train_steps: null 76 | max_val_steps: null 77 | # misc 78 | tqdm_interval_sec: 1.0 79 | 80 | logging: 81 | project: diffusion_policy_debug 82 | resume: True 83 | mode: online 84 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 85 | tags: ["${name}", "${task_name}", "${exp_name}"] 86 | id: null 87 | group: null 88 | 89 | checkpoint: 90 | topk: 91 | monitor_key: test_mean_score 92 | mode: max 93 | k: 5 94 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 95 | save_last_ckpt: True 96 | save_last_snapshot: False 97 | 98 | multi_run: 99 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 100 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 101 | 102 | hydra: 103 | job: 104 | override_dirname: ${name} 105 | run: 106 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 107 | sweep: 108 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 109 | subdir: ${hydra.job.num} 110 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_ibc_dfo_real_hybrid_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: real_pusht_image 4 | 5 | name: train_ibc_dfo_hybrid 6 | _target_: diffusion_policy.workspace.train_ibc_dfo_hybrid_workspace.TrainIbcDfoHybridWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: 2 13 | n_obs_steps: 2 14 | n_action_steps: 1 15 | n_latency_steps: 1 16 | dataset_obs_steps: ${n_obs_steps} 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.ibc_dfo_hybrid_image_policy.IbcDfoHybridImagePolicy 22 | 23 | shape_meta: ${shape_meta} 24 | 25 | horizon: ${horizon} 26 | n_action_steps: ${n_action_steps} 27 | n_obs_steps: ${n_obs_steps} 28 | dropout: 0.1 29 | train_n_neg: 256 30 | pred_n_iter: 3 31 | pred_n_samples: 1024 32 | kevin_inference: False 33 | andy_train: False 34 | obs_encoder_group_norm: True 35 | eval_fixed_crop: True 36 | crop_shape: [216, 288] # ch, cw 320x240 90% 37 | 38 | dataloader: 39 | batch_size: 128 40 | num_workers: 8 41 | shuffle: True 42 | pin_memory: True 43 | persistent_workers: False 44 | 45 | val_dataloader: 46 | batch_size: 128 47 | num_workers: 1 48 | shuffle: False 49 | pin_memory: True 50 | persistent_workers: False 51 | 52 | optimizer: 53 | _target_: torch.optim.AdamW 54 | lr: 1.0e-4 55 | betas: [0.95, 0.999] 56 | eps: 1.0e-8 57 | weight_decay: 1.0e-6 58 | 59 | training: 60 | device: "cuda:0" 61 | seed: 42 62 | debug: False 63 | resume: True 64 | # optimization 65 | lr_scheduler: cosine 66 | lr_warmup_steps: 500 67 | num_epochs: 1000 68 | gradient_accumulate_every: 1 69 | # training loop control 70 | # in epochs 71 | rollout_every: 50 72 | checkpoint_every: 5 73 | val_every: 1 74 | sample_every: 5 75 | sample_max_batch: 128 76 | # steps per epoch 77 | max_train_steps: null 78 | max_val_steps: null 79 | # misc 80 | tqdm_interval_sec: 1.0 81 | 82 | logging: 83 | project: diffusion_policy_debug 84 | resume: True 85 | mode: online 86 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 87 | tags: ["${name}", "${task_name}", "${exp_name}"] 88 | id: null 89 | group: null 90 | 91 | checkpoint: 92 | topk: 93 | monitor_key: train_action_mse_error 94 | 
mode: min 95 | k: 5 96 | format_str: 'epoch={epoch:04d}-train_action_mse_error={train_action_mse_error:.3f}.ckpt' 97 | save_last_ckpt: True 98 | save_last_snapshot: False 99 | 100 | multi_run: 101 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 102 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 103 | 104 | hydra: 105 | job: 106 | override_dirname: ${name} 107 | run: 108 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 109 | sweep: 110 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 111 | subdir: ${hydra.job.num} 112 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_image_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: lift_image 4 | 5 | name: train_robomimic_image 6 | _target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: &horizon 10 13 | n_obs_steps: 1 14 | n_action_steps: 1 15 | n_latency_steps: 0 16 | dataset_obs_steps: *horizon 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy 22 | shape_meta: ${shape_meta} 23 | algo_name: bc_rnn 24 | obs_type: image 25 | # oc.select resolver: key, default 26 | task_name: ${oc.select:task.task_name,lift} 27 | dataset_type: ${oc.select:task.dataset_type,ph} 28 | crop_shape: [76,76] 29 | 30 | dataloader: 31 | batch_size: 64 32 | num_workers: 16 33 | shuffle: True 34 | pin_memory: True 35 | persistent_workers: False 36 | 37 | val_dataloader: 38 | batch_size: 64 39 | num_workers: 16 40 | shuffle: False 41 | pin_memory: True 42 | persistent_workers: False 43 | 44 | training: 45 | device: "cuda:0" 46 | seed: 42 47 | debug: False 48 | resume: True 49 | # optimization 50 | num_epochs: 3050 51 | # training loop control 52 | # in epochs 53 | rollout_every: 50 54 | checkpoint_every: 50 55 | val_every: 1 56 | sample_every: 5 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: test_mean_score 75 | mode: max 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: pusht_lowdim 4 | 5 | name: train_robomimic_lowdim 6 | _target_: 
diffusion_policy.workspace.train_robomimic_lowdim_workspace.TrainRobomimicLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | transition_dim: "${eval: ${task.obs_dim} + ${task.action_dim}}" 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 10 15 | n_obs_steps: 1 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | 21 | policy: 22 | _target_: diffusion_policy.policy.robomimic_lowdim_policy.RobomimicLowdimPolicy 23 | action_dim: ${action_dim} 24 | obs_dim: ${obs_dim} 25 | algo_name: bc_rnn 26 | obs_type: low_dim 27 | # oc.select resolver: key, default 28 | task_name: ${oc.select:task.task_name,lift} 29 | dataset_type: ${oc.select:task.dataset_type,ph} 30 | 31 | dataloader: 32 | batch_size: 256 33 | num_workers: 1 34 | shuffle: True 35 | pin_memory: True 36 | persistent_workers: False 37 | 38 | val_dataloader: 39 | batch_size: 256 40 | num_workers: 1 41 | shuffle: False 42 | pin_memory: True 43 | persistent_workers: False 44 | 45 | training: 46 | device: "cuda:0" 47 | seed: 42 48 | debug: False 49 | resume: True 50 | # optimization 51 | num_epochs: 5000 52 | # training loop control 53 | # in epochs 54 | rollout_every: 50 55 | checkpoint_every: 50 56 | val_every: 1 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: test_mean_score 75 | mode: max 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_real_image_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: real_pusht_image 4 | 5 | name: train_robomimic_image 6 | _target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: &horizon 10 13 | n_obs_steps: 1 14 | n_action_steps: 1 15 | n_latency_steps: 1 16 | dataset_obs_steps: *horizon 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy 22 | shape_meta: ${shape_meta} 23 | algo_name: bc_rnn 24 | obs_type: image 25 | # oc.select resolver: key, default 26 | task_name: ${oc.select:task.task_name,tool_hang} 27 | dataset_type: ${oc.select:task.dataset_type,ph} 28 | crop_shape: [216, 288] # ch, cw 320x240 90% 29 | 30 | dataloader: 31 | batch_size: 32 32 | num_workers: 8 33 | shuffle: True 34 | pin_memory: True 35 | persistent_workers: True 36 | 37 | 
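# dataloader/val_dataloader mirror torch.utils.data.DataLoader keyword arguments; the workspace
# builds the loaders roughly like this (sketch, not the exact workspace code):
#   from torch.utils.data import DataLoader
#   train_loader = DataLoader(dataset, **cfg.dataloader)
#   val_loader = DataLoader(dataset.get_validation_dataset(), **cfg.val_dataloader)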
val_dataloader: 38 | batch_size: 32 39 | num_workers: 1 40 | shuffle: False 41 | pin_memory: True 42 | persistent_workers: False 43 | 44 | training: 45 | device: "cuda:0" 46 | seed: 42 47 | debug: False 48 | resume: True 49 | # optimization 50 | num_epochs: 1000 51 | # training loop control 52 | # in epochs 53 | rollout_every: 50 54 | checkpoint_every: 50 55 | val_every: 1 56 | sample_every: 5 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: train_loss 75 | mode: min 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/dataset/base_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/gibson_dataset.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import cv2 3 | import bz2 4 | import math 5 | 
import json 6 | import tqdm 7 | import h5py 8 | import glob 9 | import torch 10 | import random 11 | import numpy as np 12 | import os.path as osp 13 | import _pickle as cPickle 14 | import skimage.morphology as skmp 15 | from torch.utils.data import Dataset 16 | import os 17 | import clip 18 | from diffusion_policy.common.pytorch_util import dict_apply 19 | from torch.utils.data import DataLoader 20 | from diffusion_policy.model.common.normalizer import LinearNormalizer 21 | from diffusion_policy.common.normalize_util import get_image_range_normalizer 22 | from typing import Dict 23 | 24 | def count_file_in_folder(path): 25 | count = 0 26 | for _, _, files in os.walk(path): 27 | count += len(files) 28 | return count 29 | 30 | class TrajectoryDataset(Dataset): 31 | def __init__(self, train_idx): 32 | self.train_idx = train_idx 33 | 34 | def get_normalizer(self, mode='limits', **kwargs): 35 | with bz2.BZ2File("..diffusion/data/sample_h16/{}.pbz2".format(str(0)), 'rb') as fp: 36 | tmp_data = cPickle.load(fp) 37 | 38 | data = { 39 | 'clip_feature': tmp_data['obs']['clip_feature'].numpy(), 40 | 'action': tmp_data['action'].numpy() 41 | } 42 | 43 | normalizer = LinearNormalizer() 44 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 45 | return normalizer 46 | 47 | def __len__(self): 48 | return len(self.train_idx) 49 | 50 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 51 | tmp_idx = self.train_idx[idx] 52 | with bz2.BZ2File("..diffusion/data/sample_h16/{}.pbz2".format(str(tmp_idx)), 'rb') as fp: 53 | data = cPickle.load(fp) 54 | return data 55 | 56 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/kitchen_lowdim_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import numpy as np 4 | import copy 5 | import pathlib 6 | from diffusion_policy.common.pytorch_util import dict_apply 7 | from diffusion_policy.common.replay_buffer import ReplayBuffer 8 | from diffusion_policy.common.sampler import SequenceSampler, get_val_mask 9 | from diffusion_policy.model.common.normalizer import LinearNormalizer, SingleFieldLinearNormalizer 10 | from diffusion_policy.dataset.base_dataset import BaseLowdimDataset 11 | 12 | class KitchenLowdimDataset(BaseLowdimDataset): 13 | def __init__(self, 14 | dataset_dir, 15 | horizon=1, 16 | pad_before=0, 17 | pad_after=0, 18 | seed=42, 19 | val_ratio=0.0 20 | ): 21 | super().__init__() 22 | 23 | data_directory = pathlib.Path(dataset_dir) 24 | observations = np.load(data_directory / "observations_seq.npy") 25 | actions = np.load(data_directory / "actions_seq.npy") 26 | masks = np.load(data_directory / "existence_mask.npy") 27 | 28 | self.replay_buffer = ReplayBuffer.create_empty_numpy() 29 | for i in range(len(masks)): 30 | eps_len = int(masks[i].sum()) 31 | obs = observations[i,:eps_len].astype(np.float32) 32 | action = actions[i,:eps_len].astype(np.float32) 33 | data = { 34 | 'obs': obs, 35 | 'action': action 36 | } 37 | self.replay_buffer.add_episode(data) 38 | 39 | val_mask = get_val_mask( 40 | n_episodes=self.replay_buffer.n_episodes, 41 | val_ratio=val_ratio, 42 | seed=seed) 43 | train_mask = ~val_mask 44 | self.sampler = SequenceSampler( 45 | replay_buffer=self.replay_buffer, 46 | sequence_length=horizon, 47 | pad_before=pad_before, 48 | pad_after=pad_after, 49 | episode_mask=train_mask) 50 | 51 | self.train_mask = train_mask 52 | self.horizon = horizon 53 | self.pad_before = pad_before 54 | 
self.pad_after = pad_after 55 | 56 | def get_validation_dataset(self): 57 | val_set = copy.copy(self) 58 | val_set.sampler = SequenceSampler( 59 | replay_buffer=self.replay_buffer, 60 | sequence_length=self.horizon, 61 | pad_before=self.pad_before, 62 | pad_after=self.pad_after, 63 | episode_mask=~self.train_mask 64 | ) 65 | val_set.train_mask = ~self.train_mask 66 | return val_set 67 | 68 | def get_normalizer(self, mode='limits', **kwargs): 69 | data = { 70 | 'obs': self.replay_buffer['obs'], 71 | 'action': self.replay_buffer['action'] 72 | } 73 | if 'range_eps' not in kwargs: 74 | # to prevent blowing up dims that barely change 75 | kwargs['range_eps'] = 5e-2 76 | normalizer = LinearNormalizer() 77 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 78 | return normalizer 79 | 80 | def get_all_actions(self) -> torch.Tensor: 81 | return torch.from_numpy(self.replay_buffer['action']) 82 | 83 | def __len__(self) -> int: 84 | return len(self.sampler) 85 | 86 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 87 | sample = self.sampler.sample_sequence(idx) 88 | data = sample 89 | 90 | torch_data = dict_apply(data, torch.from_numpy) 91 | return torch_data 92 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/pusht_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import numpy as np 4 | import copy 5 | from diffusion_policy.common.pytorch_util import dict_apply 6 | from diffusion_policy.common.replay_buffer import ReplayBuffer 7 | from diffusion_policy.common.sampler import ( 8 | SequenceSampler, get_val_mask, downsample_mask) 9 | from diffusion_policy.model.common.normalizer import LinearNormalizer 10 | from diffusion_policy.dataset.base_dataset import BaseLowdimDataset 11 | 12 | class PushTLowdimDataset(BaseLowdimDataset): 13 | def __init__(self, 14 | zarr_path, 15 | horizon=1, 16 | pad_before=0, 17 | pad_after=0, 18 | obs_key='keypoint', 19 | state_key='state', 20 | action_key='action', 21 | seed=42, 22 | val_ratio=0.0, 23 | max_train_episodes=None 24 | ): 25 | super().__init__() 26 | self.replay_buffer = ReplayBuffer.copy_from_path( 27 | zarr_path, keys=[obs_key, state_key, action_key]) 28 | 29 | val_mask = get_val_mask( 30 | n_episodes=self.replay_buffer.n_episodes, 31 | val_ratio=val_ratio, 32 | seed=seed) 33 | train_mask = ~val_mask 34 | train_mask = downsample_mask( 35 | mask=train_mask, 36 | max_n=max_train_episodes, 37 | seed=seed) 38 | 39 | self.sampler = SequenceSampler( 40 | replay_buffer=self.replay_buffer, 41 | sequence_length=horizon, 42 | pad_before=pad_before, 43 | pad_after=pad_after, 44 | episode_mask=train_mask 45 | ) 46 | self.obs_key = obs_key 47 | self.state_key = state_key 48 | self.action_key = action_key 49 | self.train_mask = train_mask 50 | self.horizon = horizon 51 | self.pad_before = pad_before 52 | self.pad_after = pad_after 53 | 54 | def get_validation_dataset(self): 55 | val_set = copy.copy(self) 56 | val_set.sampler = SequenceSampler( 57 | replay_buffer=self.replay_buffer, 58 | sequence_length=self.horizon, 59 | pad_before=self.pad_before, 60 | pad_after=self.pad_after, 61 | episode_mask=~self.train_mask 62 | ) 63 | val_set.train_mask = ~self.train_mask 64 | return val_set 65 | 66 | def get_normalizer(self, mode='limits', **kwargs): 67 | data = self._sample_to_data(self.replay_buffer) 68 | normalizer = LinearNormalizer() 69 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 70 
| return normalizer 71 | 72 | def get_all_actions(self) -> torch.Tensor: 73 | return torch.from_numpy(self.replay_buffer[self.action_key]) 74 | 75 | def __len__(self) -> int: 76 | return len(self.sampler) 77 | 78 | def _sample_to_data(self, sample): 79 | keypoint = sample[self.obs_key] 80 | state = sample[self.state_key] 81 | agent_pos = state[:,:2] 82 | obs = np.concatenate([ 83 | keypoint.reshape(keypoint.shape[0], -1), 84 | agent_pos], axis=-1) 85 | 86 | data = { 87 | 'obs': obs, # T, D_o 88 | 'action': sample[self.action_key], # T, D_a 89 | } 90 | return data 91 | 92 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 93 | sample = self.sampler.sample_sequence(idx) 94 | data = self._sample_to_data(sample) 95 | 96 | torch_data = dict_apply(data, torch.from_numpy) 97 | return torch_data 98 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/pusht_image_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import numpy as np 4 | import copy 5 | from diffusion_policy.common.pytorch_util import dict_apply 6 | from diffusion_policy.common.replay_buffer import ReplayBuffer 7 | from diffusion_policy.common.sampler import ( 8 | SequenceSampler, get_val_mask, downsample_mask) 9 | from diffusion_policy.model.common.normalizer import LinearNormalizer 10 | from diffusion_policy.dataset.base_dataset import BaseImageDataset 11 | from diffusion_policy.common.normalize_util import get_image_range_normalizer 12 | 13 | class PushTImageDataset(BaseImageDataset): 14 | def __init__(self, 15 | zarr_path, 16 | horizon=1, 17 | pad_before=0, 18 | pad_after=0, 19 | seed=42, 20 | val_ratio=0.0, 21 | max_train_episodes=None 22 | ): 23 | 24 | super().__init__() 25 | self.replay_buffer = ReplayBuffer.copy_from_path( 26 | zarr_path, keys=['img', 'state', 'action']) 27 | val_mask = get_val_mask( 28 | n_episodes=self.replay_buffer.n_episodes, 29 | val_ratio=val_ratio, 30 | seed=seed) 31 | train_mask = ~val_mask 32 | train_mask = downsample_mask( 33 | mask=train_mask, 34 | max_n=max_train_episodes, 35 | seed=seed) 36 | 37 | self.sampler = SequenceSampler( 38 | replay_buffer=self.replay_buffer, 39 | sequence_length=horizon, 40 | pad_before=pad_before, 41 | pad_after=pad_after, 42 | episode_mask=train_mask) 43 | self.train_mask = train_mask 44 | self.horizon = horizon 45 | self.pad_before = pad_before 46 | self.pad_after = pad_after 47 | 48 | def get_validation_dataset(self): 49 | val_set = copy.copy(self) 50 | val_set.sampler = SequenceSampler( 51 | replay_buffer=self.replay_buffer, 52 | sequence_length=self.horizon, 53 | pad_before=self.pad_before, 54 | pad_after=self.pad_after, 55 | episode_mask=~self.train_mask 56 | ) 57 | val_set.train_mask = ~self.train_mask 58 | return val_set 59 | 60 | def get_normalizer(self, mode='limits', **kwargs): 61 | data = { 62 | 'action': self.replay_buffer['action'], 63 | 'agent_pos': self.replay_buffer['state'][...,:2] 64 | } 65 | normalizer = LinearNormalizer() 66 | normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) 67 | normalizer['image'] = get_image_range_normalizer() 68 | return normalizer 69 | 70 | def __len__(self) -> int: 71 | return len(self.sampler) 72 | 73 | def _sample_to_data(self, sample): 74 | agent_pos = sample['state'][:,:2].astype(np.float32) # (agent_posx2, block_posex3) 75 | image = np.moveaxis(sample['img'],-1,1)/255 76 | 77 | data = { 78 | 'obs': { 79 | 'image': image, # T, 3, 96, 96 80 | 'agent_pos': 
agent_pos, # T, 2 81 | }, 82 | 'action': sample['action'].astype(np.float32) # T, 2 83 | } 84 | return data 85 | 86 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 87 | sample = self.sampler.sample_sequence(idx) 88 | data = self._sample_to_data(sample) 89 | torch_data = dict_apply(data, torch.from_numpy) # change to tensor 90 | return torch_data 91 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img.tar.gz -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/0.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/1.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/10.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/11.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/12.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/13.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/14.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/15.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/2.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/3.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/4.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/5.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/6.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/7.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/8.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/9.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/action.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/action.png -------------------------------------------------------------------------------- /diffusion_policy/dataset/test_img/pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/dataset/test_img/pose.png -------------------------------------------------------------------------------- /diffusion_policy/env_runner/base_image_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from diffusion_policy.policy.base_image_policy import 
BaseImagePolicy 3 | 4 | class BaseImageRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | def run(self, policy: BaseImagePolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /diffusion_policy/env_runner/base_lowdim_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy 3 | 4 | class BaseLowdimRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | def run(self, policy: BaseLowdimPolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /diffusion_policy/env_runner/real_pusht_image_runner.py: -------------------------------------------------------------------------------- 1 | from diffusion_policy.policy.base_image_policy import BaseImagePolicy 2 | from diffusion_policy.env_runner.base_image_runner import BaseImageRunner 3 | 4 | class RealPushTImageRunner(BaseImageRunner): 5 | def __init__(self, 6 | output_dir): 7 | super().__init__(output_dir) 8 | 9 | def run(self, policy: BaseImagePolicy): 10 | return dict() 11 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_recording_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from diffusion_policy.real_world.video_recorder import VideoRecorder 4 | 5 | class VideoRecordingWrapper(gym.Wrapper): 6 | def __init__(self, 7 | env, 8 | video_recoder: VideoRecorder, 9 | mode='rgb_array', 10 | file_path=None, 11 | steps_per_render=1, 12 | **kwargs 13 | ): 14 | """ 15 | When file_path is None, don't record. 
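        When recording, a frame is captured via env.render(mode) every steps_per_render
        environment steps and written to the output video.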
16 | """ 17 | super().__init__(env) 18 | 19 | self.mode = mode 20 | self.render_kwargs = kwargs 21 | self.steps_per_render = steps_per_render 22 | self.file_path = file_path 23 | self.video_recoder = video_recoder 24 | 25 | self.step_count = 0 26 | 27 | def reset(self, **kwargs): 28 | obs = super().reset(**kwargs) 29 | self.frames = list() 30 | self.step_count = 1 31 | self.video_recoder.stop() 32 | return obs 33 | 34 | def step(self, action): 35 | result = super().step(action) 36 | self.step_count += 1 37 | if self.file_path is not None \ 38 | and ((self.step_count % self.steps_per_render) == 0): 39 | if not self.video_recoder.is_ready(): 40 | self.video_recoder.start(self.file_path) 41 | 42 | frame = self.env.render( 43 | mode=self.mode, **self.render_kwargs) 44 | assert frame.dtype == np.uint8 45 | self.video_recoder.write_frame(frame) 46 | return result 47 | 48 | def render(self, mode='rgb_array', **kwargs): 49 | if self.video_recoder.is_ready(): 50 | self.video_recoder.stop() 51 | return self.file_path 52 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class VideoWrapper(gym.Wrapper): 5 | def __init__(self, 6 | env, 7 | mode='rgb_array', 8 | enabled=True, 9 | steps_per_render=1, 10 | **kwargs 11 | ): 12 | super().__init__(env) 13 | 14 | self.mode = mode 15 | self.enabled = enabled 16 | self.render_kwargs = kwargs 17 | self.steps_per_render = steps_per_render 18 | 19 | self.frames = list() 20 | self.step_count = 0 21 | 22 | def reset(self, **kwargs): 23 | obs = super().reset(**kwargs) 24 | self.frames = list() 25 | self.step_count = 1 26 | if self.enabled: 27 | frame = self.env.render( 28 | mode=self.mode, **self.render_kwargs) 29 | assert frame.dtype == np.uint8 30 | self.frames.append(frame) 31 | return obs 32 | 33 | def step(self, action): 34 | result = super().step(action) 35 | self.step_count += 1 36 | if self.enabled and ((self.step_count % self.steps_per_render) == 0): 37 | frame = self.env.render( 38 | mode=self.mode, **self.render_kwargs) 39 | assert frame.dtype == np.uint8 40 | self.frames.append(frame) 41 | return result 42 | 43 | def render(self, mode='rgb_array', **kwargs): 44 | return self.frames 45 | -------------------------------------------------------------------------------- /diffusion_policy/model/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/model/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/model/bet/action_ae/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | import abc 5 | 6 | from typing import Optional, Union 7 | 8 | import diffusion_policy.model.bet.utils as utils 9 | 10 | 11 | class AbstractActionAE(utils.SaveModule, abc.ABC): 12 | @abc.abstractmethod 13 | def fit_model( 14 | self, 15 | input_dataloader: DataLoader, 16 | eval_dataloader: DataLoader, 17 | obs_encoding_net: Optional[nn.Module] = None, 18 | ) -> None: 19 | pass 20 | 21 | @abc.abstractmethod 22 | def encode_into_latent( 23 | self, 24 | input_action: torch.Tensor, 25 | input_rep: Optional[torch.Tensor], 26 | ) -> torch.Tensor: 27 | """ 28 | Given 
the input action, discretize it. 29 | 30 | Inputs: 31 | input_action (shape: ... x action_dim): The input action to discretize. This can be in a batch, 32 | and is generally assumed that the last dimnesion is the action dimension. 33 | 34 | Outputs: 35 | discretized_action (shape: ... x num_tokens): The discretized action. 36 | """ 37 | raise NotImplementedError 38 | 39 | @abc.abstractmethod 40 | def decode_actions( 41 | self, 42 | latent_action_batch: Optional[torch.Tensor], 43 | input_rep_batch: Optional[torch.Tensor] = None, 44 | ) -> torch.Tensor: 45 | """ 46 | Given a discretized action, convert it to a continuous action. 47 | 48 | Inputs: 49 | latent_action_batch (shape: ... x num_tokens): The discretized action 50 | generated by the discretizer. 51 | 52 | Outputs: 53 | continuous_action (shape: ... x action_dim): The continuous action. 54 | """ 55 | raise NotImplementedError 56 | 57 | @property 58 | @abc.abstractmethod 59 | def num_latents(self) -> Union[int, float]: 60 | """ 61 | Number of possible latents for this generator, useful for state priors that use softmax. 62 | """ 63 | return float("inf") 64 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/latent_generators/latent_generator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from typing import Tuple, Optional 4 | 5 | import diffusion_policy.model.bet.utils as utils 6 | 7 | 8 | class AbstractLatentGenerator(abc.ABC, utils.SaveModule): 9 | """ 10 | Abstract class for a generative model that can generate latents given observation representations. 11 | 12 | In the probabilisitc sense, this model fits and samples from P(latent|observation) given some observation. 13 | """ 14 | 15 | @abc.abstractmethod 16 | def get_latent_and_loss( 17 | self, 18 | obs_rep: torch.Tensor, 19 | target_latents: torch.Tensor, 20 | seq_masks: Optional[torch.Tensor] = None, 21 | ) -> Tuple[torch.Tensor, torch.Tensor]: 22 | """ 23 | Given a set of observation representation and generated latents, get the encoded latent and the loss. 24 | 25 | Inputs: 26 | input_action: Batch of the actions taken in the multimodal demonstrations. 27 | target_latents: Batch of the latents that the generator should learn to generate the actions from. 28 | seq_masks: Batch of masks that indicate which timesteps are valid. 29 | 30 | Outputs: 31 | latent: The sampled latent from the observation. 32 | loss: The loss of the latent generator. 33 | """ 34 | pass 35 | 36 | @abc.abstractmethod 37 | def generate_latents( 38 | self, seq_obses: torch.Tensor, seq_masks: torch.Tensor 39 | ) -> torch.Tensor: 40 | """ 41 | Given a batch of sequences of observations, generate a batch of sequences of latents. 42 | 43 | Inputs: 44 | seq_obses: Batch of sequences of observations, of shape seq x batch x dim, following the transformer convention. 45 | seq_masks: Batch of sequences of masks, of shape seq x batch, following the transformer convention. 46 | 47 | Outputs: 48 | seq_latents: Batch of sequences of latents of shape seq x batch x latent_dim. 49 | """ 50 | pass 51 | 52 | def get_optimizer( 53 | self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] 54 | ) -> torch.optim.Optimizer: 55 | """ 56 | Default optimizer class. Override this if you want to use a different optimizer. 
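As a minimal sketch of such an override (AdamW is chosen purely for illustration and is an assumption, not something this class prescribes):

    def get_optimizer(self, weight_decay, learning_rate, betas):
        # swap the default Adam below for AdamW with the same hyperparameters
        return torch.optim.AdamW(
            self.parameters(), lr=learning_rate,
            weight_decay=weight_decay, betas=betas
        )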
57 | """ 58 | return torch.optim.Adam( 59 | self.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=betas 60 | ) 61 | 62 | 63 | class LatentGeneratorDataParallel(torch.nn.DataParallel): 64 | def get_latent_and_loss(self, *args, **kwargs): 65 | return self.module.get_latent_and_loss(*args, **kwargs) # type: ignore 66 | 67 | def generate_latents(self, *args, **kwargs): 68 | return self.module.generate_latents(*args, **kwargs) # type: ignore 69 | 70 | def get_optimizer(self, *args, **kwargs): 71 | return self.module.get_optimizer(*args, **kwargs) # type: ignore 72 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/libraries/mingpt/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) Copyright (c) 2020 Andrej Karpathy 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | 9 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/libraries/mingpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/model/bet/libraries/mingpt/__init__.py -------------------------------------------------------------------------------- /diffusion_policy/model/bet/libraries/mingpt/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import functional as F 6 | 7 | 8 | def set_seed(seed): 9 | random.seed(seed) 10 | np.random.seed(seed) 11 | torch.manual_seed(seed) 12 | torch.cuda.manual_seed_all(seed) 13 | 14 | 15 | def top_k_logits(logits, k): 16 | v, ix = torch.topk(logits, k) 17 | out = logits.clone() 18 | out[out < v[:, [-1]]] = -float("Inf") 19 | return out 20 | 21 | 22 | @torch.no_grad() 23 | def sample(model, x, steps, temperature=1.0, sample=False, top_k=None): 24 | """ 25 | take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in 26 | the sequence, feeding the predictions back into the model each time. Clearly the sampling 27 | has quadratic complexity unlike an RNN that is only linear, and has a finite context window 28 | of block_size, unlike an RNN that has an infinite context window. 
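A typical call might look like the following sketch; the model name gpt and the start-token index are illustrative assumptions, and any minGPT-style model exposing get_block_size() should work:

    context = torch.zeros((1, 1), dtype=torch.long)  # (b, t) batch with a single conditioning token
    out = sample(gpt, context, steps=10, temperature=1.0, sample=True, top_k=5)
    # out has shape (1, 11): the original context followed by 10 newly generated token indices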
29 | """ 30 | block_size = model.get_block_size() 31 | model.eval() 32 | for k in range(steps): 33 | x_cond = ( 34 | x if x.size(1) <= block_size else x[:, -block_size:] 35 | ) # crop context if needed 36 | logits, _ = model(x_cond) 37 | # pluck the logits at the final step and scale by temperature 38 | logits = logits[:, -1, :] / temperature 39 | # optionally crop probabilities to only the top k options 40 | if top_k is not None: 41 | logits = top_k_logits(logits, top_k) 42 | # apply softmax to convert to probabilities 43 | probs = F.softmax(logits, dim=-1) 44 | # sample from the distribution or take the most likely 45 | if sample: 46 | ix = torch.multinomial(probs, num_samples=1) 47 | else: 48 | _, ix = torch.topk(probs, k=1, dim=-1) 49 | # append to the sequence and continue 50 | x = torch.cat((x, ix), dim=1) 51 | 52 | return x 53 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | # if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from diffusers.optimization import ( 2 | Union, SchedulerType, Optional, 3 | Optimizer, TYPE_TO_SCHEDULER_FUNCTION 4 | ) 5 | 6 | def get_scheduler( 7 | name: Union[str, SchedulerType], 8 | optimizer: Optimizer, 9 | num_warmup_steps: Optional[int] = None, 10 | num_training_steps: Optional[int] = None, 11 | **kwargs 12 | ): 13 | """ 14 | Added kwargs vs diffuser's original implementation 15 | 16 | Unified API to get any scheduler from its name. 17 | 18 | Args: 19 | name (`str` or `SchedulerType`): 20 | The name of the scheduler to use. 21 | optimizer (`torch.optim.Optimizer`): 22 | The optimizer that will be used during training. 23 | num_warmup_steps (`int`, *optional*): 24 | The number of warmup steps to do. This is not required by all schedulers (hence the argument being 25 | optional), the function will raise an error if it's unset and the scheduler type requires it. 26 | num_training_steps (`int``, *optional*): 27 | The number of training steps to do. 
This is not required by all schedulers (hence the argument being 28 | optional), the function will raise an error if it's unset and the scheduler type requires it. 29 | """ 30 | name = SchedulerType(name) 31 | schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] 32 | if name == SchedulerType.CONSTANT: 33 | return schedule_func(optimizer, **kwargs) 34 | 35 | # All other schedulers require `num_warmup_steps` 36 | if num_warmup_steps is None: 37 | raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.") 38 | 39 | if name == SchedulerType.CONSTANT_WITH_WARMUP: 40 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs) 41 | 42 | # All other schedulers require `num_training_steps` 43 | if num_training_steps is None: 44 | raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") 45 | 46 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 11 | pass 12 | return device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/conv1d_components.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 
horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/ema_model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from torch.nn.modules.batchnorm import _BatchNorm 4 | 5 | class EMAModel: 6 | """ 7 | Exponential Moving Average of models weights 8 | """ 9 | 10 | def __init__( 11 | self, 12 | model, 13 | update_after_step=0, 14 | inv_gamma=1.0, 15 | power=2 / 3, 16 | min_value=0.0, 17 | max_value=0.9999 18 | ): 19 | """ 20 | @crowsonkb's notes on EMA Warmup: 21 | If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan 22 | to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), 23 | gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 24 | at 215.4k steps). 25 | Args: 26 | inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. 27 | power (float): Exponential factor of EMA warmup. Default: 2/3. 28 | min_value (float): The minimum EMA decay rate. Default: 0. 29 | """ 30 | 31 | self.averaged_model = model 32 | self.averaged_model.eval() 33 | self.averaged_model.requires_grad_(False) 34 | 35 | self.update_after_step = update_after_step 36 | self.inv_gamma = inv_gamma 37 | self.power = power 38 | self.min_value = min_value 39 | self.max_value = max_value 40 | 41 | self.decay = 0.0 42 | self.optimization_step = 0 43 | 44 | def get_decay(self, optimization_step): 45 | """ 46 | Compute the decay factor for the exponential moving average. 47 | """ 48 | step = max(0, optimization_step - self.update_after_step - 1) 49 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 50 | 51 | if step <= 0: 52 | return 0.0 53 | 54 | return max(self.min_value, min(value, self.max_value)) 55 | 56 | @torch.no_grad() 57 | def step(self, new_model): 58 | self.decay = self.get_decay(self.optimization_step) 59 | 60 | # old_all_dataptrs = set() 61 | # for param in new_model.parameters(): 62 | # data_ptr = param.data_ptr() 63 | # if data_ptr != 0: 64 | # old_all_dataptrs.add(data_ptr) 65 | 66 | all_dataptrs = set() 67 | for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): 68 | for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): 69 | # iterative over immediate parameters only. 70 | if isinstance(param, dict): 71 | raise RuntimeError('Dict parameter not supported') 72 | 73 | # data_ptr = param.data_ptr() 74 | # if data_ptr != 0: 75 | # all_dataptrs.add(data_ptr) 76 | 77 | if isinstance(module, _BatchNorm): 78 | # skip batchnorms 79 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 80 | elif not param.requires_grad: 81 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 82 | else: 83 | ema_param.mul_(self.decay) 84 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 85 | 86 | # verify that iterating over module and then parameters is identical to parameters recursively. 
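# For intuition on the warmup schedule (an illustrative check, not part of the original code):
# with the defaults inv_gamma=1.0 and power=2/3, get_decay at roughly 31.6K steps gives
# 1 - (1 + 31_600) ** (-2 / 3) ~= 0.999, matching the notes in __init__;
# the value is then clamped to [min_value, max_value].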
87 | # assert old_all_dataptrs == all_dataptrs 88 | self.optimization_step += 1 89 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /diffusion_policy/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | return resnet_model 29 | -------------------------------------------------------------------------------- /diffusion_policy/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /diffusion_policy/policy/base_lowdim_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimPolicy(ModuleAttrMixin): 8 | # ========= inference ============ 9 | # also as self.device and self.dtype for inference device transfer 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | obs: B,To,Do 14 | return: 15 | 
action: B,Ta,Da 16 | To = 3 17 | Ta = 4 18 | T = 6 19 | |o|o|o| 20 | | | |a|a|a|a| 21 | |o|o| 22 | | |a|a|a|a|a| 23 | | | | | |a|a| 24 | """ 25 | raise NotImplementedError() 26 | 27 | # reset state for stateful policies 28 | def reset(self): 29 | pass 30 | 31 | # ========== training =========== 32 | # no standard training interface except setting normalizer 33 | def set_normalizer(self, normalizer: LinearNormalizer): 34 | raise NotImplementedError() 35 | 36 | -------------------------------------------------------------------------------- /diffusion_policy/policy/robomimic_lowdim_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | from diffusion_policy.model.common.normalizer import LinearNormalizer 4 | from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy 5 | 6 | from robomimic.algo import algo_factory 7 | from robomimic.algo.algo import PolicyAlgo 8 | import robomimic.utils.obs_utils as ObsUtils 9 | from diffusion_policy.common.robomimic_config_util import get_robomimic_config 10 | 11 | class RobomimicLowdimPolicy(BaseLowdimPolicy): 12 | def __init__(self, 13 | action_dim, 14 | obs_dim, 15 | algo_name='bc_rnn', 16 | obs_type='low_dim', 17 | task_name='square', 18 | dataset_type='ph', 19 | ): 20 | super().__init__() 21 | # key for robomimic obs input 22 | # previously this is 'object', 'robot0_eef_pos' etc 23 | obs_key = 'obs' 24 | 25 | config = get_robomimic_config( 26 | algo_name=algo_name, 27 | hdf5_type=obs_type, 28 | task_name=task_name, 29 | dataset_type=dataset_type) 30 | with config.unlocked(): 31 | config.observation.modalities.obs.low_dim = [obs_key] 32 | 33 | ObsUtils.initialize_obs_utils_with_config(config) 34 | model: PolicyAlgo = algo_factory( 35 | algo_name=config.algo_name, 36 | config=config, 37 | obs_key_shapes={obs_key: [obs_dim]}, 38 | ac_dim=action_dim, 39 | device='cpu', 40 | ) 41 | self.model = model 42 | self.nets = model.nets 43 | self.normalizer = LinearNormalizer() 44 | self.obs_key = obs_key 45 | self.config = config 46 | 47 | def to(self,*args,**kwargs): 48 | device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs) 49 | if device is not None: 50 | self.model.device = device 51 | super().to(*args,**kwargs) 52 | 53 | # =========== inference ============= 54 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 55 | obs = self.normalizer['obs'].normalize(obs_dict['obs']) 56 | assert obs.shape[1] == 1 57 | robomimic_obs_dict = {self.obs_key: obs[:,0,:]} 58 | naction = self.model.get_action(robomimic_obs_dict) 59 | action = self.normalizer['action'].unnormalize(naction) 60 | # (B, Da) 61 | result = { 62 | 'action': action[:,None,:] # (B, 1, Da) 63 | } 64 | return result 65 | 66 | def reset(self): 67 | self.model.reset() 68 | 69 | # =========== training ============== 70 | def set_normalizer(self, normalizer: LinearNormalizer): 71 | self.normalizer.load_state_dict(normalizer.state_dict()) 72 | 73 | def train_on_batch(self, batch, epoch, validate=False): 74 | nbatch = self.normalizer.normalize(batch) 75 | robomimic_batch = { 76 | 'obs': {self.obs_key: nbatch['obs']}, 77 | 'actions': nbatch['action'] 78 | } 79 | input_batch = self.model.process_batch_for_training( 80 | robomimic_batch) 81 | info = self.model.train_on_batch( 82 | batch=input_batch, epoch=epoch, validate=validate) 83 | # keys: losses, predictions 84 | return info 85 | 86 | def get_optimizer(self): 87 | return 
self.model.optimizers['policy'] 88 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/diffusion_policy/real_world/.DS_Store -------------------------------------------------------------------------------- /diffusion_policy/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/multi_camera_visualizer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import multiprocessing as mp 3 | import numpy as np 4 | import cv2 5 | from threadpoolctl import threadpool_limits 6 | from diffusion_policy.real_world.multi_realsense import MultiRealsense 7 | 8 | class MultiCameraVisualizer(mp.Process): 9 | def __init__(self, 10 | realsense: MultiRealsense, 11 | row, col, 12 | window_name='Multi Cam Vis', 13 | vis_fps=60, 14 | fill_value=0, 15 | rgb_to_bgr=True 16 | ): 17 | super().__init__() 18 | self.row = row 19 | self.col = col 20 | self.window_name = window_name 21 | self.vis_fps = vis_fps 22 | self.fill_value = fill_value 23 | self.rgb_to_bgr=rgb_to_bgr 24 | self.realsense = realsense 25 | # shared variables 26 | self.stop_event = mp.Event() 27 | 28 | def start(self, wait=False): 29 | super().start() 30 | 31 | def stop(self, wait=False): 32 | self.stop_event.set() 33 | if wait: 34 | self.stop_wait() 35 | 36 | def start_wait(self): 37 | pass 38 | 39 | def stop_wait(self): 40 | self.join() 41 | 42 | def run(self): 43 | cv2.setNumThreads(1) 44 | threadpool_limits(1) 45 | channel_slice = slice(None) 46 | if self.rgb_to_bgr: 47 | channel_slice = slice(None,None,-1) 48 | 49 | vis_data = None 50 | vis_img = None 51 | while not self.stop_event.is_set(): 52 | vis_data = self.realsense.get_vis(out=vis_data) 53 | color = vis_data['color'] 54 | N, H, W, C = color.shape 55 | assert C == 3 56 | oh = H * self.row 57 | ow = W * self.col 58 | if vis_img is None: 59 | vis_img = np.full((oh, ow, 3), 60 | fill_value=self.fill_value, 
dtype=np.uint8) 61 | for row in range(self.row): 62 | for col in range(self.col): 63 | idx = col + row * self.col 64 | h_start = H * row 65 | h_end = h_start + H 66 | w_start = W * col 67 | w_end = w_start + W 68 | if idx < N: 69 | # opencv uses bgr 70 | vis_img[h_start:h_end,w_start:w_end 71 | ] = color[idx,:,:,channel_slice] 72 | cv2.imshow(self.window_name, vis_img) 73 | cv2.pollKey() 74 | time.sleep(1 / self.vis_fps) 75 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from diffusion_policy.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/bet_blockpush_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | 10 | import os 11 | import click 12 | import pathlib 13 | import numpy as np 14 | from diffusion_policy.common.replay_buffer import ReplayBuffer 15 | 16 | @click.command() 17 | @click.option('-i', '--input', required=True, help='input dir contains npy files') 18 | @click.option('-o', '--output', required=True, help='output zarr path') 19 | @click.option('--abs_action', is_flag=True, default=False) 20 | def main(input, output, abs_action): 21 | data_directory = pathlib.Path(input) 22 | observations = np.load( 23 | data_directory / "multimodal_push_observations.npy" 24 | ) 25 | actions = np.load(data_directory / "multimodal_push_actions.npy") 26 | masks = np.load(data_directory / "multimodal_push_masks.npy") 27 | 28 | buffer = ReplayBuffer.create_empty_numpy() 29 | for i in range(len(masks)): 30 | eps_len = int(masks[i].sum()) 31 | obs = 
observations[i,:eps_len].astype(np.float32) 32 | action = actions[i,:eps_len].astype(np.float32) 33 | if abs_action: 34 | prev_eef_target = obs[:,8:10] 35 | next_eef_target = prev_eef_target + action 36 | action = next_eef_target 37 | data = { 38 | 'obs': obs, 39 | 'action': action 40 | } 41 | buffer.add_episode(data) 42 | 43 | buffer.save_to_path(zarr_path=output, chunk_length=-1) 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/blockpush_abs_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | from diffusion_policy.common.replay_buffer import ReplayBuffer 13 | 14 | 15 | @click.command() 16 | @click.option('-i', '--input', required=True) 17 | @click.option('-o', '--output', required=True) 18 | @click.option('-t', '--target_eef_idx', default=8, type=int) 19 | def main(input, output, target_eef_idx): 20 | buffer = ReplayBuffer.copy_from_path(input) 21 | obs = buffer['obs'] 22 | action = buffer['action'] 23 | prev_eef_target = obs[:,target_eef_idx:target_eef_idx+action.shape[1]] 24 | next_eef_target = prev_eef_target + action 25 | action[:] = next_eef_target 26 | buffer.save_to_path(zarr_path=output, chunk_length=-1) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/episode_lengths.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import click 10 | import numpy as np 11 | import json 12 | from diffusion_policy.common.replay_buffer import ReplayBuffer 13 | 14 | @click.command() 15 | @click.option('--input', '-i', required=True) 16 | @click.option('--dt', default=0.1, type=float) 17 | def main(input, dt): 18 | buffer = ReplayBuffer.create_from_path(input) 19 | lengths = buffer.episode_lengths 20 | durations = lengths * dt 21 | result = { 22 | 'duration/mean': np.mean(durations) 23 | } 24 | 25 | text = json.dumps(result, indent=2) 26 | print(text) 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/generate_bet_blockpush.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | 10 | import os 11 | import click 12 | import pathlib 13 | import numpy as np 14 | from tqdm import tqdm 15 | from diffusion_policy.common.replay_buffer import ReplayBuffer 16 | from tf_agents.environments.wrappers import TimeLimit 17 | from tf_agents.environments.gym_wrapper import GymWrapper 18 | from tf_agents.trajectories.time_step import StepType 19 | from diffusion_policy.env.block_pushing.block_pushing_multimodal import BlockPushMultimodal 20 | from diffusion_policy.env.block_pushing.block_pushing import BlockPush 21 | from diffusion_policy.env.block_pushing.oracles.multimodal_push_oracle 
import MultimodalOrientedPushOracle 22 | 23 | @click.command() 24 | @click.option('-o', '--output', required=True) 25 | @click.option('-n', '--n_episodes', default=1000) 26 | @click.option('-c', '--chunk_length', default=-1) 27 | def main(output, n_episodes, chunk_length): 28 | 29 | buffer = ReplayBuffer.create_empty_numpy() 30 | env = TimeLimit(GymWrapper(BlockPushMultimodal()), duration=350) 31 | for i in tqdm(range(n_episodes)): 32 | print(i) 33 | obs_history = list() 34 | action_history = list() 35 | 36 | env.seed(i) 37 | policy = MultimodalOrientedPushOracle(env) 38 | time_step = env.reset() 39 | policy_state = policy.get_initial_state(1) 40 | while True: 41 | action_step = policy.action(time_step, policy_state) 42 | obs = np.concatenate(list(time_step.observation.values()), axis=-1) 43 | action = action_step.action 44 | obs_history.append(obs) 45 | action_history.append(action) 46 | 47 | if time_step.step_type == 2: 48 | break 49 | 50 | # state = env.wrapped_env().gym.get_pybullet_state() 51 | time_step = env.step(action) 52 | obs_history = np.array(obs_history) 53 | action_history = np.array(action_history) 54 | 55 | episode = { 56 | 'obs': obs_history, 57 | 'action': action_history 58 | } 59 | buffer.add_episode(episode) 60 | 61 | buffer.save_to_path(output, chunk_length=chunk_length) 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/real_dataset_conversion.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | import zarr 13 | import cv2 14 | import threadpoolctl 15 | from diffusion_policy.real_world.real_data_conversion import real_data_to_replay_buffer 16 | 17 | @click.command() 18 | @click.option('--input', '-i', required=True) 19 | @click.option('--output', '-o', default=None) 20 | @click.option('--resolution', '-r', default='640x480') 21 | @click.option('--n_decoding_threads', '-nd', default=-1, type=int) 22 | @click.option('--n_encoding_threads', '-ne', default=-1, type=int) 23 | def main(input, output, resolution, n_decoding_threads, n_encoding_threads): 24 | out_resolution = tuple(int(x) for x in resolution.split('x')) 25 | input = pathlib.Path(os.path.expanduser(input)) 26 | in_zarr_path = input.joinpath('replay_buffer.zarr') 27 | in_video_dir = input.joinpath('videos') 28 | assert in_zarr_path.is_dir() 29 | assert in_video_dir.is_dir() 30 | if output is None: 31 | output = input.joinpath(resolution + '.zarr.zip') 32 | else: 33 | output = pathlib.Path(os.path.expanduser(output)) 34 | 35 | if output.exists(): 36 | click.confirm('Output path already exists! 
Overrite?', abort=True) 37 | 38 | cv2.setNumThreads(1) 39 | with threadpoolctl.threadpool_limits(1): 40 | replay_buffer = real_data_to_replay_buffer( 41 | dataset_path=str(input), 42 | out_resolutions=out_resolution, 43 | n_decoding_threads=n_decoding_threads, 44 | n_encoding_threads=n_encoding_threads 45 | ) 46 | 47 | print('Saving to disk') 48 | if output.suffix == '.zip': 49 | with zarr.ZipStore(output) as zip_store: 50 | replay_buffer.save_to_store( 51 | store=zip_store 52 | ) 53 | else: 54 | with zarr.DirectoryStore(output) as store: 55 | replay_buffer.save_to_store( 56 | store=store 57 | ) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/real_pusht_successrate.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import collections 12 | import numpy as np 13 | from tqdm import tqdm 14 | import json 15 | 16 | @click.command() 17 | @click.option( 18 | '--reference', '-r', required=True, 19 | help='Reference metrics_raw.json from demonstration dataset.' 20 | ) 21 | @click.option( 22 | '--input', '-i', required=True, 23 | help='Data search path' 24 | ) 25 | def main(reference, input): 26 | # compute the min last metric for demo metrics 27 | demo_metrics = json.load(open(reference, 'r')) 28 | demo_min_metrics = collections.defaultdict(lambda:float('inf')) 29 | for episode_idx, metrics in demo_metrics.items(): 30 | for key, value in metrics.items(): 31 | last_value = value[-1] 32 | demo_min_metrics[key] = min(demo_min_metrics[key], last_value) 33 | print(demo_min_metrics) 34 | 35 | # find all metric 36 | name = 'metrics_raw.json' 37 | search_dir = pathlib.Path(input) 38 | success_rate_map = dict() 39 | for json_path in search_dir.glob('**/'+name): 40 | rel_path = json_path.relative_to(search_dir) 41 | rel_name = str(rel_path.parent) 42 | this_metrics = json.load(json_path.open('r')) 43 | metric_success_idxs = collections.defaultdict(list) 44 | metric_failure_idxs = collections.defaultdict(list) 45 | for episode_idx, metrics in this_metrics.items(): 46 | for key, value in metrics.items(): 47 | last_value = value[-1] 48 | # print(episode_idx, key, last_value) 49 | demo_min = demo_min_metrics[key] 50 | if last_value >= demo_min: 51 | # success 52 | metric_success_idxs[key].append(episode_idx) 53 | else: 54 | metric_failure_idxs[key].append(episode_idx) 55 | # in case of no success 56 | _ = metric_success_idxs[key] 57 | _ = metric_failure_idxs[key] 58 | metric_success_rate = dict() 59 | n_episodes = len(this_metrics) 60 | for key, value in metric_success_idxs.items(): 61 | metric_success_rate[key] = len(value) / n_episodes 62 | # metric_success_rate['failured_idxs'] = metric_failure_idxs 63 | success_rate_map[rel_name] = metric_success_rate 64 | 65 | text = json.dumps(success_rate_map, indent=2) 66 | print(text) 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /diffusion_policy/scripts/robomimic_dataset_action_comparison.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import sys 3 | import os 4 | import pathlib 5 | 6 | ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) 7 | 
sys.path.append(ROOT_DIR) 8 | 9 | import os 10 | import click 11 | import pathlib 12 | import h5py 13 | import numpy as np 14 | from tqdm import tqdm 15 | from scipy.spatial.transform import Rotation 16 | 17 | def read_all_actions(hdf5_file, metric_skip_steps=1): 18 | n_demos = len(hdf5_file['data']) 19 | all_actions = list() 20 | for i in tqdm(range(n_demos)): 21 | actions = hdf5_file[f'data/demo_{i}/actions'][:] 22 | all_actions.append(actions[metric_skip_steps:]) 23 | all_actions = np.concatenate(all_actions, axis=0) 24 | return all_actions 25 | 26 | 27 | @click.command() 28 | @click.option('-i', '--input', required=True, help='input hdf5 path') 29 | @click.option('-o', '--output', required=True, help='output hdf5 path. Parent directory must exist') 30 | def main(input, output): 31 | # process inputs 32 | input = pathlib.Path(input).expanduser() 33 | assert input.is_file() 34 | output = pathlib.Path(output).expanduser() 35 | assert output.is_file() 36 | 37 | input_file = h5py.File(str(input), 'r') 38 | output_file = h5py.File(str(output), 'r') 39 | 40 | input_all_actions = read_all_actions(input_file) 41 | output_all_actions = read_all_actions(output_file) 42 | pos_dist = np.linalg.norm(input_all_actions[:,:3] - output_all_actions[:,:3], axis=-1) 43 | rot_dist = (Rotation.from_rotvec(input_all_actions[:,3:6] 44 | ) * Rotation.from_rotvec(output_all_actions[:,3:6]).inv() 45 | ).magnitude() 46 | 47 | print(f'max pos dist: {pos_dist.max()}') 48 | print(f'max rot dist: {rot_dist.max()}') 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /diffusion_policy/shared_memory/shared_memory_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from dataclasses import dataclass 3 | import numpy as np 4 | from multiprocessing.managers import SharedMemoryManager 5 | from atomics import atomicview, MemoryOrder, UINT 6 | 7 | @dataclass 8 | class ArraySpec: 9 | name: str 10 | shape: Tuple[int] 11 | dtype: np.dtype 12 | 13 | 14 | class SharedAtomicCounter: 15 | def __init__(self, 16 | shm_manager: SharedMemoryManager, 17 | size :int=8 # 64bit int 18 | ): 19 | shm = shm_manager.SharedMemory(size=size) 20 | self.shm = shm 21 | self.size = size 22 | self.store(0) # initialize 23 | 24 | @property 25 | def buf(self): 26 | return self.shm.buf[:self.size] 27 | 28 | def load(self) -> int: 29 | with atomicview(buffer=self.buf, atype=UINT) as a: 30 | value = a.load(order=MemoryOrder.ACQUIRE) 31 | return value 32 | 33 | def store(self, value: int): 34 | with atomicview(buffer=self.buf, atype=UINT) as a: 35 | a.store(value, order=MemoryOrder.RELEASE) 36 | 37 | def add(self, value: int): 38 | with atomicview(buffer=self.buf, atype=UINT) as a: 39 | a.add(value, order=MemoryOrder.ACQ_REL) 40 | -------------------------------------------------------------------------------- /experiment_scripts/gibson/eval_tdiff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PYTHONPATH=..T-Diff 4 | export CUDA_VISIBLE_DEVICES=0,1 5 | cd ..T-Diff/semexp 6 | 7 | # conda activate tdiff 8 | 9 | python eval_tdiff.py \ 10 | --split val \ 11 | --seed 345 \ 12 | --eval 1 \ 13 | --pf_model_path "models_ckpt/area_model.ckpt" \ 14 | --diff_model_path "models_ckpt/diff_model.ckpt" \ 15 | -d ..experiments \ 16 | --num_local_steps 1 \ 17 | --exp_name "debug" \ 18 | --global_downscaling 1 \ 19 | --mask_nearest_locations \ 20 | 
--pf_masking_opt 'unexplored' \ 21 | --use_nearest_frontier \ 22 | --total_num_scenes "5" \ 23 | --select_diff_step 27 \ 24 | --horizon 32 \ -------------------------------------------------------------------------------- /semexp/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/.DS_Store -------------------------------------------------------------------------------- /semexp/configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /semexp/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /semexp/docs/legend_gibson.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/docs/legend_gibson.png -------------------------------------------------------------------------------- /semexp/docs/legend_mp3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/docs/legend_mp3d.png -------------------------------------------------------------------------------- /semexp/envs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/envs/.DS_Store -------------------------------------------------------------------------------- /semexp/envs/__init__.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .habitat import construct_envs 4 | 5 | 6 | def make_vec_envs(args, workers_ignore_signals: bool = False, **kwargs): 7 | envs = construct_envs(args, workers_ignore_signals=workers_ignore_signals, **kwargs) 8 | envs = VecPyTorch(envs, args.device) 9 | return envs 10 | 11 | 12 | # Adapted from 13 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/envs.py#L159 14 | class VecPyTorch: 15 | def __init__(self, venv, device): 16 | self.venv = venv 17 | self.num_envs = venv.num_envs 18 | self.observation_space = venv.observation_space 19 | self.action_space = venv.action_space 20 | self.device = device 21 | 22 | def reset(self): 23 | obs, info = self.venv.reset() 24 | obs = torch.from_numpy(obs).float().to(self.device) 25 | return obs, info 26 | 27 | def step_async(self, actions): 28 | actions = actions.cpu().numpy() 29 | self.venv.step_async(actions) 30 | 31 | def step_wait(self): 32 | obs, reward, done, info = self.venv.step_wait() 33 | obs = torch.from_numpy(obs).float().to(self.device) 34 | reward = torch.from_numpy(reward).float() 35 | return obs, reward, done, info 36 | 37 | def step(self, actions): 38 | actions = actions.cpu().numpy() 39 | obs, reward, done, info = self.venv.step(actions) 40 | obs = torch.from_numpy(obs).float().to(self.device) 41 | reward = torch.from_numpy(reward).float() 42 | return obs, reward, done, info 43 | 44 | def get_rewards(self, inputs): 45 | reward = self.venv.get_rewards(inputs) 46 | reward = torch.from_numpy(reward).float() 47 | return reward 48 | 49 | def plan_act_and_preprocess(self, inputs): 50 | obs, reward, done, info = self.venv.plan_act_and_preprocess(inputs) 51 | obs = torch.from_numpy(obs).float().to(self.device) 52 | reward = torch.from_numpy(reward).float() 53 | return obs, reward, done, info 54 | 55 | def get_reachability_map(self, inputs): 56 | reachability_maps, fmm_dists = self.venv.get_reachability_map(inputs) 57 | reachability_maps = torch.from_numpy(reachability_maps).float().to(self.device) 58 | fmm_dists = torch.from_numpy(fmm_dists).float().to(self.device) 59 | return reachability_maps, fmm_dists 60 | 61 | def get_frontier_map(self, inputs): 62 | frontier_maps = self.venv.get_frontier_map(inputs) 63 | frontier_maps = torch.from_numpy(frontier_maps).to(self.device) 64 | return frontier_maps 65 | 66 | def get_fmm_dists(self, inputs): 67 | fmm_dists = self.venv.get_fmm_dists(inputs) 68 | fmm_dists = torch.from_numpy(fmm_dists).to(self.device) 69 | return fmm_dists 70 | 71 | def current_episodes(self): 72 | curr_eps = self.venv.current_episodes() 73 | return curr_eps 74 | 75 | def get_current_episodes(self): 76 | curr_eps = self.venv.get_current_episodes() 77 | return curr_eps 78 | 79 | def close(self): 80 | return self.venv.close() 81 | -------------------------------------------------------------------------------- /semexp/envs/habitat/configs/tasks/objectnav_gibson.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | TURN_ANGLE: 30 5 | TILT_ANGLE: 30 6 | ACTION_SPACE_CONFIG: "v1" 7 | AGENT_0: 8 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'SEMANTIC_SENSOR'] 9 | HEIGHT: 0.88 10 | RADIUS: 0.18 11 | HABITAT_SIM_V0: 12 | GPU_DEVICE_ID: 0 13 | ALLOW_SLIDING: True 14 | SEMANTIC_SENSOR: 15 | WIDTH: 640 16 | HEIGHT: 480 17 | HFOV: 79 18 | POSITION: [0, 0.88, 0] 19 | RGB_SENSOR: 20 | WIDTH: 640 21 | HEIGHT: 480 22 
| HFOV: 79 23 | POSITION: [0, 0.88, 0] 24 | DEPTH_SENSOR: 25 | WIDTH: 640 26 | HEIGHT: 480 27 | HFOV: 79 28 | MIN_DEPTH: 0.5 29 | MAX_DEPTH: 5.0 30 | POSITION: [0, 0.88, 0] 31 | TASK: 32 | TYPE: ObjectNav-v1 33 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] 34 | SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] 35 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL'] 36 | SUCCESS: 37 | SUCCESS_DISTANCE: 0.2 38 | 39 | DATASET: 40 | TYPE: PointNav-v1 41 | SPLIT: train 42 | DATA_PATH: "../data/datasets/objectnav/gibson/v1/{split}/{split}.json.gz" 43 | EPISODES_DIR: "../data/datasets/objectnav/gibson/v1/{split}/" 44 | SCENES_DIR: "../data/scene_datasets/" 45 | -------------------------------------------------------------------------------- /semexp/envs/utils/pose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_l2_distance(x1, x2, y1, y2): 5 | """ 6 | Computes the L2 distance between two points. 7 | """ 8 | return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 9 | 10 | 11 | def get_rel_pose_change(pos2, pos1): 12 | x1, y1, o1 = pos1 13 | x2, y2, o2 = pos2 14 | 15 | theta = np.arctan2(y2 - y1, x2 - x1) - o1 16 | dist = get_l2_distance(x1, x2, y1, y2) 17 | dx = dist * np.cos(theta) 18 | dy = dist * np.sin(theta) 19 | do = o2 - o1 20 | 21 | return dx, dy, do 22 | 23 | 24 | def get_new_pose(pose, rel_pose_change): 25 | x, y, o = pose 26 | dx, dy, do = rel_pose_change 27 | 28 | global_dx = dx * np.sin(np.deg2rad(o)) + dy * np.cos(np.deg2rad(o)) 29 | global_dy = dx * np.cos(np.deg2rad(o)) - dy * np.sin(np.deg2rad(o)) 30 | x += global_dy 31 | y += global_dx 32 | o += np.rad2deg(do) 33 | if o > 180.0: 34 | o -= 360.0 35 | 36 | return x, y, o 37 | 38 | 39 | def threshold_poses(coords, shape): 40 | coords[0] = min(max(0, coords[0]), shape[0] - 1) 41 | coords[1] = min(max(0, coords[1]), shape[1] - 1) 42 | return coords 43 | -------------------------------------------------------------------------------- /semexp/envs/utils/rotation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utilities for generating and applying rotation matrices. 
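As an illustrative sanity check (an added example, not part of the original module): rotating the x-axis by 90 degrees about the z-axis with get_r_matrix should map it onto the y-axis,

    R = get_r_matrix([0.0, 0.0, 1.0], np.pi / 2)
    np.allclose(R @ np.array([1.0, 0.0, 0.0]), [0.0, 1.0, 0.0])  # True up to float tolerance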
17 | """ 18 | import numpy as np 19 | 20 | ANGLE_EPS = 0.001 21 | 22 | 23 | def normalize(v): 24 | return v / np.linalg.norm(v) 25 | 26 | 27 | def get_r_matrix(ax_, angle): 28 | ax = normalize(ax_) 29 | if np.abs(angle) > ANGLE_EPS: 30 | S_hat = np.array( 31 | [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]], 32 | dtype=np.float32, 33 | ) 34 | R = ( 35 | np.eye(3) 36 | + np.sin(angle) * S_hat 37 | + (1 - np.cos(angle)) * (np.linalg.matrix_power(S_hat, 2)) 38 | ) 39 | else: 40 | R = np.eye(3) 41 | return R 42 | 43 | 44 | def r_between(v_from_, v_to_): 45 | v_from = normalize(v_from_) 46 | v_to = normalize(v_to_) 47 | ax = normalize(np.cross(v_from, v_to)) 48 | angle = np.arccos(np.dot(v_from, v_to)) 49 | return get_r_matrix(ax, angle) 50 | 51 | 52 | def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to): 53 | inputs = [up_from, lookat_from, up_to, lookat_to] 54 | for i in range(4): 55 | inputs[i] = normalize(np.array(inputs[i]).reshape((-1,))) 56 | up_from, lookat_from, up_to, lookat_to = inputs 57 | r1 = r_between(lookat_from, lookat_to) 58 | 59 | new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1)) 60 | to_x = normalize(np.cross(lookat_to, up_to)) 61 | angle = np.arccos(np.dot(new_x, to_x)) 62 | if angle > ANGLE_EPS: 63 | if angle < np.pi - ANGLE_EPS: 64 | ax = normalize(np.cross(new_x, to_x)) 65 | flip = np.dot(lookat_to, ax) 66 | if flip > 0: 67 | r2 = get_r_matrix(lookat_to, angle) 68 | elif flip < 0: 69 | r2 = get_r_matrix(lookat_to, -1.0 * angle) 70 | else: 71 | # Angle of rotation is too close to 180 degrees, direction of 72 | # rotation does not matter. 73 | r2 = get_r_matrix(lookat_to, angle) 74 | else: 75 | r2 = np.eye(3) 76 | return np.dot(r2, r1) 77 | -------------------------------------------------------------------------------- /semexp/sxz/img/circle0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle0.png -------------------------------------------------------------------------------- /semexp/sxz/img/circle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle1.png -------------------------------------------------------------------------------- /semexp/sxz/img/circle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle2.png -------------------------------------------------------------------------------- /semexp/sxz/img/circle3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/circle3.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_circle_test0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_circle_test0.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_circle_test1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_circle_test1.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_map.png -------------------------------------------------------------------------------- /semexp/sxz/img/dist_map_dilate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/dist_map_dilate.png -------------------------------------------------------------------------------- /semexp/sxz/img/origin_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/origin_map.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Collierville.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Collierville.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Corozal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Corozal.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Darden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Darden.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Markleeville.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Markleeville.png -------------------------------------------------------------------------------- /semexp/sxz/img/pbz2_Wiconisco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/pbz2_Wiconisco.png -------------------------------------------------------------------------------- /semexp/sxz/img/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sx-zhang/T-diff/9aa41a77ea1cb67be95a6224daddb9478379ca91/semexp/sxz/img/test.png -------------------------------------------------------------------------------- /semexp/sxz/visualize.py: -------------------------------------------------------------------------------- 1 | import _pickle as cPickle 2 | import bz2 3 | 4 | import gzip 5 | import json 6 | 7 | import cv2 8 | import numpy as np 9 | from PIL import Image, ImageDraw, ImageFont 10 | import os 11 | 12 | GIBSON_OBJECT_COLORS = [ 13 | (0.9400000000000001, 0.7818, 0.66), 14 | (0.9400000000000001, 0.8868, 0.66), 15 | (0.8882000000000001, 0.9400000000000001, 0.66), 16 | 
(0.7832000000000001, 0.9400000000000001, 0.66), 17 | (0.6782000000000001, 0.9400000000000001, 0.66), 18 | (0.66, 0.9400000000000001, 0.7468000000000001), 19 | (0.66, 0.9400000000000001, 0.8518000000000001), 20 | (0.66, 0.9232, 0.9400000000000001), 21 | (0.66, 0.8182, 0.9400000000000001), 22 | (0.66, 0.7132, 0.9400000000000001), 23 | (0.7117999999999999, 0.66, 0.9400000000000001), 24 | (0.8168, 0.66, 0.9400000000000001), 25 | (0.9218, 0.66, 0.9400000000000001), 26 | (0.9400000000000001, 0.66, 0.8531999999999998), 27 | (0.9400000000000001, 0.66, 0.748199999999999), 28 | ] 29 | 30 | COLOR_PALETTE = [ 31 | 1.0, 32 | 1.0, 33 | 1.0, # Out-of-bounds 34 | 0.9, 35 | 0.9, 36 | 0.9, # Floor 37 | *[oci for oc in GIBSON_OBJECT_COLORS for oci in oc], 38 | ] 39 | 40 | val_rooms = ['Collierville', 'Corozal', 'Darden', 'Markleeville', 'Wiconisco'] 41 | episodes_file = '../data/datasets/objectnav/gibson/v1.1/val/content/Darden_episodes.json.gz' 42 | dataset_info_file = '../data/datasets/objectnav/gibson/v1.1/val/val_info.pbz2' 43 | 44 | def visualize_sem_map(sem_map): 45 | c_map = sem_map.astype(np.int32) 46 | color_palette = [int(x * 255.0) for x in COLOR_PALETTE] 47 | semantic_img = Image.new("P", (c_map.shape[1], c_map.shape[0])) 48 | semantic_img.putpalette(color_palette) 49 | semantic_img.putdata((c_map.flatten() % 40).astype(np.uint8)) 50 | semantic_img = semantic_img.convert("RGB") 51 | semantic_img = np.array(semantic_img) 52 | 53 | return semantic_img 54 | 55 | def projection_img(sem_mp): 56 | semantic_img = np.zeros((sem_mp.shape[1], sem_mp.shape[2])) 57 | for i in range(sem_mp.shape[0]): 58 | semantic_img[sem_mp[i].astype(bool)] = i+1 59 | return semantic_img.transpose() 60 | 61 | with bz2.BZ2File(dataset_info_file, "rb") as f: 62 | dataset_info = cPickle.load(f) 63 | 64 | for scene_name in val_rooms: 65 | print(scene_name) 66 | episodes_file = '../data/datasets/objectnav/gibson/v1.1/val/content/{}_episodes.json.gz'.format(scene_name) 67 | with gzip.open(episodes_file, "r") as f: 68 | eps_data = json.loads(f.read().decode("utf-8"))["episodes"] 69 | all_floor_id = [] 70 | for eps in eps_data: 71 | floor_id = eps['floor_id'] 72 | all_floor_id.append(floor_id) 73 | all_floor_id = list(set(all_floor_id)) 74 | print(all_floor_id) 75 | -------------------------------------------------------------------------------- /semexp/util/crop.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 3 | # -------------------------------------------------------- 4 | # References: 5 | # MAE: https://github.com/facebookresearch/mae 6 | # -------------------------------------------------------- 7 | 8 | import math 9 | 10 | import torch 11 | 12 | from torchvision import transforms 13 | from torchvision.transforms import functional as F 14 | 15 | 16 | class RandomResizedCrop(transforms.RandomResizedCrop): 17 | """ 18 | RandomResizedCrop for matching TF/TPU implementation: no for-loop is used. 19 | This may lead to results different from torchvision's version.
20 | Following BYOL's TF code: 21 | https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206 22 | """ 23 | @staticmethod 24 | def get_params(img, scale, ratio): 25 | width, height = F._get_image_size(img) 26 | area = height * width 27 | 28 | target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() 29 | log_ratio = torch.log(torch.tensor(ratio)) 30 | aspect_ratio = torch.exp( 31 | torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) 32 | ).item() 33 | 34 | w = int(round(math.sqrt(target_area * aspect_ratio))) 35 | h = int(round(math.sqrt(target_area / aspect_ratio))) 36 | 37 | w = min(w, width) 38 | h = min(h, height) 39 | 40 | i = torch.randint(0, height - h + 1, size=(1,)).item() 41 | j = torch.randint(0, width - w + 1, size=(1,)).item() 42 | 43 | return i, j, h, w -------------------------------------------------------------------------------- /semexp/util/datasets.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 3 | # -------------------------------------------------------- 4 | # References: 5 | # DeiT: https://github.com/facebookresearch/deit 6 | # MAE: https://github.com/facebookresearch/mae 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | import PIL 11 | 12 | from torchvision import datasets, transforms 13 | from torchvision.datasets.folder import default_loader 14 | 15 | from timm.data import create_transform 16 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 17 | 18 | 19 | class ImageListFolder(datasets.ImageFolder): 20 | def __init__(self, root, transform=None, target_transform=None, 21 | ann_file=None, loader=default_loader): 22 | self.root = root 23 | self.transform = transform 24 | self.loader = loader 25 | self.target_transform = target_transform 26 | self.nb_classes = 1000 27 | 28 | assert ann_file is not None 29 | print('load info from', ann_file) 30 | 31 | self.samples = [] 32 | ann = open(ann_file) 33 | for elem in ann.readlines(): 34 | cut = elem.split(' ') 35 | path_current = os.path.join(root, cut[0]) 36 | target_current = int(cut[1]) 37 | self.samples.append((path_current, target_current)) 38 | ann.close() 39 | 40 | print('load finish') 41 | 42 | 43 | def build_dataset(is_train, args): 44 | transform = build_transform(is_train, args) 45 | 46 | # TODO modify your own dataset here 47 | folder = os.path.join(args.data_path, 'train' if is_train else 'val') 48 | ann_file = os.path.join(args.data_path, 'train.txt' if is_train else 'val.txt') 49 | dataset = ImageListFolder(folder, transform=transform, ann_file=ann_file) 50 | 51 | print(dataset) 52 | 53 | return dataset 54 | 55 | 56 | def build_transform(is_train, args): 57 | mean = IMAGENET_DEFAULT_MEAN 58 | std = IMAGENET_DEFAULT_STD 59 | # train transform 60 | if is_train: 61 | # this should always dispatch to transforms_imagenet_train 62 | transform = create_transform( 63 | input_size=args.input_size, 64 | is_training=True, 65 | color_jitter=args.color_jitter, 66 | auto_augment=args.aa, 67 | interpolation='bicubic', 68 | re_prob=args.reprob, 69 | re_mode=args.remode, 70 | re_count=args.recount, 71 | mean=mean, 72 | std=std, 73 | ) 74 | return transform 75 | 76 | # eval transform 77 | t = [] 78 | if args.input_size <= 224: 79 | crop_pct = 224 / 256 80 | else: 81 | crop_pct = 1.0 82 | size = int(args.input_size / crop_pct) 83 | t.append( 84 | 
transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 85 | ) 86 | t.append(transforms.CenterCrop(args.input_size)) 87 | 88 | t.append(transforms.ToTensor()) 89 | t.append(transforms.Normalize(mean, std)) 90 | return transforms.Compose(t) 91 | -------------------------------------------------------------------------------- /semexp/util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 3 | # -------------------------------------------------------- 4 | # References: 5 | # ELECTRA https://github.com/google-research/electra 6 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 7 | # MAE: https://github.com/facebookresearch/mae 8 | # -------------------------------------------------------- 9 | 10 | import json 11 | 12 | 13 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 14 | """ 15 | Parameter groups for layer-wise lr decay 16 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 17 | """ 18 | param_group_names = {} 19 | param_groups = {} 20 | 21 | num_layers = len(model.blocks) + 1 22 | 23 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 24 | 25 | for n, p in model.named_parameters(): 26 | if not p.requires_grad: 27 | continue 28 | 29 | # no decay: all 1D parameters and model specific ones 30 | if p.ndim == 1 or n in no_weight_decay_list: 31 | g_decay = "no_decay" 32 | this_decay = 0. 33 | else: 34 | g_decay = "decay" 35 | this_decay = weight_decay 36 | 37 | layer_id = get_layer_id_for_vit(n, num_layers) 38 | group_name = "layer_%d_%s" % (layer_id, g_decay) 39 | 40 | if group_name not in param_group_names: 41 | this_scale = layer_scales[layer_id] 42 | 43 | param_group_names[group_name] = { 44 | "lr_scale": this_scale, 45 | "weight_decay": this_decay, 46 | "params": [], 47 | } 48 | param_groups[group_name] = { 49 | "lr_scale": this_scale, 50 | "weight_decay": this_decay, 51 | "params": [], 52 | } 53 | 54 | param_group_names[group_name]["params"].append(n) 55 | param_groups[group_name]["params"].append(p) 56 | 57 | print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 58 | 59 | return list(param_groups.values()) 60 | 61 | 62 | def get_layer_id_for_vit(name, num_layers): 63 | """ 64 | Assign a parameter with its layer id 65 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 66 | """ 67 | if name in ['cls_token', 'pos_embed']: 68 | return 0 69 | elif name.startswith('patch_embed'): 70 | return 0 71 | elif name.startswith('blocks'): 72 | return int(name.split('.')[1]) + 1 73 | else: 74 | return num_layers -------------------------------------------------------------------------------- /semexp/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # This source code is licensed under the license found in the 2 | # LICENSE file in the root directory of this source tree. 
3 | # -------------------------------------------------------- 4 | # References: 5 | # MAE: https://github.com/facebookresearch/mae 6 | # -------------------------------------------------------- 7 | 8 | import math 9 | 10 | def adjust_learning_rate(optimizer, epoch, args): 11 | """Decay the learning rate with half-cycle cosine after warmup""" 12 | if epoch < args.warmup_epochs: 13 | lr = args.lr * epoch / args.warmup_epochs 14 | else: 15 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 16 | (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 17 | for param_group in optimizer.param_groups: 18 | if "lr_scale" in param_group: 19 | param_group["lr"] = lr * param_group["lr_scale"] 20 | else: 21 | param_group["lr"] = lr 22 | return lr 23 | -------------------------------------------------------------------------------- /semexp/utils/distributions.py: -------------------------------------------------------------------------------- 1 | # The following code is largely borrowed from: 2 | # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .model import AddBias 8 | 9 | """ 10 | Modify standard PyTorch distributions so they are compatible with this code. 11 | """ 12 | 13 | FixedCategorical = torch.distributions.Categorical 14 | 15 | old_sample = FixedCategorical.sample 16 | FixedCategorical.sample = lambda self: old_sample(self) 17 | 18 | log_prob_cat = FixedCategorical.log_prob 19 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat( 20 | self, actions.squeeze(-1) 21 | ) 22 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) 23 | 24 | FixedNormal = torch.distributions.Normal 25 | log_prob_normal = FixedNormal.log_prob 26 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum( 27 | -1, keepdim=False 28 | ) 29 | 30 | entropy = FixedNormal.entropy 31 | FixedNormal.entropy = lambda self: entropy(self).sum(-1) 32 | 33 | FixedNormal.mode = lambda self: self.mean 34 | 35 | 36 | class Categorical(nn.Module): 37 | def __init__(self, num_inputs, num_outputs): 38 | super(Categorical, self).__init__() 39 | self.linear = nn.Linear(num_inputs, num_outputs) 40 | 41 | def forward(self, x): 42 | x = self.linear(x) 43 | return FixedCategorical(logits=x) 44 | 45 | 46 | class DiagGaussian(nn.Module): 47 | def __init__(self, num_inputs, num_outputs): 48 | super(DiagGaussian, self).__init__() 49 | 50 | self.fc_mean = nn.Linear(num_inputs, num_outputs) 51 | self.logstd = AddBias(torch.zeros(num_outputs)) 52 | 53 | def forward(self, x): 54 | action_mean = self.fc_mean(x) 55 | 56 | zeros = torch.zeros(action_mean.size()) 57 | if x.is_cuda: 58 | zeros = zeros.cuda() 59 | 60 | action_logstd = self.logstd(zeros) 61 | return FixedNormal(action_mean, action_logstd.exp()) 62 | -------------------------------------------------------------------------------- /tdiff/train_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.utils.data._utils.collate import ( 6 | default_collate_err_msg_format, 7 | np_str_obj_array_pattern, 8 | string_classes, 9 | ) 10 | 11 | 12 | def get_loss_fn(loss_type): 13 | assert loss_type in ["bce", "l2", "l1", "xent"] 14 | loss_fn = None 15 | if loss_type == "bce": 16 | loss_fn = nn.BCELoss(reduction="none") 17 | elif loss_type == "l2": 18 | loss_fn = 
nn.MSELoss(reduction="none") 19 | elif loss_type == "l1": 20 | loss_fn = nn.L1Loss(reduction="none") 21 | elif loss_type == "xent": 22 | loss_fn = nn.CrossEntropyLoss(reduction="none") 23 | return loss_fn 24 | 25 | 26 | def get_activation_fn(activation_type): 27 | assert activation_type in ["none", "sigmoid", "relu"] 28 | activation = nn.Identity() 29 | if activation_type == "sigmoid": 30 | activation = nn.Sigmoid() 31 | elif activation_type == "relu": 32 | activation = nn.ReLU() 33 | return activation 34 | 35 | 36 | def collate_fn(batch): 37 | r"""Puts each data field into a tensor with outer dimension batch size. 38 | Modified version of default_collate which returns the batch as it has lists 39 | of varying length sizes. 40 | """ 41 | 42 | elem = batch[0] 43 | elem_type = type(elem) 44 | if isinstance(elem, torch.Tensor): 45 | out = None 46 | if torch.utils.data.get_worker_info() is not None: 47 | # If we're in a background process, concatenate directly into a 48 | # shared memory tensor to avoid an extra copy 49 | numel = sum(x.numel() for x in batch) 50 | storage = elem.storage()._new_shared(numel) 51 | out = elem.new(storage) 52 | return torch.stack(batch, 0, out=out) 53 | elif ( 54 | elem_type.__module__ == "numpy" 55 | and elem_type.__name__ != "str_" 56 | and elem_type.__name__ != "string_" 57 | ): 58 | if elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap": 59 | # array of string classes and object 60 | if np_str_obj_array_pattern.search(elem.dtype.str) is not None: 61 | raise TypeError(default_collate_err_msg_format.format(elem.dtype)) 62 | 63 | return collate_fn([torch.as_tensor(b) for b in batch]) 64 | elif elem.shape == (): # scalars 65 | return torch.as_tensor(batch) 66 | elif isinstance(elem, float): 67 | return torch.tensor(batch, dtype=torch.float64) 68 | elif isinstance(elem, int): 69 | return torch.tensor(batch) 70 | elif isinstance(elem, string_classes): 71 | return batch 72 | elif isinstance(elem, collections.abc.Mapping): 73 | return {key: collate_fn([d[key] for d in batch]) for key in elem} 74 | elif isinstance(elem, tuple) and hasattr(elem, "_fields"): # namedtuple 75 | return elem_type(*(collate_fn(samples) for samples in zip(*batch))) 76 | elif isinstance(elem, collections.abc.Sequence): 77 | # check to make sure that the elements in batch have consistent size 78 | it = iter(batch) 79 | elem_size = len(next(it)) 80 | if not all(len(elem) == elem_size for elem in it): 81 | return batch 82 | transposed = zip(*batch) 83 | return [collate_fn(samples) for samples in transposed] 84 | 85 | raise TypeError(default_collate_err_msg_format.format(elem_type)) 86 | -------------------------------------------------------------------------------- /train_traj/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | Training: 4 | python train.py --config-name=train_diffusion_lowdim_workspace 5 | """ 6 | 7 | import sys 8 | # use line-buffering for both stdout and stderr 9 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 10 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 11 | 12 | import hydra 13 | from omegaconf import OmegaConf 14 | import pathlib 15 | from trajectory_diffusion.workspace.base_workspace import BaseWorkspace 16 | 17 | # allows arbitrary python code execution in configs using the ${eval:''} resolver 18 | OmegaConf.register_new_resolver("eval", eval, replace=True) 19 | 20 | @hydra.main( 21 | version_base=None, 22 | 
config_path=str(pathlib.Path(__file__).parent.joinpath( 23 | 'trajectory_diffusion','config')) 24 | ) 25 | def main(cfg: OmegaConf): 26 | # resolve immediately so all the ${now:} resolvers 27 | # will use the same time. 28 | OmegaConf.resolve(cfg) 29 | 30 | cls = hydra.utils.get_class(cfg._target_) 31 | workspace: BaseWorkspace = cls(cfg) 32 | workspace.run() 33 | 34 | if __name__ == "__main__": 35 | main() 36 | 37 | # python train.py --config-dir=. --config-name=image_pusht_diffusion_policy_cnn.yaml training.seed=42 training.device=cuda:0 hydra.run.dir='data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}' -------------------------------------------------------------------------------- /train_traj/train.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=diffusion_traj 2 | export CUDA_VISIBLE_DEVICES=0 3 | 4 | conda activate diff_train 5 | 6 | python train.py --config-dir=. --config-name=train_diffusion_traj_gibson.yaml training.seed=42 training.device=cuda:0 hydra.run.dir='data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_train_traj_diff_gibson' 7 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | # self.monitor_key = monitor_key 17 | self.monitor_key = "val_loss" 18 | self.mode = mode 19 | self.k = k 20 | self.format_str = format_str 21 | self.path_value_map = dict() 22 | 23 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 24 | 25 | # ckpt_path = os.path.join( 26 | # self.save_dir, "1111.ckpt") 27 | # return ckpt_path 28 | 29 | if self.k == 0: 30 | return None 31 | 32 | value = data[self.monitor_key] 33 | ckpt_path = os.path.join( 34 | self.save_dir, self.format_str.format(**data)) 35 | return ckpt_path 36 | # if len(self.path_value_map) < self.k: 37 | # # under-capacity 38 | # self.path_value_map[ckpt_path] = value 39 | # return ckpt_path 40 | 41 | # # at capacity 42 | # sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 43 | # min_path, min_value = sorted_map[0] 44 | # max_path, max_value = sorted_map[-1] 45 | 46 | # delete_path = None 47 | # if self.mode == 'max': 48 | # if value > min_value: 49 | # delete_path = min_path 50 | # else: 51 | # if value < max_value: 52 | # delete_path = max_path 53 | 54 | # if delete_path is None: 55 | # return None 56 | # else: 57 | # del self.path_value_map[delete_path] 58 | # self.path_value_map[ckpt_path] = value 59 | 60 | # if not os.path.exists(self.save_dir): 61 | # os.mkdir(self.save_dir) 62 | 63 | # if os.path.exists(delete_path): 64 | # os.remove(delete_path) 65 | # return ckpt_path 66 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if 
i == 0: 12 | env.set_state(state) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaf of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 | if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest. 
7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/pytorch_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, List 2 | import collections 3 | import torch 4 | import torch.nn as nn 5 | 6 | def dict_apply( 7 | x: Dict[str, torch.Tensor], 8 | func: Callable[[torch.Tensor], torch.Tensor] 9 | ) -> Dict[str, torch.Tensor]: 10 | result = dict() 11 | for key, value in x.items(): 12 | if isinstance(value, dict): 13 | result[key] = dict_apply(value, func) 14 | else: 15 | result[key] = func(value) 16 | return result 17 | 18 | def pad_remaining_dims(x, target): 19 | assert x.shape == target.shape[:len(x.shape)] 20 | return x.reshape(x.shape + (1,)*(len(target.shape) - len(x.shape))) 21 | 22 | def dict_apply_split( 23 | x: Dict[str, torch.Tensor], 24 | split_func: Callable[[torch.Tensor], Dict[str, torch.Tensor]] 25 | ) -> Dict[str, torch.Tensor]: 26 | results = collections.defaultdict(dict) 27 | for key, value in x.items(): 28 | result = split_func(value) 29 | for k, v in result.items(): 30 | results[k][key] = v 31 | return results 32 | 33 | def dict_apply_reduce( 34 | x: List[Dict[str, torch.Tensor]], 35 | reduce_func: Callable[[List[torch.Tensor]], torch.Tensor] 36 | ) -> Dict[str, 
torch.Tensor]: 37 | result = dict() 38 | for key in x[0].keys(): 39 | result[key] = reduce_func([x_[key] for x_ in x]) 40 | return result 41 | 42 | 43 | def replace_submodules( 44 | root_module: nn.Module, 45 | predicate: Callable[[nn.Module], bool], 46 | func: Callable[[nn.Module], nn.Module]) -> nn.Module: 47 | """ 48 | predicate: Return true if the module is to be replaced. 49 | func: Return new module to use. 50 | """ 51 | if predicate(root_module): 52 | return func(root_module) 53 | 54 | bn_list = [k.split('.') for k, m 55 | in root_module.named_modules(remove_duplicate=True) 56 | if predicate(m)] 57 | for *parent, k in bn_list: 58 | parent_module = root_module 59 | if len(parent) > 0: 60 | parent_module = root_module.get_submodule('.'.join(parent)) 61 | if isinstance(parent_module, nn.Sequential): 62 | src_module = parent_module[int(k)] 63 | else: 64 | src_module = getattr(parent_module, k) 65 | tgt_module = func(src_module) 66 | if isinstance(parent_module, nn.Sequential): 67 | parent_module[int(k)] = tgt_module 68 | else: 69 | setattr(parent_module, k, tgt_module) 70 | # verify that all BN are replaced 71 | bn_list = [k.split('.') for k, m 72 | in root_module.named_modules(remove_duplicate=True) 73 | if predicate(m)] 74 | assert len(bn_list) == 0 75 | return root_module 76 | 77 | def optimizer_to(optimizer, device): 78 | for state in optimizer.state.values(): 79 | for k, v in state.items(): 80 | if isinstance(v, torch.Tensor): 81 | state[k] = v.to(device=device) 82 | return optimizer 83 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/dataset/base_dataset.py: 
-------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from trajectory_diffusion.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/dataset/gibson_dataset.py: -------------------------------------------------------------------------------- 1 | import bz2 2 | import torch 3 | import _pickle as cPickle 4 | from torch.utils.data import Dataset 5 | import os 6 | from typing import Dict 7 | 8 | def count_file_in_folder(path): 9 | count = 0 10 | for _, _, files in os.walk(path): 11 | count += len(files) 12 | return count 13 | 14 | class GibsonMapDataset(Dataset): 15 | def __init__(self, path, train_idx): 16 | self.train_idx = train_idx 17 | self.path = path 18 | 19 | def __len__(self): 20 | return len(self.train_idx) 21 | 22 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 23 | tmp_idx = self.train_idx[idx] 24 | with bz2.BZ2File("{}/{}.pbz2".format(self.path, str(tmp_idx)), 'rb') as fp: 25 | data = cPickle.load(fp) 26 | 27 | tmp = torch.zeros(32,2) 28 | tmp[:, 0] = 1-data['action'][:, 1] 29 | tmp[:, 1] = data['action'][:, 0] 30 | o_data = { 31 | 'obs':{ 32 | 'sem_map': data['obs']['sem_map'], 33 | 'target': data['obs']['target'], 34 | 'loc': torch.Tensor([1-data['obs']['loc'][1],data['obs']['loc'][0]]), 35 | }, 36 | 'action': tmp[:28, :], 37 | } 38 | 39 | return o_data 40 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/env/objnav/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | import trajectory_diffusion.env.objnav 3 | 4 | register( 5 | id='objnav-traj-diff-v0', 6 | entry_point='envs.objnav.objnav_keypoints_env:ObjNavKeypointsEnv', 7 | max_episode_steps=200, 8 | reward_threshold=1.0 9 | ) -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/env/objnav/objnav_gibson_env.py: -------------------------------------------------------------------------------- 1 | from gym import spaces 2 | from trajectory_diffusion.env.objnav.objnav_env import ObjNavEnv 3 | import numpy 
as np 4 | import cv2 5 | 6 | class ObjNavGibsonEnv(ObjNavEnv): 7 | metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10} 8 | 9 | def __init__(self, 10 | legacy=False, 11 | block_cog=None, 12 | damping=None, 13 | render_size=96): 14 | super().__init__( 15 | legacy=legacy, 16 | block_cog=block_cog, 17 | damping=damping, 18 | render_size=render_size, 19 | render_action=False) 20 | ws = self.window_size 21 | self.observation_space = spaces.Dict({ 22 | 'image': spaces.Box( 23 | low=0, 24 | high=1, 25 | shape=(3,render_size,render_size), 26 | dtype=np.float32 27 | ), 28 | 'agent_pos': spaces.Box( 29 | low=0, 30 | high=ws, 31 | shape=(2,), 32 | dtype=np.float32 33 | ) 34 | }) 35 | self.render_cache = None 36 | 37 | def _get_obs(self): 38 | img = super()._render_frame(mode='rgb_array') 39 | 40 | agent_pos = np.array(self.agent.position) 41 | img_obs = np.moveaxis(img.astype(np.float32) / 255, -1, 0) 42 | obs = { 43 | 'image': img_obs, 44 | 'agent_pos': agent_pos 45 | } 46 | 47 | # draw action 48 | if self.latest_action is not None: 49 | action = np.array(self.latest_action) 50 | coord = (action / 512 * 96).astype(np.int32) 51 | marker_size = int(8/96*self.render_size) 52 | thickness = int(1/96*self.render_size) 53 | cv2.drawMarker(img, coord, 54 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 55 | markerSize=marker_size, thickness=thickness) 56 | self.render_cache = img 57 | 58 | return obs 59 | 60 | def render(self, mode): 61 | assert mode == 'rgb_array' 62 | 63 | if self.render_cache is None: 64 | self._get_obs() 65 | 66 | return self.render_cache 67 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/env_runner/base_image_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from trajectory_diffusion.policy.base_image_policy import BaseImagePolicy 3 | 4 | class BaseImageRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | def run(self, policy: BaseImagePolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/gym_util/video_recording_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from trajectory_diffusion.real_world.video_recorder import VideoRecorder 4 | 5 | class VideoRecordingWrapper(gym.Wrapper): 6 | def __init__(self, 7 | env, 8 | video_recoder: VideoRecorder, 9 | mode='rgb_array', 10 | file_path=None, 11 | steps_per_render=1, 12 | **kwargs 13 | ): 14 | """ 15 | When file_path is None, don't record. 
16 | """ 17 | super().__init__(env) 18 | 19 | self.mode = mode 20 | self.render_kwargs = kwargs 21 | self.steps_per_render = steps_per_render 22 | self.file_path = file_path 23 | self.video_recoder = video_recoder 24 | 25 | self.step_count = 0 26 | 27 | def reset(self, **kwargs): 28 | obs = super().reset(**kwargs) 29 | self.frames = list() 30 | self.step_count = 1 31 | self.video_recoder.stop() 32 | return obs 33 | 34 | def step(self, action): 35 | result = super().step(action) 36 | self.step_count += 1 37 | if self.file_path is not None \ 38 | and ((self.step_count % self.steps_per_render) == 0): 39 | if not self.video_recoder.is_ready(): 40 | self.video_recoder.start(self.file_path) 41 | 42 | frame = self.env.render( 43 | mode=self.mode, **self.render_kwargs) 44 | assert frame.dtype == np.uint8 45 | self.video_recoder.write_frame(frame) 46 | return result 47 | 48 | def render(self, mode='rgb_array', **kwargs): 49 | if self.video_recoder.is_ready(): 50 | self.video_recoder.stop() 51 | return self.file_path 52 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/gym_util/video_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class VideoWrapper(gym.Wrapper): 5 | def __init__(self, 6 | env, 7 | mode='rgb_array', 8 | enabled=True, 9 | steps_per_render=1, 10 | **kwargs 11 | ): 12 | super().__init__(env) 13 | 14 | self.mode = mode 15 | self.enabled = enabled 16 | self.render_kwargs = kwargs 17 | self.steps_per_render = steps_per_render 18 | 19 | self.frames = list() 20 | self.step_count = 0 21 | 22 | def reset(self, **kwargs): 23 | obs = super().reset(**kwargs) 24 | self.frames = list() 25 | self.step_count = 1 26 | if self.enabled: 27 | frame = self.env.render( 28 | mode=self.mode, **self.render_kwargs) 29 | assert frame.dtype == np.uint8 30 | self.frames.append(frame) 31 | return obs 32 | 33 | def step(self, action): 34 | result = super().step(action) 35 | self.step_count += 1 36 | if self.enabled and ((self.step_count % self.steps_per_render) == 0): 37 | frame = self.env.render( 38 | mode=self.mode, **self.render_kwargs) 39 | assert frame.dtype == np.uint8 40 | self.frames.append(frame) 41 | return result 42 | 43 | def render(self, mode='rgb_array', **kwargs): 44 | return self.frames 45 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | 
# if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from diffusers.optimization import ( 2 | Union, SchedulerType, Optional, 3 | Optimizer, TYPE_TO_SCHEDULER_FUNCTION 4 | ) 5 | 6 | def get_scheduler( 7 | name: Union[str, SchedulerType], 8 | optimizer: Optimizer, 9 | num_warmup_steps: Optional[int] = None, 10 | num_training_steps: Optional[int] = None, 11 | **kwargs 12 | ): 13 | """ 14 | Added kwargs vs diffuser's original implementation 15 | 16 | Unified API to get any scheduler from its name. 17 | 18 | Args: 19 | name (`str` or `SchedulerType`): 20 | The name of the scheduler to use. 21 | optimizer (`torch.optim.Optimizer`): 22 | The optimizer that will be used during training. 23 | num_warmup_steps (`int`, *optional*): 24 | The number of warmup steps to do. This is not required by all schedulers (hence the argument being 25 | optional), the function will raise an error if it's unset and the scheduler type requires it. 26 | num_training_steps (`int``, *optional*): 27 | The number of training steps to do. This is not required by all schedulers (hence the argument being 28 | optional), the function will raise an error if it's unset and the scheduler type requires it. 29 | """ 30 | name = SchedulerType(name) 31 | schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] 32 | if name == SchedulerType.CONSTANT: 33 | return schedule_func(optimizer, **kwargs) 34 | 35 | # All other schedulers require `num_warmup_steps` 36 | if num_warmup_steps is None: 37 | raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.") 38 | 39 | if name == SchedulerType.CONSTANT_WITH_WARMUP: 40 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs) 41 | 42 | # All other schedulers require `num_training_steps` 43 | if num_training_steps is None: 44 | raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") 45 | 46 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs) 47 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 11 | pass 12 | return 
device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/diffusion/conv1d_components.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | return resnet_model 29 | 
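A minimal usage sketch of the two helpers above (get_resnet from model/vision/model_getter.py and get_output_shape from model/common/shape_util.py), assuming the package is importable as trajectory_diffusion; the 3x96x96 input resolution is an illustrative assumption, not a value taken from the configs.

from trajectory_diffusion.model.common.shape_util import get_output_shape
from trajectory_diffusion.model.vision.model_getter import get_resnet

# Build a ResNet-18 trunk; get_resnet replaces the classification head with Identity.
encoder = get_resnet('resnet18', weights=None)

# Probe the flattened feature size for an assumed 3x96x96 RGB observation.
feat_shape = get_output_shape((3, 96, 96), encoder)  # (512,) for resnet18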
-------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from trajectory_diffusion.model.common.module_attr_mixin import ModuleAttrMixin 5 | from trajectory_diffusion.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /train_traj/trajectory_diffusion/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from trajectory_diffusion.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 
25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | --------------------------------------------------------------------------------
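A minimal usage sketch for the observation helpers in real_inference_util.py, assuming a hypothetical shape_meta with one RGB stream named 'camera_0' and one low-dim key 'agent_pos'; the key names, resolutions, and dummy arrays are illustrative assumptions only.

import numpy as np
from trajectory_diffusion.real_world.real_inference_util import (
    get_real_obs_dict,
    get_real_obs_resolution,
)

# Hypothetical shape_meta; keys and shapes are assumptions for illustration.
shape_meta = {
    'obs': {
        'camera_0': {'type': 'rgb', 'shape': (3, 96, 96)},
        'agent_pos': {'type': 'low_dim', 'shape': (2,)},
    }
}

# Target (width, height) the policy expects, read from the rgb entries: (96, 96).
w, h = get_real_obs_resolution(shape_meta)

# Dummy environment output: T frames in H,W,C uint8 plus a low-dim state history.
env_obs = {
    'camera_0': np.zeros((2, 480, 640, 3), dtype=np.uint8),
    'agent_pos': np.zeros((2, 2), dtype=np.float32),
}

# Frames are resized, scaled to [0, 1] float32 and transposed to T,C,H,W.
obs_dict = get_real_obs_dict(env_obs, shape_meta)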