├── .gitignore ├── LICENSE ├── README.md ├── conda_environment.yaml ├── config ├── base.yaml ├── tasks │ ├── coffee_d0.yaml │ ├── coffee_preparation_d0.yaml │ ├── hammer_cleanup_d0.yaml │ ├── kitchen_d0.yaml │ ├── mug_cleanup_d0.yaml │ ├── nut_assembly_d0.yaml │ ├── pick_place_d0.yaml │ ├── square_d0.yaml │ ├── stack_d0.yaml │ ├── stack_three_d0.yaml │ ├── threading_d0.yaml │ └── three_piece_assembly_d0.yaml └── tmp │ └── full.yaml ├── diffusion_policy ├── codecs │ └── imagecodecs_numcodecs.py ├── common │ ├── checkpoint_util.py │ ├── cv2_util.py │ ├── env_util.py │ ├── json_logger.py │ ├── nested_dict_util.py │ ├── normalize_util.py │ ├── pose_trajectory_interpolator.py │ ├── precise_sleep.py │ ├── pymunk_override.py │ ├── pymunk_util.py │ ├── pytorch_util.py │ ├── replay_buffer.py │ ├── robomimic_config_util.py │ ├── robomimic_util.py │ ├── sampler.py │ └── timestamp_accumulator.py ├── config │ ├── task │ │ ├── blockpush_lowdim_seed.yaml │ │ ├── blockpush_lowdim_seed_abs.yaml │ │ ├── can_image.yaml │ │ ├── can_image_abs.yaml │ │ ├── can_lowdim.yaml │ │ ├── can_lowdim_abs.yaml │ │ ├── kitchen_lowdim.yaml │ │ ├── kitchen_lowdim_abs.yaml │ │ ├── lift_image.yaml │ │ ├── lift_image_abs.yaml │ │ ├── lift_lowdim.yaml │ │ ├── lift_lowdim_abs.yaml │ │ ├── pusht_image.yaml │ │ ├── pusht_lowdim.yaml │ │ ├── real_pusht_image.yaml │ │ ├── square_image.yaml │ │ ├── square_image_abs.yaml │ │ ├── square_lowdim.yaml │ │ ├── square_lowdim_abs.yaml │ │ ├── tool_hang_image.yaml │ │ ├── tool_hang_image_abs.yaml │ │ ├── tool_hang_lowdim.yaml │ │ ├── tool_hang_lowdim_abs.yaml │ │ ├── transport_image.yaml │ │ ├── transport_image_abs.yaml │ │ ├── transport_lowdim.yaml │ │ └── transport_lowdim_abs.yaml │ ├── train_bet_lowdim_workspace.yaml │ ├── train_diffusion_transformer_hybrid_workspace.yaml │ └── train_robomimic_image_workspace.yaml ├── dataset │ ├── base_dataset.py │ ├── multitask_dataset.py │ ├── robomimic_replay_image_dataset.py │ └── robomimic_replay_lowdim_dataset.py ├── env │ └── robomimic │ │ ├── robomimic_image_wrapper.py │ │ └── robomimic_lowdim_wrapper.py ├── env_runner │ ├── base_image_runner.py │ └── robomimic_image_runner.py ├── gym_util │ ├── async_vector_env.py │ ├── multistep_wrapper.py │ ├── sync_vector_env.py │ ├── video_recording_wrapper.py │ └── video_wrapper.py ├── model │ ├── bet │ │ ├── action_ae │ │ │ └── __init__.py │ │ ├── latent_generators │ │ │ ├── latent_generator.py │ │ │ ├── mingpt.py │ │ │ └── transformer.py │ │ ├── libraries │ │ │ └── loss_fn.py │ │ └── utils.py │ ├── common │ │ ├── dict_of_tensor_mixin.py │ │ ├── lr_scheduler.py │ │ ├── module_attr_mixin.py │ │ ├── normalizer.py │ │ ├── rotation_transformer.py │ │ ├── shape_util.py │ │ └── tensor_util.py │ ├── diffusion │ │ ├── conditional_unet1d.py │ │ ├── conv1d_components.py │ │ ├── ema_model.py │ │ ├── mask_generator.py │ │ ├── positional_embedding.py │ │ └── transformer_for_diffusion.py │ └── vision │ │ ├── crop_randomizer.py │ │ ├── model_getter.py │ │ └── multi_image_obs_encoder.py ├── policy │ ├── base_image_policy.py │ └── diffusion_transformer_hybrid_image_policy.py ├── real_world │ ├── keystroke_counter.py │ ├── multi_camera_visualizer.py │ ├── multi_realsense.py │ ├── real_data_conversion.py │ ├── real_env.py │ ├── real_inference_util.py │ ├── realsense_config │ │ ├── 415_high_accuracy_mode.json │ │ └── 435_high_accuracy_mode.json │ ├── rtde_interpolation_controller.py │ ├── single_realsense.py │ ├── spacemouse.py │ ├── spacemouse_shared_memory.py │ └── video_recorder.py ├── shared_memory │ ├── 
shared_memory_queue.py │ ├── shared_memory_ring_buffer.py │ ├── shared_memory_util.py │ └── shared_ndarray.py └── workspace │ ├── base_workspace.py │ └── train_diffusion_transformer_hybrid_workspace.py ├── eval.py ├── mixture_of_experts ├── mixture_of_experts │ └── __init__.py ├── moe.png ├── moe.py ├── setup.py └── task_moe.py ├── moe └── code │ ├── moe │ ├── configs │ │ ├── davit_base_moe_lamb_16nodes.py │ │ ├── davit_small_moe_lamb_16nodes.py │ │ └── davit_tiny_moe_lamb_16nodes.py │ └── davit_moe.py │ └── mtl │ ├── configs │ ├── davit_base_lamb_16nodes.py │ ├── davit_small_lamb_16nodes.py │ └── davit_tiny_lamb_16nodes.py │ └── davit.py ├── parallel_linear ├── .gitignore ├── README.md ├── parallel_experts │ ├── __init__.py │ ├── moe.py │ ├── parallel_experts.py │ └── task_moe.py ├── parallel_linear.cc ├── parallel_linear_kernel.cu ├── setup.py └── test.py ├── patch_moe ├── encoder.py ├── gate.py ├── resnet.py └── test.py ├── pyrightconfig.json ├── requirements.txt ├── resnet_moe ├── moe_layer.py ├── resnet_moe.py └── router.py ├── setup.py ├── train.py └── utils └── recursive_yaml.py /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | logs 3 | wandb 4 | outputs 5 | data 6 | data_local 7 | .vscode 8 | _wandb 9 | test_eval 10 | *.ckpt 11 | **/.DS_Store 12 | 13 | fuse.cfg 14 | 15 | *.ai 16 | 17 | # Generation results 18 | results/ 19 | 20 | ray/auth.json 21 | 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Spyder project settings 126 | .spyderproject 127 | .spyproject 128 | 129 | # Rope project settings 130 | .ropeproject 131 | 132 | # mkdocs documentation 133 | /site 134 | 135 | # mypy 136 | .mypy_cache/ 137 | .dmypy.json 138 | dmypy.json 139 | 140 | # Pyre type checker 141 | .pyre/ 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Columbia Artificial Intelligence and Robotics Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CoRL2024: Sparse Diffusion Policy 2 | 3 | ### Dataset Download 4 | 5 | Using Direct Download Links 6 | 7 | You can download the datasets manually from Google Drive. Each folder corresponds to one of the MimicGen dataset types. 8 | 9 | **Google Drive folder with all mimicgen datasets:** [link](https://drive.google.com/drive/folders/14e9kkHGfApuQ709LBEbXrXVI1Lp5Ax7p?usp=drive_link) 10 | 11 | Then place the downloaded `core` folder at `robomimic/core`, so that each task's `.hdf5` file is available under that path. 12 |
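The per-task configs under `config/tasks/` expect each dataset at `robomimic/core/<task_name>.hdf5`. As an illustration, the eight tasks referenced by `config/base.yaml` give a layout like the following (the exact set of files depends on which tasks you download):

```
robomimic/
└── core/
    ├── coffee_d0.hdf5
    ├── hammer_cleanup_d0.hdf5
    ├── mug_cleanup_d0.hdf5
    ├── nut_assembly_d0.hdf5
    ├── square_d0.hdf5
    ├── stack_d0.hdf5
    ├── stack_three_d0.hdf5
    └── threading_d0.hdf5
```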
13 | ### 🛠️ Installation 14 | #### 🖥️ Simulation 15 | To reproduce our simulation benchmark results, install our conda environment on a Linux machine with an Nvidia GPU. On Ubuntu 20.04 you need to install the following apt packages for mujoco: 16 | ```console 17 | $ sudo apt install -y libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf 18 | ``` 19 | 20 | We recommend [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge) instead of the standard anaconda distribution for faster installation: 21 | ```console 22 | $ mamba env create -f conda_environment.yaml 23 | ``` 24 | 25 | but you can use conda as well: 26 | ```console 27 | $ conda env create -f conda_environment.yaml 28 | ``` 29 | ### Installation 30 | 31 | Next, activate the environment and install the packages needed for MimicGen: 32 | 33 | ```sh 34 | conda activate sdp 35 | ``` 36 | 37 | You can install most of the dependencies by cloning the repository and then installing from source: 38 | 39 | ```sh 40 | cd 41 | git clone https://github.com/NVlabs/mimicgen_environments.git 42 | cd mimicgen_environments 43 | pip install -e . 44 | ``` 45 | 46 | There are some additional dependencies that we list below. These are installed from source: 47 | 48 | - [robosuite](https://robosuite.ai/) 49 | - **Installation** 50 | ```sh 51 | cd 52 | git clone https://github.com/ARISE-Initiative/robosuite.git 53 | cd robosuite 54 | git checkout b9d8d3de5e3dfd1724f4a0e6555246c460407daa 55 | pip install -e . 56 | ``` 57 | - **Note**: the git checkout command corresponds to the commit we used for testing our policy learning results. In general the `master` branch (`v1.4+`) should be fine. 58 | - For more detailed instructions, see [here](https://robosuite.ai/docs/installation.html) 59 | - [robomimic](https://robomimic.github.io/) 60 | - **Installation** 61 | ```sh 62 | cd 63 | git clone https://github.com/ARISE-Initiative/robomimic.git 64 | cd robomimic 65 | git checkout ab6c3dcb8506f7f06b43b41365e5b3288c858520 66 | pip install -e . 67 | ``` 68 | - **Note**: the git checkout command corresponds to the commit we used for testing our policy learning results. In general the `master` branch (`v0.3+`) should be fine. 69 | - For more detailed instructions, see [here](https://robomimic.github.io/docs/introduction/installation.html) 70 | - [robosuite_task_zoo](https://github.com/ARISE-Initiative/robosuite-task-zoo) 71 | - **Note**: This is optional and only needed for the Kitchen and Hammer Cleanup environments / datasets. 72 | - **Installation** 73 | ```sh 74 | cd 75 | git clone https://github.com/ARISE-Initiative/robosuite-task-zoo 76 | cd robosuite-task-zoo 77 | git checkout 74eab7f88214c21ca1ae8617c2b2f8d19718a9ed 78 | pip install -e . 79 | ``` 80 | 81 | Lastly, **please downgrade MuJoCo to 2.3.2**: 82 | ```sh 83 | pip install mujoco==2.3.2 84 | ``` 85 | 86 | **Note**: This MuJoCo version (`2.3.2`) is important -- in our testing, we found that other versions of MuJoCo could be problematic, especially for the Sawyer arm datasets (e.g. `2.3.5` causes problems with rendering and `2.3.7` changes the dynamics of the robot arm significantly from the collected datasets). 87 | 88 | The `conda_environment_macos.yaml` file is only for development on MacOS and does not have full support for benchmarks. 89 | 90 | ### Training 91 | ```console 92 | $ python train.py 93 | ``` 94 | The results in our paper are evaluated every 50 epochs; after about 100 epochs you should obtain results similar to those reported in the paper.
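Training is driven by the multi-task configuration in `config/base.yaml`, which composes the eight per-task configs under `config/tasks/` with a custom `!include` YAML tag (presumably implemented in `utils/recursive_yaml.py`). The snippet below is only a minimal sketch of how such a tag can be registered with PyYAML; it is not the repository's actual loader, and it assumes the include paths resolve relative to the repository root:

```python
import yaml


class IncludeLoader(yaml.SafeLoader):
    """SafeLoader subclass so the custom tag does not leak into the global loader."""
    pass


def _include(loader: IncludeLoader, node: yaml.Node):
    # The scalar value of the node is the path of the YAML file to include,
    # e.g. "config/tasks/square_d0.yaml" as used in config/base.yaml.
    path = loader.construct_scalar(node)
    with open(path, "r") as f:
        # Recursively load the included file so nested !include tags also work.
        return yaml.load(f, IncludeLoader)


IncludeLoader.add_constructor("!include", _include)

if __name__ == "__main__":
    with open("config/base.yaml", "r") as f:
        cfg = yaml.load(f, IncludeLoader)
    # Each taskN entry is now the fully expanded per-task config dict.
    print(cfg["task0"]["name"])  # expected: square_d0, per config/tasks/square_d0.yaml
```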
95 | 96 | ### Training Checkpoints 97 | 98 | Within the `outputs` folder, each experiment directory is organized as follows: 99 | ``` 100 | 101 | ├── config.yaml 102 | ├── metrics 103 | │   └── logs.json.txt 104 | ├── train 105 | │   ├── checkpoints 106 | │   │   ├── epoch=0299-test_mean_score=6.070.ckpt 107 | │   │   └── latest.ckpt 108 | │   └── logs.json.txt 109 | 110 | ``` 111 | 112 | ### Checkpoints 113 | 114 | You can download our SDP checkpoints manually from Google Drive. 115 | 116 | **Google Drive folder with our checkpoints:** [link](https://drive.google.com/file/d/1So-byi2hNXIrPLsMT1KLaSbJTpRM1pil/view) 117 | 118 | If the link does not work, try reloading it. 119 | 120 | Save the checkpoints to a location of your choice, referred to below as `/path/to/ckpt`. 121 | 122 | ### Evaluation 123 | ```console 124 | $ python eval.py --checkpoint /path/to/ckpt 125 | ``` 126 | 127 | This should reproduce multi-task results similar to those reported in our paper. 128 | -------------------------------------------------------------------------------- /conda_environment.yaml: -------------------------------------------------------------------------------- 1 | name: sdp 2 | channels: 3 | - pytorch 4 | - pytorch3d 5 | - nvidia 6 | - conda-forge 7 | dependencies: 8 | - python=3.9 9 | - pip=22.2.2 10 | - cudatoolkit=11.6 11 | - pytorch=1.12.1 12 | - torchvision=0.13.1 13 | - pytorch3d=0.7.0 14 | - numpy=1.23.3 15 | - numba==0.56.4 16 | - scipy==1.9.1 17 | - py-opencv=4.6.0 18 | - cffi=1.15.1 19 | - ipykernel=6.16 20 | - matplotlib=3.6.1 21 | - zarr=2.12.0 22 | - numcodecs=0.10.2 23 | - h5py=3.7.0 24 | - hydra-core=1.2.0 25 | - einops=0.4.1 26 | - tqdm=4.64.1 27 | - dill=0.3.5.1 28 | - scikit-video=1.1.11 29 | - scikit-image=0.19.3 30 | - gym=0.21.0 31 | - pymunk=6.2.1 32 | - wandb=0.13.3 33 | - threadpoolctl=3.1.0 34 | - shapely=1.8.4 35 | - cython=0.29.32 36 | - imageio=2.22.0 37 | - imageio-ffmpeg=0.4.7 38 | - termcolor=2.0.1 39 | - tensorboard=2.10.1 40 | - tensorboardx=2.5.1 41 | - psutil=5.9.2 42 | - click=8.0.4 43 | - boto3=1.24.96 44 | - accelerate=0.13.2 45 | - datasets=2.6.1 46 | - diffusers=0.11.1 47 | - av=10.0.0 48 | - cmake=3.24.3 49 | # trick to avoid cpu affinity issue described in https://github.com/pytorch/pytorch/issues/99625 50 | - llvm-openmp=14 51 | # trick to force reinstall imagecodecs via pip 52 | - imagecodecs==2022.8.8 53 | - pip: 54 | - ray[default,tune]==2.2.0 55 | # requires mujoco py dependencies libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf 56 | - free-mujoco-py==2.1.6 57 | - pygame==2.1.2 58 | - pybullet-svl==3.1.6.4 59 | - robosuite @ https://github.com/cheng-chi/robosuite/archive/277ab9588ad7a4f4b55cf75508b44aa67ec171f0.tar.gz 60 | - robomimic==0.2.0 61 | - pytorchvideo==0.1.5 62 | # pip package required for jpeg-xl 63 | - imagecodecs==2022.9.26 64 | - r3m @ https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz 65 | - dm-control==1.0.9 66 | -------------------------------------------------------------------------------- /config/base.yaml: -------------------------------------------------------------------------------- 1 | _target_: diffusion_policy.workspace.train_diffusion_transformer_hybrid_workspace.TrainDiffusionTransformerHybridWorkspace 2 | checkpoint: 3 | save_last_ckpt: true 4 | save_last_snapshot: false 5 | topk: 6 | format_str: epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt 7 | k: 5 8 | mode: max 9 | monitor_key: test_mean_score 10 | dataloader: 11 | batch_size: 64 12 | num_workers: 8 13 | persistent_workers: false 14 | pin_memory: true 15 | shuffle: true 16 |
dataset_obs_steps: 2 17 | ema: 18 | _target_: diffusion_policy.model.diffusion.ema_model.EMAModel 19 | inv_gamma: 1.0 20 | max_value: 0.9999 21 | min_value: 0.0 22 | power: 0.75 23 | update_after_step: 0 24 | exp_name: default 25 | horizon: 10 26 | keypoint_visible_rate: 1.0 27 | logging: 28 | group: null 29 | id: null 30 | mode: online 31 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 32 | project: diffusion_policy_debug 33 | resume: true 34 | tags: 35 | - train_diffusion_transformer_hybrid 36 | - can_image 37 | - default 38 | multi_run: 39 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 40 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 41 | n_action_steps: 8 42 | n_latency_steps: 0 43 | n_obs_steps: 2 44 | name: train_diffusion_transformer_hybrid 45 | obs_as_cond: true 46 | optimizer: 47 | betas: 48 | - 0.9 49 | - 0.95 50 | learning_rate: 0.0001 51 | obs_encoder_weight_decay: 1.0e-06 52 | transformer_weight_decay: 0.001 53 | past_action_visible: false 54 | policy: 55 | _target_: diffusion_policy.policy.diffusion_transformer_hybrid_image_policy.DiffusionTransformerHybridImagePolicy 56 | causal_attn: true 57 | crop_shape: 58 | - 80 #76 59 | - 80 #76 60 | eval_fixed_crop: true 61 | horizon: 10 62 | n_tasks: 8 63 | n_action_steps: 8 64 | n_cond_layers: 0 65 | n_emb: 512 66 | n_head: 4 67 | n_layer: 12 68 | n_obs_steps: 2 69 | noise_scheduler: 70 | _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler 71 | beta_end: 0.02 72 | beta_schedule: squaredcos_cap_v2 73 | beta_start: 0.0001 74 | clip_sample: true 75 | num_train_timesteps: 100 76 | prediction_type: epsilon 77 | variance_type: fixed_small 78 | num_inference_steps: 100 79 | obs_as_cond: true 80 | obs_encoder_group_norm: true 81 | p_drop_attn: 0.3 82 | p_drop_emb: 0.0 83 | shape_meta: 84 | action: 85 | shape: 86 | - 7 87 | obs: 88 | agentview_image: 89 | shape: 90 | - 3 91 | - 84 92 | - 84 93 | type: rgb 94 | robot0_eef_pos: 95 | shape: 96 | - 3 97 | robot0_eef_quat: 98 | shape: 99 | - 4 100 | robot0_eye_in_hand_image: 101 | shape: 102 | - 3 103 | - 84 104 | - 84 105 | type: rgb 106 | robot0_gripper_qpos: 107 | shape: 108 | - 2 109 | time_as_cond: true 110 | shape_meta: 111 | action: 112 | shape: 113 | - 7 114 | obs: 115 | agentview_image: 116 | shape: 117 | - 3 118 | - 84 119 | - 84 120 | type: rgb 121 | robot0_eef_pos: 122 | shape: 123 | - 3 124 | robot0_eef_quat: 125 | shape: 126 | - 4 127 | robot0_eye_in_hand_image: 128 | shape: 129 | - 3 130 | - 84 131 | - 84 132 | type: rgb 133 | robot0_gripper_qpos: 134 | shape: 135 | - 2 136 | task_name: multi-task 137 | training: 138 | checkpoint_every: 1 139 | debug: false 140 | device: cuda:0 141 | gradient_accumulate_every: 1 142 | lr_scheduler: cosine 143 | lr_warmup_steps: 100 144 | max_train_steps: null 145 | max_val_steps: null 146 | num_epochs: 3500 147 | resume: false 148 | rollout_every: 50 149 | sample_every: 10 150 | seed: 42 151 | tqdm_interval_sec: 1.0 152 | use_ema: true 153 | val_every: 10 154 | val_dataloader: 155 | batch_size: 64 156 | num_workers: 4 157 | persistent_workers: false 158 | pin_memory: true 159 | shuffle: false 160 | 161 | task_num: 8 162 | task0: !include "config/tasks/square_d0.yaml" #2334 163 | task1: !include "config/tasks/stack_d0.yaml" #1632 164 | task2: !include "config/tasks/coffee_d0.yaml" #3402 165 | task3: !include "config/tasks/hammer_cleanup_d0.yaml" #4356 166 | task4: !include "config/tasks/mug_cleanup_d0.yaml" #5162 167 | task5: !include "config/tasks/nut_assembly_d0.yaml" #5476 168 | 
task6: !include "config/tasks/stack_three_d0.yaml" #3888 169 | task7: !include "config/tasks/threading_d0.yaml" #3424 -------------------------------------------------------------------------------- /config/tasks/coffee_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/coffee_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/coffee_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/coffee_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: coffee_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: coffee_d0 114 | -------------------------------------------------------------------------------- /config/tasks/coffee_preparation_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/coffee_preparation_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/coffee_preparation_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/coffee_preparation_d0.hdf5 46 | 
fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: coffee_preparation_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: coffee_preparation_d0 114 | -------------------------------------------------------------------------------- /config/tasks/hammer_cleanup_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/hammer_cleanup_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/hammer_cleanup_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/hammer_cleanup_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: hammer_cleanup_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: hammer_cleanup_d0 114 | -------------------------------------------------------------------------------- /config/tasks/kitchen_d0.yaml: 
-------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/kitchen_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/core/kitchen_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/kitchen_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: kitchen_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: kitchen_d0 114 | -------------------------------------------------------------------------------- /config/tasks/mug_cleanup_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/mug_cleanup_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/mug_cleanup_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/mug_cleanup_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | 
action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: mug_cleanup_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: mug_cleanup_d0 114 | -------------------------------------------------------------------------------- /config/tasks/nut_assembly_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/nut_assembly_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/nut_assembly_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/nut_assembly_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: nut_assembly_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: nut_assembly_d0 114 | -------------------------------------------------------------------------------- /config/tasks/pick_place_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/pick_place_d0.hdf5 6 | horizon: 10 7 | 
n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/pick_place_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/pick_place_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: pick_place_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: pick_place_d0 114 | -------------------------------------------------------------------------------- /config/tasks/square_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/square_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/square_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/square_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | 
shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: square_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: square_d0 114 | -------------------------------------------------------------------------------- /config/tasks/stack_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/stack_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/stack_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/stack_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: stack_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: stack_d0 114 | -------------------------------------------------------------------------------- /config/tasks/stack_three_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/stack_three_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 
84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/stack_three_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/stack_three_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: stack_three_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: stack_three_d0 114 | -------------------------------------------------------------------------------- /config/tasks/threading_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/threading_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/threading_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/threading_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: threading_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | 
robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: threading_d0 114 | -------------------------------------------------------------------------------- /config/tasks/three_piece_assembly_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/three_piece_assembly_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/three_piece_assembly_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/three_piece_assembly_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: three_piece_assembly_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: three_piece_assembly_d0 114 | -------------------------------------------------------------------------------- /diffusion_policy/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | self.monitor_key = monitor_key 17 | self.mode = mode 18 | self.k = k 19 | self.format_str = format_str 20 | self.path_value_map = dict() 21 | 22 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 23 | if self.k == 0: 24 | return None 25 | 26 | value = data[self.monitor_key] 27 | ckpt_path = os.path.join( 28 | self.save_dir, self.format_str.format(**data)) 29 | 30 | if len(self.path_value_map) < self.k: 31 | # under-capacity 32 | self.path_value_map[ckpt_path] = value 33 | return ckpt_path 
34 | 35 | # at capacity 36 | sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 37 | min_path, min_value = sorted_map[0] 38 | max_path, max_value = sorted_map[-1] 39 | 40 | delete_path = None 41 | if self.mode == 'max': 42 | if value > min_value: 43 | delete_path = min_path 44 | else: 45 | if value < max_value: 46 | delete_path = max_path 47 | 48 | if delete_path is None: 49 | return None 50 | else: 51 | del self.path_value_map[delete_path] 52 | self.path_value_map[ckpt_path] = value 53 | 54 | if not os.path.exists(self.save_dir): 55 | os.mkdir(self.save_dir) 56 | 57 | if os.path.exists(delete_path): 58 | os.remove(delete_path) 59 | return ckpt_path 60 | -------------------------------------------------------------------------------- /diffusion_policy/common/cv2_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | import math 3 | import cv2 4 | import numpy as np 5 | 6 | def draw_reticle(img, u, v, label_color): 7 | """ 8 | Draws a reticle (cross-hair) on the image at the given position on top of 9 | the original image. 10 | @param img (In/Out) uint8 3 channel image 11 | @param u X coordinate (width) 12 | @param v Y coordinate (height) 13 | @param label_color tuple of 3 ints for RGB color used for drawing. 14 | """ 15 | # Cast to int. 16 | u = int(u) 17 | v = int(v) 18 | 19 | white = (255, 255, 255) 20 | cv2.circle(img, (u, v), 10, label_color, 1) 21 | cv2.circle(img, (u, v), 11, white, 1) 22 | cv2.circle(img, (u, v), 12, label_color, 1) 23 | cv2.line(img, (u, v + 1), (u, v + 3), white, 1) 24 | cv2.line(img, (u + 1, v), (u + 3, v), white, 1) 25 | cv2.line(img, (u, v - 1), (u, v - 3), white, 1) 26 | cv2.line(img, (u - 1, v), (u - 3, v), white, 1) 27 | 28 | 29 | def draw_text( 30 | img, 31 | *, 32 | text, 33 | uv_top_left, 34 | color=(255, 255, 255), 35 | fontScale=0.5, 36 | thickness=1, 37 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 38 | outline_color=(0, 0, 0), 39 | line_spacing=1.5, 40 | ): 41 | """ 42 | Draws multiline with an outline. 
43 | """ 44 | assert isinstance(text, str) 45 | 46 | uv_top_left = np.array(uv_top_left, dtype=float) 47 | assert uv_top_left.shape == (2,) 48 | 49 | for line in text.splitlines(): 50 | (w, h), _ = cv2.getTextSize( 51 | text=line, 52 | fontFace=fontFace, 53 | fontScale=fontScale, 54 | thickness=thickness, 55 | ) 56 | uv_bottom_left_i = uv_top_left + [0, h] 57 | org = tuple(uv_bottom_left_i.astype(int)) 58 | 59 | if outline_color is not None: 60 | cv2.putText( 61 | img, 62 | text=line, 63 | org=org, 64 | fontFace=fontFace, 65 | fontScale=fontScale, 66 | color=outline_color, 67 | thickness=thickness * 3, 68 | lineType=cv2.LINE_AA, 69 | ) 70 | cv2.putText( 71 | img, 72 | text=line, 73 | org=org, 74 | fontFace=fontFace, 75 | fontScale=fontScale, 76 | color=color, 77 | thickness=thickness, 78 | lineType=cv2.LINE_AA, 79 | ) 80 | 81 | uv_top_left += [0, h * line_spacing] 82 | 83 | 84 | def get_image_transform( 85 | input_res: Tuple[int,int]=(1280,720), 86 | output_res: Tuple[int,int]=(640,480), 87 | bgr_to_rgb: bool=False): 88 | 89 | iw, ih = input_res 90 | ow, oh = output_res 91 | rw, rh = None, None 92 | interp_method = cv2.INTER_AREA 93 | 94 | if (iw/ih) >= (ow/oh): 95 | # input is wider 96 | rh = oh 97 | rw = math.ceil(rh / ih * iw) 98 | if oh > ih: 99 | interp_method = cv2.INTER_LINEAR 100 | else: 101 | rw = ow 102 | rh = math.ceil(rw / iw * ih) 103 | if ow > iw: 104 | interp_method = cv2.INTER_LINEAR 105 | 106 | w_slice_start = (rw - ow) // 2 107 | w_slice = slice(w_slice_start, w_slice_start + ow) 108 | h_slice_start = (rh - oh) // 2 109 | h_slice = slice(h_slice_start, h_slice_start + oh) 110 | c_slice = slice(None) 111 | if bgr_to_rgb: 112 | c_slice = slice(None, None, -1) 113 | 114 | def transform(img: np.ndarray): 115 | assert img.shape == ((ih,iw,3)) 116 | # resize 117 | img = cv2.resize(img, (rw, rh), interpolation=interp_method) 118 | # crop 119 | img = img[h_slice, w_slice, c_slice] 120 | return img 121 | return transform 122 | 123 | def optimal_row_cols( 124 | n_cameras, 125 | in_wh_ratio, 126 | max_resolution=(1920, 1080) 127 | ): 128 | out_w, out_h = max_resolution 129 | out_wh_ratio = out_w / out_h 130 | 131 | n_rows = np.arange(n_cameras,dtype=np.int64) + 1 132 | n_cols = np.ceil(n_cameras / n_rows).astype(np.int64) 133 | cat_wh_ratio = in_wh_ratio * (n_cols / n_rows) 134 | ratio_diff = np.abs(out_wh_ratio - cat_wh_ratio) 135 | best_idx = np.argmin(ratio_diff) 136 | best_n_row = n_rows[best_idx] 137 | best_n_col = n_cols[best_idx] 138 | best_cat_wh_ratio = cat_wh_ratio[best_idx] 139 | 140 | rw, rh = None, None 141 | if best_cat_wh_ratio >= out_wh_ratio: 142 | # cat is wider 143 | rw = math.floor(out_w / best_n_col) 144 | rh = math.floor(rw / in_wh_ratio) 145 | else: 146 | rh = math.floor(out_h / best_n_row) 147 | rw = math.floor(rh * in_wh_ratio) 148 | 149 | # crop_resolution = (rw, rh) 150 | return rw, rh, best_n_col, best_n_row 151 | -------------------------------------------------------------------------------- /diffusion_policy/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if i == 0: 12 | env.set_state(state) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, 
coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /diffusion_policy/common/json_logger.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, Any, Sequence 2 | import os 3 | import copy 4 | import json 5 | import numbers 6 | import pandas as pd 7 | 8 | 9 | def read_json_log(path: str, 10 | required_keys: Sequence[str]=tuple(), 11 | **kwargs) -> pd.DataFrame: 12 | """ 13 | Read json-per-line file, with potentially incomplete lines. 14 | kwargs passed to pd.read_json 15 | """ 16 | lines = list() 17 | with open(path, 'r') as f: 18 | while True: 19 | # one json per line 20 | line = f.readline() 21 | if len(line) == 0: 22 | # EOF 23 | break 24 | elif not line.endswith('\n'): 25 | # incomplete line 26 | break 27 | is_relevant = False 28 | for k in required_keys: 29 | if k in line: 30 | is_relevant = True 31 | break 32 | if is_relevant: 33 | lines.append(line) 34 | if len(lines) < 1: 35 | return pd.DataFrame() 36 | json_buf = f'[{",".join([line for line in (line.strip() for line in lines) if line])}]' 37 | df = pd.read_json(json_buf, **kwargs) 38 | return df 39 | 40 | class JsonLogger: 41 | def __init__(self, path: str, 42 | filter_fn: Optional[Callable[[str,Any],bool]]=None): 43 | if filter_fn is None: 44 | filter_fn = lambda k,v: isinstance(v, numbers.Number) 45 | 46 | # default to append mode 47 | self.path = path 48 | self.filter_fn = filter_fn 49 | self.file = None 50 | self.last_log = None 51 | 52 | def start(self): 53 | # use line buffering 54 | try: 55 | self.file = file = open(self.path, 'r+', buffering=1) 56 | except FileNotFoundError: 57 | self.file = file = open(self.path, 'w+', buffering=1) 58 | 59 | # Move the pointer (similar to a cursor in a text editor) to the end of the file 60 | pos = file.seek(0, os.SEEK_END) 61 | 62 | # Read each character in the file one at a time from the last 63 | # character going backwards, searching for a newline character 64 | # If we find a new line, exit the search 65 | while pos > 0 and file.read(1) != "\n": 66 | pos -= 1 67 | file.seek(pos, os.SEEK_SET) 68 | # now the file pointer is at one past the last '\n' 69 | # and pos is at the last '\n'. 
70 | last_line_end = file.tell() 71 | 72 | # find the start of second last line 73 | pos = max(0, pos-1) 74 | file.seek(pos, os.SEEK_SET) 75 | while pos > 0 and file.read(1) != "\n": 76 | pos -= 1 77 | file.seek(pos, os.SEEK_SET) 78 | # now the file pointer is at one past the second last '\n' 79 | last_line_start = file.tell() 80 | 81 | if last_line_start < last_line_end: 82 | # has last line of json 83 | last_line = file.readline() 84 | self.last_log = json.loads(last_line) 85 | 86 | # remove the last incomplete line 87 | file.seek(last_line_end) 88 | file.truncate() 89 | 90 | def stop(self): 91 | self.file.close() 92 | self.file = None 93 | 94 | def __enter__(self): 95 | self.start() 96 | return self 97 | 98 | def __exit__(self, exc_type, exc_val, exc_tb): 99 | self.stop() 100 | 101 | def log(self, data: dict): 102 | filtered_data = dict( 103 | filter(lambda x: self.filter_fn(*x), data.items())) 104 | # save current as last log 105 | self.last_log = filtered_data 106 | for k, v in filtered_data.items(): 107 | if isinstance(v, numbers.Integral): 108 | filtered_data[k] = int(v) 109 | elif isinstance(v, numbers.Number): 110 | filtered_data[k] = float(v) 111 | buf = json.dumps(filtered_data) 112 | # ensure one line per json 113 | buf = buf.replace('\n','') + '\n' 114 | self.file.write(buf) 115 | 116 | def get_last_log(self): 117 | return copy.deepcopy(self.last_log) 118 | -------------------------------------------------------------------------------- /diffusion_policy/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaf of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 | if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /diffusion_policy/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest. 
7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /diffusion_policy/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /diffusion_policy/common/pytorch_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, List 2 | import collections 3 | import torch 4 | import torch.nn as nn 5 | 6 | def dict_apply( 7 | x: Dict[str, torch.Tensor], 8 | func: Callable[[torch.Tensor], torch.Tensor] 9 | ) -> Dict[str, torch.Tensor]: 10 | result = dict() 11 | for key, value in x.items(): 12 | if isinstance(value, dict): 13 | result[key] = dict_apply(value, func) 14 | else: 15 | result[key] = func(value) 16 | return result 17 | 18 | def pad_remaining_dims(x, target): 19 | assert x.shape == target.shape[:len(x.shape)] 20 | return x.reshape(x.shape + (1,)*(len(target.shape) - len(x.shape))) 21 | 22 | def dict_apply_split( 23 | x: Dict[str, torch.Tensor], 24 | split_func: Callable[[torch.Tensor], Dict[str, torch.Tensor]] 25 | ) -> Dict[str, torch.Tensor]: 26 | results = collections.defaultdict(dict) 27 | for key, value in x.items(): 28 | result = split_func(value) 29 | for k, v in result.items(): 30 | results[k][key] = v 31 | return results 32 | 33 | def dict_apply_reduce( 34 | x: List[Dict[str, torch.Tensor]], 35 | reduce_func: Callable[[List[torch.Tensor]], torch.Tensor] 36 | ) -> Dict[str, torch.Tensor]: 37 | result = 
dict() 38 | for key in x[0].keys(): 39 | result[key] = reduce_func([x_[key] for x_ in x]) 40 | return result 41 | 42 | 43 | def replace_submodules( 44 | root_module: nn.Module, 45 | predicate: Callable[[nn.Module], bool], 46 | func: Callable[[nn.Module], nn.Module]) -> nn.Module: 47 | """ 48 | predicate: Return true if the module is to be replaced. 49 | func: Return new module to use. 50 | """ 51 | if predicate(root_module): 52 | return func(root_module) 53 | 54 | bn_list = [k.split('.') for k, m 55 | in root_module.named_modules(remove_duplicate=True) 56 | if predicate(m)] 57 | for *parent, k in bn_list: 58 | parent_module = root_module 59 | if len(parent) > 0: 60 | parent_module = root_module.get_submodule('.'.join(parent)) 61 | if isinstance(parent_module, nn.Sequential): 62 | src_module = parent_module[int(k)] 63 | else: 64 | src_module = getattr(parent_module, k) 65 | tgt_module = func(src_module) 66 | if isinstance(parent_module, nn.Sequential): 67 | parent_module[int(k)] = tgt_module 68 | else: 69 | setattr(parent_module, k, tgt_module) 70 | # verify that all BN are replaced 71 | bn_list = [k.split('.') for k, m 72 | in root_module.named_modules(remove_duplicate=True) 73 | if predicate(m)] 74 | assert len(bn_list) == 0 75 | return root_module 76 | 77 | def optimizer_to(optimizer, device): 78 | for state in optimizer.state.values(): 79 | for k, v in state.items(): 80 | if isinstance(v, torch.Tensor): 81 | state[k] = v.to(device=device) 82 | return optimizer 83 | -------------------------------------------------------------------------------- /diffusion_policy/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed.yaml: -------------------------------------------------------------------------------- 1 | name: 
blockpush_lowdim_seed 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: False 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed_abs 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: True 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed_abs.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | 
render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: 
diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | dataset_dir: &dataset_dir data/kitchen 8 | 9 | env_runner: 10 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 11 | dataset_dir: *dataset_dir 12 | n_train: 6 13 | n_train_vis: 2 14 | train_start_seed: 0 15 | n_test: 50 16 | n_test_vis: 4 17 | test_start_seed: 100000 18 | max_steps: 280 19 | n_obs_steps: ${n_obs_steps} 20 | n_action_steps: ${n_action_steps} 21 | render_hw: [240, 360] 22 | fps: 12.5 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.kitchen_lowdim_dataset.KitchenLowdimDataset 28 | dataset_dir: *dataset_dir 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | abs_action: True 8 | robot_noise_ratio: 0.1 9 | 10 | env_runner: 11 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 12 | dataset_dir: data/kitchen 13 | n_train: 6 14 | n_train_vis: 2 15 | train_start_seed: 0 16 | n_test: 50 17 | n_test_vis: 4 18 | test_start_seed: 100000 19 | max_steps: 280 20 | n_obs_steps: ${n_obs_steps} 21 | n_action_steps: ${n_action_steps} 22 | render_hw: [240, 360] 23 | fps: 12.5 24 | past_action: ${past_action_visible} 25 | abs_action: ${task.abs_action} 26 | robot_noise_ratio: ${task.robot_noise_ratio} 27 | n_envs: null 28 | 29 | dataset: 30 | _target_: diffusion_policy.dataset.kitchen_mjl_lowdim_dataset.KitchenMjlLowdimDataset 31 | dataset_dir: data/kitchen/kitchen_demos_multitask 32 | horizon: ${horizon} 33 | pad_before: ${eval:'${n_obs_steps}-1'} 34 | pad_after: ${eval:'${n_action_steps}-1'} 35 | abs_action: ${task.abs_action} 36 | robot_noise_ratio: ${task.robot_noise_ratio} 37 | seed: 42 38 | val_ratio: 0.02 39 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path 
data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 1 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 3 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | n_train: 6 32 | n_train_vis: 2 33 | train_start_idx: 0 34 | n_test: 50 35 | n_test_vis: 4 36 | test_start_seed: 100000 37 | # use python's eval function as resolver, single-quoted string as argument 38 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'agentview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 3 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | 
tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | rotation_rep: rotation_6d 46 | seed: 42 47 | val_ratio: 0.02 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_image 2 | 3 | image_shape: &image_shape [3, 96, 96] 4 | shape_meta: &shape_meta 5 | # acceptable types: rgb, low_dim 6 | obs: 7 | image: 8 | shape: *image_shape 9 | type: rgb 10 | agent_pos: 11 | shape: [2] 12 | type: low_dim 13 | action: 14 | shape: [2] 15 | 16 | env_runner: 17 | _target_: diffusion_policy.env_runner.pusht_image_runner.PushTImageRunner 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_seed: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | legacy_test: True 24 | test_start_seed: 100000 25 | max_steps: 300 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | fps: 10 29 | past_action: ${past_action_visible} 30 | n_envs: null 31 | 32 | dataset: 33 | _target_: diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset 34 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 35 | horizon: ${horizon} 36 | pad_before: ${eval:'${n_obs_steps}-1'} 37 | pad_after: ${eval:'${n_action_steps}-1'} 38 | seed: 42 39 | val_ratio: 0.02 40 | max_train_episodes: 90 41 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_lowdim 2 | 3 | obs_dim: 20 # 9*2 keypoints + 2 state 4 | action_dim: 2 5 | keypoint_dim: 2 6 | 7 | env_runner: 8 | _target_: diffusion_policy.env_runner.pusht_keypoints_runner.PushTKeypointsRunner 9 | keypoint_visible_rate: ${keypoint_visible_rate} 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | legacy_test: True 16 | test_start_seed: 100000 17 | max_steps: 300 18 | n_obs_steps: ${n_obs_steps} 19 | n_action_steps: ${n_action_steps} 20 | n_latency_steps: ${n_latency_steps} 21 | fps: 10 22 | agent_keypoints: False 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.pusht_dataset.PushTLowdimDataset 28 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | max_train_episodes: 90 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/real_pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: real_image 2 | 3 | image_shape: [3, 240, 320] 4 | dataset_path: data/pusht_real/real_pusht_20230105 5 | 6 | shape_meta: &shape_meta 7 | # acceptable types: rgb, low_dim 8 | obs: 9 | # camera_0: 10 | # shape: ${task.image_shape} 11 | # type: rgb 12 | camera_1: 13 | shape: ${task.image_shape} 14 | type: rgb 15 | # camera_2: 16 | # shape: ${task.image_shape} 17 | # type: rgb 18 | camera_3: 19 | shape: 
${task.image_shape} 20 | type: rgb 21 | # camera_4: 22 | # shape: ${task.image_shape} 23 | # type: rgb 24 | robot_eef_pose: 25 | shape: [2] 26 | type: low_dim 27 | action: 28 | shape: [2] 29 | 30 | env_runner: 31 | _target_: diffusion_policy.env_runner.real_pusht_image_runner.RealPushTImageRunner 32 | 33 | dataset: 34 | _target_: diffusion_policy.dataset.real_pusht_image_dataset.RealPushTImageDataset 35 | shape_meta: *shape_meta 36 | dataset_path: ${task.dataset_path} 37 | horizon: ${horizon} 38 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 39 | pad_after: ${eval:'${n_action_steps}-1'} 40 | n_obs_steps: ${dataset_obs_steps} 41 | n_latency_steps: ${n_latency_steps} 42 | use_cache: True 43 | seed: 42 44 | val_ratio: 0.00 45 | max_train_episodes: null 46 | delta_action: False 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | max_train_episodes: null 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | abs_action: &abs_action True 11 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 12 | 13 | 14 | env_runner: 15 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 16 | dataset_path: *dataset_path 17 | obs_keys: *obs_keys 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_idx: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | test_start_seed: 100000 24 | # use python's eval function as resolver, single-quoted string as argument 25 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | n_latency_steps: ${n_latency_steps} 29 | render_hw: [128,128] 30 | fps: 10 31 | crf: 22 32 | past_action: ${past_action_visible} 33 | abs_action: 
*abs_action 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | max_train_episodes: null 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image_abs 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 
34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [14] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 38 | abs_action: &abs_action False 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [20] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 38 | abs_action: &abs_action True 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 14 # 7*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 15 | abs_action: &abs_action False 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 5 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 
38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 20 # 10*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 15 | abs_action: &abs_action True 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 4 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_bet_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: blockpush_lowdim_seed 4 | 5 | name: train_bet_lowdim 6 | _target_: diffusion_policy.workspace.train_bet_lowdim_workspace.TrainBETLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | keypoint_dim: ${task.keypoint_dim} 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 3 15 | n_obs_steps: 3 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | obs_as_local_cond: False 21 | obs_as_global_cond: False 22 | pred_action_steps_only: False 23 | 24 | policy: 25 | _target_: diffusion_policy.policy.bet_lowdim_policy.BETLowdimPolicy 26 | 27 | action_ae: 28 | _target_: diffusion_policy.model.bet.action_ae.discretizers.k_means.KMeansDiscretizer 29 | num_bins: 24 30 | action_dim: ${action_dim} 31 | predict_offsets: True 32 | 33 | obs_encoding_net: 34 | _target_: torch.nn.Identity 35 | output_dim: ${obs_dim} 36 | 37 | state_prior: 38 | _target_: diffusion_policy.model.bet.latent_generators.mingpt.MinGPT 39 | 40 | discrete_input: false 41 | input_dim: ${obs_dim} 42 | 43 | vocab_size: 
${policy.action_ae.num_bins} 44 | 45 | # Architecture details 46 | n_layer: 4 47 | n_head: 4 48 | n_embd: 72 49 | 50 | block_size: ${horizon} # Length of history/context 51 | predict_offsets: True 52 | offset_loss_scale: 1000.0 # actions are very small 53 | focal_loss_gamma: 2.0 54 | action_dim: ${action_dim} 55 | 56 | horizon: ${horizon} 57 | n_obs_steps: ${n_obs_steps} 58 | n_action_steps: ${n_action_steps} 59 | 60 | dataloader: 61 | batch_size: 256 62 | num_workers: 1 63 | shuffle: True 64 | pin_memory: True 65 | persistent_workers: False 66 | 67 | val_dataloader: 68 | batch_size: 256 69 | num_workers: 1 70 | shuffle: False 71 | pin_memory: True 72 | persistent_workers: False 73 | 74 | optimizer: 75 | learning_rate: 0.0001 # 1e-4 76 | weight_decay: 0.1 77 | betas: [0.9, 0.95] 78 | 79 | training: 80 | device: "cuda:0" 81 | seed: 42 82 | debug: False 83 | resume: True 84 | # optimization 85 | lr_scheduler: cosine 86 | lr_warmup_steps: 500 87 | num_epochs: 5000 88 | gradient_accumulate_every: 1 89 | grad_norm_clip: 1.0 90 | enable_normalizer: True 91 | # training loop control 92 | # in epochs 93 | rollout_every: 50 94 | checkpoint_every: 50 95 | val_every: 1 96 | sample_every: 5 97 | # steps per epoch 98 | max_train_steps: null 99 | max_val_steps: null 100 | # misc 101 | tqdm_interval_sec: 1.0 102 | 103 | logging: 104 | project: diffusion_policy_debug 105 | resume: True 106 | mode: online 107 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 108 | tags: ["${name}", "${task_name}", "${exp_name}"] 109 | id: null 110 | group: null 111 | 112 | checkpoint: 113 | topk: 114 | monitor_key: test_mean_score 115 | mode: max 116 | k: 5 117 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 118 | save_last_ckpt: True 119 | save_last_snapshot: False 120 | 121 | multi_run: 122 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 123 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 124 | 125 | hydra: 126 | job: 127 | override_dirname: ${name} 128 | run: 129 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 130 | sweep: 131 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 132 | subdir: ${hydra.job.num} 133 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_diffusion_transformer_hybrid_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: lift_image_abs 4 | 5 | name: train_diffusion_transformer_hybrid 6 | _target_: diffusion_policy.workspace.train_diffusion_transformer_hybrid_workspace.TrainDiffusionTransformerHybridWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: 10 13 | n_obs_steps: 2 14 | n_action_steps: 8 15 | n_latency_steps: 0 16 | dataset_obs_steps: ${n_obs_steps} 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | obs_as_cond: True 20 | 21 | policy: 22 | _target_: diffusion_policy.policy.diffusion_transformer_hybrid_image_policy.DiffusionTransformerHybridImagePolicy 23 | 24 | shape_meta: ${shape_meta} 25 | 26 | noise_scheduler: 27 | _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler 28 | num_train_timesteps: 100 29 | beta_start: 0.0001 30 | beta_end: 0.02 31 | beta_schedule: squaredcos_cap_v2 32 | variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan 33 | clip_sample: True # required when 
predict_epsilon=False 34 | prediction_type: epsilon # or sample 35 | 36 | horizon: ${horizon} 37 | n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} 38 | n_obs_steps: ${n_obs_steps} 39 | num_inference_steps: 100 40 | 41 | crop_shape: [76, 76] 42 | obs_encoder_group_norm: True 43 | eval_fixed_crop: True 44 | 45 | n_layer: 8 46 | n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP 47 | n_head: 4 48 | n_emb: 256 49 | p_drop_emb: 0.0 50 | p_drop_attn: 0.3 51 | causal_attn: True 52 | time_as_cond: True # if false, use BERT like encoder only arch, time as input 53 | obs_as_cond: ${obs_as_cond} 54 | 55 | # scheduler.step params 56 | # predict_epsilon: True 57 | 58 | ema: 59 | _target_: diffusion_policy.model.diffusion.ema_model.EMAModel 60 | update_after_step: 0 61 | inv_gamma: 1.0 62 | power: 0.75 63 | min_value: 0.0 64 | max_value: 0.9999 65 | 66 | dataloader: 67 | batch_size: 64 68 | num_workers: 8 69 | shuffle: True 70 | pin_memory: True 71 | persistent_workers: False 72 | 73 | val_dataloader: 74 | batch_size: 64 75 | num_workers: 8 76 | shuffle: False 77 | pin_memory: True 78 | persistent_workers: False 79 | 80 | optimizer: 81 | transformer_weight_decay: 1.0e-3 82 | obs_encoder_weight_decay: 1.0e-6 83 | learning_rate: 1.0e-4 84 | betas: [0.9, 0.95] 85 | 86 | training: 87 | device: "cuda:0" 88 | seed: 42 89 | debug: False 90 | resume: True 91 | # optimization 92 | lr_scheduler: cosine 93 | # Transformer needs LR warmup 94 | lr_warmup_steps: 1000 95 | num_epochs: 3050 96 | gradient_accumulate_every: 1 97 | # EMA destroys performance when used with BatchNorm 98 | # replace BatchNorm with GroupNorm. 99 | use_ema: True 100 | # training loop control 101 | # in epochs 102 | rollout_every: 50 103 | checkpoint_every: 50 104 | val_every: 1 105 | sample_every: 5 106 | # steps per epoch 107 | max_train_steps: null 108 | max_val_steps: null 109 | # misc 110 | tqdm_interval_sec: 1.0 111 | 112 | logging: 113 | project: diffusion_policy_debug 114 | resume: True 115 | mode: online 116 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 117 | tags: ["${name}", "${task_name}", "${exp_name}"] 118 | id: null 119 | group: null 120 | 121 | checkpoint: 122 | topk: 123 | monitor_key: test_mean_score 124 | mode: max 125 | k: 5 126 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 127 | save_last_ckpt: True 128 | save_last_snapshot: False 129 | 130 | multi_run: 131 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 132 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 133 | 134 | hydra: 135 | job: 136 | override_dirname: ${name} 137 | run: 138 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 139 | sweep: 140 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 141 | subdir: ${hydra.job.num} 142 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_image_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: lift_image 4 | 5 | name: train_robomimic_image 6 | _target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: &horizon 10 13 | n_obs_steps: 1 14 | n_action_steps: 1 15 | n_latency_steps: 0 16 | dataset_obs_steps: *horizon 17 | past_action_visible: False 18 | 
keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy 22 | shape_meta: ${shape_meta} 23 | algo_name: bc_rnn 24 | obs_type: image 25 | # oc.select resolver: key, default 26 | task_name: ${oc.select:task.task_name,lift} 27 | dataset_type: ${oc.select:task.dataset_type,ph} 28 | crop_shape: [76,76] 29 | 30 | dataloader: 31 | batch_size: 64 32 | num_workers: 16 33 | shuffle: True 34 | pin_memory: True 35 | persistent_workers: False 36 | 37 | val_dataloader: 38 | batch_size: 64 39 | num_workers: 16 40 | shuffle: False 41 | pin_memory: True 42 | persistent_workers: False 43 | 44 | training: 45 | device: "cuda:0" 46 | seed: 42 47 | debug: False 48 | resume: True 49 | # optimization 50 | num_epochs: 3050 51 | # training loop control 52 | # in epochs 53 | rollout_every: 50 54 | checkpoint_every: 50 55 | val_every: 1 56 | sample_every: 5 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: test_mean_score 75 | mode: max 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/base_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 | 
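The two abstract base classes above only pin down an interface; a concrete dataset supplies the data and the normalizer. The sketch below is illustrative and not part of the repository: ToyLowdimDataset, its synthetic arrays, and the exact LinearNormalizer.fit() arguments are assumptions chosen to mirror how the replay datasets appear to use the API.

import numpy as np
import torch
from diffusion_policy.model.common.normalizer import LinearNormalizer
from diffusion_policy.dataset.base_dataset import BaseLowdimDataset

class ToyLowdimDataset(BaseLowdimDataset):
    # Hypothetical example: random (N, T, D) trajectories standing in for demonstrations.
    def __init__(self, n_episodes=16, horizon=10, obs_dim=20, action_dim=7, seed=0):
        rng = np.random.default_rng(seed)
        self.obs = rng.standard_normal((n_episodes, horizon, obs_dim)).astype(np.float32)
        self.action = rng.standard_normal((n_episodes, horizon, action_dim)).astype(np.float32)

    def get_normalizer(self, **kwargs) -> LinearNormalizer:
        normalizer = LinearNormalizer()
        # fit() arguments assumed; fits per-key min/max statistics over the last dimension
        normalizer.fit(
            data={
                'obs': self.obs.reshape(-1, self.obs.shape[-1]),
                'action': self.action.reshape(-1, self.action.shape[-1]),
            },
            last_n_dims=1, mode='limits')
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.action.reshape(-1, self.action.shape[-1]))

    def __len__(self) -> int:
        return len(self.obs)

    def __getitem__(self, idx):
        # matches the documented contract: obs is (T, Do), action is (T, Da)
        return {
            'obs': torch.from_numpy(self.obs[idx]),
            'action': torch.from_numpy(self.action[idx]),
        }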
-------------------------------------------------------------------------------- /diffusion_policy/dataset/multitask_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import pynvml 4 | import psutil 5 | 6 | class MultiDataLoader: 7 | def __init__(self, data_loaders): 8 | self.dataloaders=data_loaders 9 | self.data_loaders = [ 10 | iter(data_loader) for data_loader in data_loaders 11 | ] 12 | self.num_loaders = len(data_loaders) 13 | self.max_loader_length = max(len(loader) for loader in data_loaders) 14 | self.current_batch_idx = 0 15 | 16 | 17 | def __iter__(self): 18 | return self 19 | 20 | def __len__(self): 21 | return self.max_loader_length 22 | 23 | def get_memory_usage(self): 24 | mem=psutil.virtual_memory() 25 | print('current available memory is' +' : '+ str(round(mem.used/1024**2)) +' MIB') 26 | return round(mem.used/1024**2) 27 | 28 | def reset(self): 29 | # delete the current data loaders and reinitialize them 30 | del self.data_loaders 31 | self.data_loaders = [ 32 | iter(data_loader) for data_loader in self.dataloaders 33 | ] 34 | self.current_batch_idx = 0 35 | self.get_memory_usage() 36 | 37 | def __next__(self): 38 | if self.current_batch_idx >= self.max_loader_length: 39 | raise StopIteration 40 | self.loader_idx = self.current_batch_idx % self.num_loaders 41 | data_loader = self.data_loaders[self.loader_idx] 42 | try: 43 | batch = next(data_loader) 44 | self.current_batch_idx = self.current_batch_idx + 1 45 | return batch 46 | except StopIteration: 47 | self.current_batch_idx = self.current_batch_idx + 1 48 | return None 49 | 50 | if __name__ == "__main__": 51 | 52 | class SubDataset(Dataset): 53 | def __init__(self, data): 54 | self.data = data 55 | 56 | def __len__(self): 57 | return len(self.data) 58 | 59 | def __getitem__(self, idx): 60 | return self.data[idx] 61 | 62 | # Create some example datasets 63 | data1 = [torch.tensor([1]),] 64 | data2 = [torch.tensor([4]), torch.tensor([5]), torch.tensor([6])] 65 | 66 | # Create sub datasets and corresponding data loaders 67 | sub_dataset1 = SubDataset(data1) 68 | sub_dataset2 = SubDataset(data2) 69 | 70 | sub_data_loader1 = DataLoader(sub_dataset1, batch_size=1, shuffle=True) 71 | sub_data_loader2 = DataLoader(sub_dataset2, batch_size=1, shuffle=True) 72 | 73 | # Create the MultiDataLoader 74 | multi_data_loader = MultiDataLoader([sub_data_loader1, sub_data_loader2]) 75 | 76 | # Iterate through batches 77 | print(len(multi_data_loader)) 78 | for epoch in range(2): 79 | for batch_idx, batch in enumerate(multi_data_loader): 80 | print(f"Batch {batch_idx}: {batch}") 81 | multi_data_loader.reset() 82 | -------------------------------------------------------------------------------- /diffusion_policy/env/robomimic/robomimic_lowdim_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | import numpy as np 3 | import gym 4 | from gym.spaces import Box 5 | from robomimic.envs.env_robosuite import EnvRobosuite 6 | 7 | class RobomimicLowdimWrapper(gym.Env): 8 | def __init__(self, 9 | env: EnvRobosuite, 10 | obs_keys: List[str]=[ 11 | 'object', 12 | 'robot0_eef_pos', 13 | 'robot0_eef_quat', 14 | 'robot0_gripper_qpos'], 15 | init_state: Optional[np.ndarray]=None, 16 | render_hw=(256,256), 17 | render_camera_name='agentview' 18 | ): 19 | 20 | self.env = env 21 | self.obs_keys = obs_keys 22 | self.init_state = init_state 23 | 
self.render_hw = render_hw 24 | self.render_camera_name = render_camera_name 25 | self.seed_state_map = dict() 26 | self._seed = None 27 | 28 | # setup spaces 29 | low = np.full(env.action_dimension, fill_value=-1) 30 | high = np.full(env.action_dimension, fill_value=1) 31 | self.action_space = Box( 32 | low=low, 33 | high=high, 34 | shape=low.shape, 35 | dtype=low.dtype 36 | ) 37 | obs_example = self.get_observation() 38 | low = np.full_like(obs_example, fill_value=-1) 39 | high = np.full_like(obs_example, fill_value=1) 40 | self.observation_space = Box( 41 | low=low, 42 | high=high, 43 | shape=low.shape, 44 | dtype=low.dtype 45 | ) 46 | 47 | def get_observation(self): 48 | raw_obs = self.env.get_observation() 49 | obs = np.concatenate([ 50 | raw_obs[key] for key in self.obs_keys 51 | ], axis=0) 52 | return obs 53 | 54 | def seed(self, seed=None): 55 | np.random.seed(seed=seed) 56 | self._seed = seed 57 | 58 | def reset(self): 59 | if self.init_state is not None: 60 | # always reset to the same state 61 | # to be compatible with gym 62 | self.env.reset_to({'states': self.init_state}) 63 | elif self._seed is not None: 64 | # reset to a specific seed 65 | seed = self._seed 66 | if seed in self.seed_state_map: 67 | # env.reset is expensive, use cache 68 | self.env.reset_to({'states': self.seed_state_map[seed]}) 69 | else: 70 | # robosuite's initializes all use numpy global random state 71 | np.random.seed(seed=seed) 72 | self.env.reset() 73 | state = self.env.get_state()['states'] 74 | self.seed_state_map[seed] = state 75 | self._seed = None 76 | else: 77 | # random reset 78 | self.env.reset() 79 | 80 | # return obs 81 | obs = self.get_observation() 82 | return obs 83 | 84 | def step(self, action): 85 | raw_obs, reward, done, info = self.env.step(action) 86 | obs = np.concatenate([ 87 | raw_obs[key] for key in self.obs_keys 88 | ], axis=0) 89 | return obs, reward, done, info 90 | 91 | def render(self, mode='rgb_array'): 92 | h, w = self.render_hw 93 | return self.env.render(mode=mode, 94 | height=h, width=w, 95 | camera_name=self.render_camera_name) 96 | 97 | 98 | def test(): 99 | import robomimic.utils.file_utils as FileUtils 100 | import robomimic.utils.env_utils as EnvUtils 101 | from matplotlib import pyplot as plt 102 | 103 | dataset_path = '/home/cchi/dev/diffusion_policy/data/robomimic/datasets/square/ph/low_dim.hdf5' 104 | env_meta = FileUtils.get_env_metadata_from_dataset( 105 | dataset_path) 106 | 107 | env = EnvUtils.create_env_from_metadata( 108 | env_meta=env_meta, 109 | render=False, 110 | render_offscreen=False, 111 | use_image_obs=False, 112 | ) 113 | wrapper = RobomimicLowdimWrapper( 114 | env=env, 115 | obs_keys=[ 116 | 'object', 117 | 'robot0_eef_pos', 118 | 'robot0_eef_quat', 119 | 'robot0_gripper_qpos' 120 | ] 121 | ) 122 | 123 | states = list() 124 | for _ in range(2): 125 | wrapper.seed(0) 126 | wrapper.reset() 127 | states.append(wrapper.env.get_state()['states']) 128 | assert np.allclose(states[0], states[1]) 129 | 130 | img = wrapper.render() 131 | plt.imshow(img) 132 | # wrapper.seed() 133 | # states.append(wrapper.env.get_state()['states']) 134 | -------------------------------------------------------------------------------- /diffusion_policy/env_runner/base_image_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from diffusion_policy.policy.base_image_policy import BaseImagePolicy 3 | 4 | class BaseImageRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | 
def run(self, policy: BaseImagePolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_recording_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from diffusion_policy.real_world.video_recorder import VideoRecorder 4 | 5 | class VideoRecordingWrapper(gym.Wrapper): 6 | def __init__(self, 7 | env, 8 | video_recoder: VideoRecorder, 9 | mode='rgb_array', 10 | file_path=None, 11 | steps_per_render=1, 12 | **kwargs 13 | ): 14 | """ 15 | When file_path is None, don't record. 16 | """ 17 | super().__init__(env) 18 | 19 | self.mode = mode 20 | self.render_kwargs = kwargs 21 | self.steps_per_render = steps_per_render 22 | self.file_path = file_path 23 | self.video_recoder = video_recoder 24 | 25 | self.step_count = 0 26 | 27 | def reset(self, **kwargs): 28 | obs = super().reset(**kwargs) 29 | self.frames = list() 30 | self.step_count = 1 31 | self.video_recoder.stop() 32 | return obs 33 | 34 | def step(self, action): 35 | result = super().step(action) 36 | self.step_count += 1 37 | if self.file_path is not None \ 38 | and ((self.step_count % self.steps_per_render) == 0): 39 | if not self.video_recoder.is_ready(): 40 | self.video_recoder.start(self.file_path) 41 | 42 | frame = self.env.render( 43 | mode=self.mode, **self.render_kwargs) 44 | assert frame.dtype == np.uint8 45 | self.video_recoder.write_frame(frame) 46 | return result 47 | 48 | def render(self, mode='rgb_array', **kwargs): 49 | if self.video_recoder.is_ready(): 50 | self.video_recoder.stop() 51 | return self.file_path 52 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class VideoWrapper(gym.Wrapper): 5 | def __init__(self, 6 | env, 7 | mode='rgb_array', 8 | enabled=True, 9 | steps_per_render=1, 10 | **kwargs 11 | ): 12 | super().__init__(env) 13 | 14 | self.mode = mode 15 | self.enabled = enabled 16 | self.render_kwargs = kwargs 17 | self.steps_per_render = steps_per_render 18 | 19 | self.frames = list() 20 | self.step_count = 0 21 | 22 | def reset(self, **kwargs): 23 | obs = super().reset(**kwargs) 24 | self.frames = list() 25 | self.step_count = 1 26 | if self.enabled: 27 | frame = self.env.render( 28 | mode=self.mode, **self.render_kwargs) 29 | assert frame.dtype == np.uint8 30 | self.frames.append(frame) 31 | return obs 32 | 33 | def step(self, action): 34 | result = super().step(action) 35 | self.step_count += 1 36 | if self.enabled and ((self.step_count % self.steps_per_render) == 0): 37 | frame = self.env.render( 38 | mode=self.mode, **self.render_kwargs) 39 | assert frame.dtype == np.uint8 40 | self.frames.append(frame) 41 | return result 42 | 43 | def render(self, mode='rgb_array', **kwargs): 44 | return self.frames 45 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/action_ae/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | import abc 5 | 6 | from typing import Optional, Union 7 | 8 | import diffusion_policy.model.bet.utils as utils 9 | 10 | 11 | class AbstractActionAE(utils.SaveModule, abc.ABC): 12 | @abc.abstractmethod 13 | def 
fit_model( 14 | self, 15 | input_dataloader: DataLoader, 16 | eval_dataloader: DataLoader, 17 | obs_encoding_net: Optional[nn.Module] = None, 18 | ) -> None: 19 | pass 20 | 21 | @abc.abstractmethod 22 | def encode_into_latent( 23 | self, 24 | input_action: torch.Tensor, 25 | input_rep: Optional[torch.Tensor], 26 | ) -> torch.Tensor: 27 | """ 28 | Given the input action, discretize it. 29 | 30 | Inputs: 31 | input_action (shape: ... x action_dim): The input action to discretize. This can be in a batch, 32 | and is generally assumed that the last dimnesion is the action dimension. 33 | 34 | Outputs: 35 | discretized_action (shape: ... x num_tokens): The discretized action. 36 | """ 37 | raise NotImplementedError 38 | 39 | @abc.abstractmethod 40 | def decode_actions( 41 | self, 42 | latent_action_batch: Optional[torch.Tensor], 43 | input_rep_batch: Optional[torch.Tensor] = None, 44 | ) -> torch.Tensor: 45 | """ 46 | Given a discretized action, convert it to a continuous action. 47 | 48 | Inputs: 49 | latent_action_batch (shape: ... x num_tokens): The discretized action 50 | generated by the discretizer. 51 | 52 | Outputs: 53 | continuous_action (shape: ... x action_dim): The continuous action. 54 | """ 55 | raise NotImplementedError 56 | 57 | @property 58 | @abc.abstractmethod 59 | def num_latents(self) -> Union[int, float]: 60 | """ 61 | Number of possible latents for this generator, useful for state priors that use softmax. 62 | """ 63 | return float("inf") 64 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/latent_generators/latent_generator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from typing import Tuple, Optional 4 | 5 | import diffusion_policy.model.bet.utils as utils 6 | 7 | 8 | class AbstractLatentGenerator(abc.ABC, utils.SaveModule): 9 | """ 10 | Abstract class for a generative model that can generate latents given observation representations. 11 | 12 | In the probabilisitc sense, this model fits and samples from P(latent|observation) given some observation. 13 | """ 14 | 15 | @abc.abstractmethod 16 | def get_latent_and_loss( 17 | self, 18 | obs_rep: torch.Tensor, 19 | target_latents: torch.Tensor, 20 | seq_masks: Optional[torch.Tensor] = None, 21 | ) -> Tuple[torch.Tensor, torch.Tensor]: 22 | """ 23 | Given a set of observation representation and generated latents, get the encoded latent and the loss. 24 | 25 | Inputs: 26 | input_action: Batch of the actions taken in the multimodal demonstrations. 27 | target_latents: Batch of the latents that the generator should learn to generate the actions from. 28 | seq_masks: Batch of masks that indicate which timesteps are valid. 29 | 30 | Outputs: 31 | latent: The sampled latent from the observation. 32 | loss: The loss of the latent generator. 33 | """ 34 | pass 35 | 36 | @abc.abstractmethod 37 | def generate_latents( 38 | self, seq_obses: torch.Tensor, seq_masks: torch.Tensor 39 | ) -> torch.Tensor: 40 | """ 41 | Given a batch of sequences of observations, generate a batch of sequences of latents. 42 | 43 | Inputs: 44 | seq_obses: Batch of sequences of observations, of shape seq x batch x dim, following the transformer convention. 45 | seq_masks: Batch of sequences of masks, of shape seq x batch, following the transformer convention. 46 | 47 | Outputs: 48 | seq_latents: Batch of sequences of latents of shape seq x batch x latent_dim. 
49 | """ 50 | pass 51 | 52 | def get_optimizer( 53 | self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] 54 | ) -> torch.optim.Optimizer: 55 | """ 56 | Default optimizer class. Override this if you want to use a different optimizer. 57 | """ 58 | return torch.optim.Adam( 59 | self.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=betas 60 | ) 61 | 62 | 63 | class LatentGeneratorDataParallel(torch.nn.DataParallel): 64 | def get_latent_and_loss(self, *args, **kwargs): 65 | return self.module.get_latent_and_loss(*args, **kwargs) # type: ignore 66 | 67 | def generate_latents(self, *args, **kwargs): 68 | return self.module.generate_latents(*args, **kwargs) # type: ignore 69 | 70 | def get_optimizer(self, *args, **kwargs): 71 | return self.module.get_optimizer(*args, **kwargs) # type: ignore 72 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/latent_generators/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import einops 5 | import diffusion_policy.model.bet.latent_generators.latent_generator as latent_generator 6 | 7 | from diffusion_policy.model.diffusion.transformer_for_diffusion import TransformerForDiffusion 8 | from diffusion_policy.model.bet.libraries.loss_fn import FocalLoss, soft_cross_entropy 9 | 10 | from typing import Optional, Tuple 11 | 12 | class Transformer(latent_generator.AbstractLatentGenerator): 13 | def __init__( 14 | self, 15 | input_dim: int, 16 | num_bins: int, 17 | action_dim: int, 18 | horizon: int, 19 | focal_loss_gamma: float, 20 | offset_loss_scale: float, 21 | **kwargs 22 | ): 23 | super().__init__() 24 | self.model = TransformerForDiffusion( 25 | input_dim=input_dim, 26 | output_dim=num_bins * (1 + action_dim), 27 | horizon=horizon, 28 | **kwargs 29 | ) 30 | self.vocab_size = num_bins 31 | self.focal_loss_gamma = focal_loss_gamma 32 | self.offset_loss_scale = offset_loss_scale 33 | self.action_dim = action_dim 34 | 35 | def get_optimizer(self, **kwargs) -> torch.optim.Optimizer: 36 | return self.model.configure_optimizers(**kwargs) 37 | 38 | def get_latent_and_loss(self, 39 | obs_rep: torch.Tensor, 40 | target_latents: torch.Tensor, 41 | return_loss_components=True, 42 | ) -> Tuple[torch.Tensor, torch.Tensor]: 43 | target_latents, target_offsets = target_latents 44 | target_latents = target_latents.view(-1) 45 | criterion = FocalLoss(gamma=self.focal_loss_gamma) 46 | 47 | t = torch.tensor(0, device=self.model.device) 48 | output = self.model(obs_rep, t) 49 | logits = output[:, :, : self.vocab_size] 50 | offsets = output[:, :, self.vocab_size :] 51 | batch = logits.shape[0] 52 | seq = logits.shape[1] 53 | offsets = einops.rearrange( 54 | offsets, 55 | "N T (V A) -> (N T) V A", # N = batch, T = seq 56 | V=self.vocab_size, 57 | A=self.action_dim, 58 | ) 59 | # calculate (optionally soft) cross entropy and offset losses 60 | class_loss = criterion(logits.view(-1, logits.size(-1)), target_latents) 61 | # offset loss is only calculated on the target class 62 | # if soft targets, argmax is considered the target class 63 | selected_offsets = offsets[ 64 | torch.arange(offsets.size(0)), 65 | target_latents.view(-1), 66 | ] 67 | offset_loss = self.offset_loss_scale * F.mse_loss( 68 | selected_offsets, target_offsets.view(-1, self.action_dim) 69 | ) 70 | loss = offset_loss + class_loss 71 | logits = einops.rearrange(logits, "batch seq classes -> 
seq batch classes") 72 | offsets = einops.rearrange( 73 | offsets, 74 | "(N T) V A -> T N V A", # ? N, T order? Anyway does not affect loss and training (might affect visualization) 75 | N=batch, 76 | T=seq, 77 | ) 78 | return ( 79 | (logits, offsets), 80 | loss, 81 | {"offset": offset_loss, "class": class_loss, "total": loss}, 82 | ) 83 | 84 | def generate_latents( 85 | self, obs_rep: torch.Tensor 86 | ) -> torch.Tensor: 87 | t = torch.tensor(0, device=self.model.device) 88 | output = self.model(obs_rep, t) 89 | logits = output[:, :, : self.vocab_size] 90 | offsets = output[:, :, self.vocab_size :] 91 | offsets = einops.rearrange( 92 | offsets, 93 | "N T (V A) -> (N T) V A", # N = batch, T = seq 94 | V=self.vocab_size, 95 | A=self.action_dim, 96 | ) 97 | 98 | probs = F.softmax(logits, dim=-1) 99 | batch, seq, choices = probs.shape 100 | # Sample from the multinomial distribution, one per row. 101 | sampled_data = torch.multinomial(probs.view(-1, choices), num_samples=1) 102 | sampled_data = einops.rearrange( 103 | sampled_data, "(batch seq) 1 -> batch seq 1", batch=batch, seq=seq 104 | ) 105 | sampled_offsets = offsets[ 106 | torch.arange(offsets.shape[0]), sampled_data.flatten() 107 | ].view(batch, seq, self.action_dim) 108 | return (sampled_data, sampled_offsets) 109 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from collections import OrderedDict 4 | from typing import List, Optional 5 | 6 | import einops 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from torch.utils.data import random_split 12 | import wandb 13 | 14 | 15 | def mlp(input_dim, hidden_dim, output_dim, hidden_depth, output_mod=None): 16 | if hidden_depth == 0: 17 | mods = [nn.Linear(input_dim, output_dim)] 18 | else: 19 | mods = [nn.Linear(input_dim, hidden_dim), nn.ReLU(inplace=True)] 20 | for i in range(hidden_depth - 1): 21 | mods += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU(inplace=True)] 22 | mods.append(nn.Linear(hidden_dim, output_dim)) 23 | if output_mod is not None: 24 | mods.append(output_mod) 25 | trunk = nn.Sequential(*mods) 26 | return trunk 27 | 28 | 29 | class eval_mode: 30 | def __init__(self, *models, no_grad=False): 31 | self.models = models 32 | self.no_grad = no_grad 33 | self.no_grad_context = torch.no_grad() 34 | 35 | def __enter__(self): 36 | self.prev_states = [] 37 | for model in self.models: 38 | self.prev_states.append(model.training) 39 | model.train(False) 40 | if self.no_grad: 41 | self.no_grad_context.__enter__() 42 | 43 | def __exit__(self, *args): 44 | if self.no_grad: 45 | self.no_grad_context.__exit__(*args) 46 | for model, state in zip(self.models, self.prev_states): 47 | model.train(state) 48 | return False 49 | 50 | 51 | def freeze_module(module: nn.Module) -> nn.Module: 52 | for param in module.parameters(): 53 | param.requires_grad = False 54 | module.eval() 55 | return module 56 | 57 | 58 | def set_seed_everywhere(seed): 59 | torch.manual_seed(seed) 60 | if torch.cuda.is_available(): 61 | torch.cuda.manual_seed_all(seed) 62 | np.random.seed(seed) 63 | random.seed(seed) 64 | 65 | 66 | def shuffle_along_axis(a, axis): 67 | idx = np.random.rand(*a.shape).argsort(axis=axis) 68 | return np.take_along_axis(a, idx, axis=axis) 69 | 70 | 71 | def transpose_batch_timestep(*args): 72 | return (einops.rearrange(arg, "b t ... 
-> t b ...") for arg in args) 73 | 74 | 75 | class TrainWithLogger: 76 | def reset_log(self): 77 | self.log_components = OrderedDict() 78 | 79 | def log_append(self, log_key, length, loss_components): 80 | for key, value in loss_components.items(): 81 | key_name = f"{log_key}/{key}" 82 | count, sum = self.log_components.get(key_name, (0, 0.0)) 83 | self.log_components[key_name] = ( 84 | count + length, 85 | sum + (length * value.detach().cpu().item()), 86 | ) 87 | 88 | def flush_log(self, epoch, iterator=None): 89 | log_components = OrderedDict() 90 | iterator_log_component = OrderedDict() 91 | for key, value in self.log_components.items(): 92 | count, sum = value 93 | to_log = sum / count 94 | log_components[key] = to_log 95 | # Set the iterator status 96 | log_key, name_key = key.split("/") 97 | iterator_log_name = f"{log_key[0]}{name_key[0]}".upper() 98 | iterator_log_component[iterator_log_name] = to_log 99 | postfix = ",".join( 100 | "{}:{:.2e}".format(key, iterator_log_component[key]) 101 | for key in iterator_log_component.keys() 102 | ) 103 | if iterator is not None: 104 | iterator.set_postfix_str(postfix) 105 | wandb.log(log_components, step=epoch) 106 | self.log_components = OrderedDict() 107 | 108 | 109 | class SaveModule(nn.Module): 110 | def set_snapshot_path(self, path): 111 | self.snapshot_path = path 112 | print(f"Setting snapshot path to {self.snapshot_path}") 113 | 114 | def save_snapshot(self): 115 | os.makedirs(self.snapshot_path, exist_ok=True) 116 | torch.save(self.state_dict(), self.snapshot_path / "snapshot.pth") 117 | 118 | def load_snapshot(self): 119 | self.load_state_dict(torch.load(self.snapshot_path / "snapshot.pth")) 120 | 121 | 122 | def split_datasets(dataset, train_fraction=0.95, random_seed=42): 123 | dataset_length = len(dataset) 124 | lengths = [ 125 | int(train_fraction * dataset_length), 126 | dataset_length - int(train_fraction * dataset_length), 127 | ] 128 | train_set, val_set = random_split( 129 | dataset, lengths, generator=torch.Generator().manual_seed(random_seed) 130 | ) 131 | return train_set, val_set 132 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | # if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | 
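DictOfTensorMixin above exists so that a nested nn.ParameterDict (for example per-key normalization statistics) survives a state_dict round trip even though its keys are only created at fit time; the overridden _load_from_state_dict rebuilds the nesting via dfs_add. A minimal usage sketch, not part of the repository, assuming a PyTorch version whose ParameterDict accepts nested ParameterDicts (as the repository's pinned version does):

import torch
import torch.nn as nn
from diffusion_policy.model.common.dict_of_tensor_mixin import DictOfTensorMixin

m = DictOfTensorMixin()
# keys created after construction, e.g. by a normalizer's fit()
m.params_dict['obs'] = nn.ParameterDict({
    'scale': nn.Parameter(torch.ones(3)),
    'offset': nn.Parameter(torch.zeros(3)),
})

state = m.state_dict()            # flat keys such as 'params_dict.obs.scale'
restored = DictOfTensorMixin()    # starts with an empty params_dict
restored.load_state_dict(state)   # nesting is rebuilt by the custom loader
assert torch.allclose(restored.params_dict['obs']['scale'], torch.ones(3))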
-------------------------------------------------------------------------------- /diffusion_policy/model/common/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from diffusers.optimization import ( 2 | Union, SchedulerType, Optional, 3 | Optimizer, TYPE_TO_SCHEDULER_FUNCTION 4 | ) 5 | 6 | def get_scheduler( 7 | name: Union[str, SchedulerType], 8 | optimizer: Optimizer, 9 | num_warmup_steps: Optional[int] = None, 10 | num_training_steps: Optional[int] = None, 11 | **kwargs 12 | ): 13 | """ 14 | Added kwargs vs diffuser's original implementation 15 | 16 | Unified API to get any scheduler from its name. 17 | 18 | Args: 19 | name (`str` or `SchedulerType`): 20 | The name of the scheduler to use. 21 | optimizer (`torch.optim.Optimizer`): 22 | The optimizer that will be used during training. 23 | num_warmup_steps (`int`, *optional*): 24 | The number of warmup steps to do. This is not required by all schedulers (hence the argument being 25 | optional), the function will raise an error if it's unset and the scheduler type requires it. 26 | num_training_steps (`int``, *optional*): 27 | The number of training steps to do. This is not required by all schedulers (hence the argument being 28 | optional), the function will raise an error if it's unset and the scheduler type requires it. 29 | """ 30 | name = SchedulerType(name) 31 | schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] 32 | if name == SchedulerType.CONSTANT: 33 | return schedule_func(optimizer, **kwargs) 34 | 35 | # All other schedulers require `num_warmup_steps` 36 | if num_warmup_steps is None: 37 | raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.") 38 | 39 | if name == SchedulerType.CONSTANT_WITH_WARMUP: 40 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs) 41 | 42 | # All other schedulers require `num_training_steps` 43 | if num_training_steps is None: 44 | raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") 45 | 46 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/rotation_transformer.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import pytorch3d.transforms as pt 3 | import torch 4 | import numpy as np 5 | import functools 6 | 7 | class RotationTransformer: 8 | valid_reps = [ 9 | 'axis_angle', 10 | 'euler_angles', 11 | 'quaternion', 12 | 'rotation_6d', 13 | 'matrix' 14 | ] 15 | 16 | def __init__(self, 17 | from_rep='axis_angle', 18 | to_rep='rotation_6d', 19 | from_convention=None, 20 | to_convention=None): 21 | """ 22 | Valid representations 23 | 24 | Always use matrix as intermediate representation. 
25 | """ 26 | assert from_rep != to_rep 27 | assert from_rep in self.valid_reps 28 | assert to_rep in self.valid_reps 29 | if from_rep == 'euler_angles': 30 | assert from_convention is not None 31 | if to_rep == 'euler_angles': 32 | assert to_convention is not None 33 | 34 | forward_funcs = list() 35 | inverse_funcs = list() 36 | 37 | if from_rep != 'matrix': 38 | funcs = [ 39 | getattr(pt, f'{from_rep}_to_matrix'), 40 | getattr(pt, f'matrix_to_{from_rep}') 41 | ] 42 | if from_convention is not None: 43 | funcs = [functools.partial(func, convernsion=from_convention) 44 | for func in funcs] 45 | forward_funcs.append(funcs[0]) 46 | inverse_funcs.append(funcs[1]) 47 | 48 | if to_rep != 'matrix': 49 | funcs = [ 50 | getattr(pt, f'matrix_to_{to_rep}'), 51 | getattr(pt, f'{to_rep}_to_matrix') 52 | ] 53 | if to_convention is not None: 54 | funcs = [functools.partial(func, convernsion=to_convention) 55 | for func in funcs] 56 | forward_funcs.append(funcs[0]) 57 | inverse_funcs.append(funcs[1]) 58 | 59 | inverse_funcs = inverse_funcs[::-1] 60 | 61 | self.forward_funcs = forward_funcs 62 | self.inverse_funcs = inverse_funcs 63 | 64 | @staticmethod 65 | def _apply_funcs(x: Union[np.ndarray, torch.Tensor], funcs: list) -> Union[np.ndarray, torch.Tensor]: 66 | x_ = x 67 | if isinstance(x, np.ndarray): 68 | x_ = torch.from_numpy(x) 69 | x_: torch.Tensor 70 | for func in funcs: 71 | x_ = func(x_) 72 | y = x_ 73 | if isinstance(x, np.ndarray): 74 | y = x_.numpy() 75 | return y 76 | 77 | def forward(self, x: Union[np.ndarray, torch.Tensor] 78 | ) -> Union[np.ndarray, torch.Tensor]: 79 | return self._apply_funcs(x, self.forward_funcs) 80 | 81 | def inverse(self, x: Union[np.ndarray, torch.Tensor] 82 | ) -> Union[np.ndarray, torch.Tensor]: 83 | return self._apply_funcs(x, self.inverse_funcs) 84 | 85 | 86 | def test(): 87 | tf = RotationTransformer() 88 | 89 | rotvec = np.random.uniform(-2*np.pi,2*np.pi,size=(1000,3)) 90 | rot6d = tf.forward(rotvec) 91 | new_rotvec = tf.inverse(rot6d) 92 | 93 | from scipy.spatial.transform import Rotation 94 | diff = Rotation.from_rotvec(rotvec) * Rotation.from_rotvec(new_rotvec).inv() 95 | dist = diff.magnitude() 96 | assert dist.max() < 1e-7 97 | 98 | tf = RotationTransformer('rotation_6d', 'matrix') 99 | rot6d_wrong = rot6d + np.random.normal(scale=0.1, size=rot6d.shape) 100 | mat = tf.forward(rot6d_wrong) 101 | mat_det = np.linalg.det(mat) 102 | assert np.allclose(mat_det, 1) 103 | # rotaiton_6d will be normalized to rotation matrix 104 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 11 | pass 12 | return device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/conv1d_components.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/ema_model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from torch.nn.modules.batchnorm import _BatchNorm 4 | 5 | class EMAModel: 6 | """ 7 | Exponential Moving Average of models weights 8 | """ 9 | 10 | def __init__( 11 | self, 12 | model, 13 | update_after_step=0, 14 | inv_gamma=1.0, 15 | power=2 / 3, 16 | min_value=0.0, 17 | max_value=0.9999 18 | ): 19 | """ 20 | @crowsonkb's notes on EMA Warmup: 21 | If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan 22 | to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), 23 | gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 24 | at 215.4k steps). 25 | Args: 26 | inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. 27 | power (float): Exponential factor of EMA warmup. Default: 2/3. 28 | min_value (float): The minimum EMA decay rate. Default: 0. 29 | """ 30 | 31 | self.averaged_model = model 32 | self.averaged_model.eval() 33 | self.averaged_model.requires_grad_(False) 34 | 35 | self.update_after_step = update_after_step 36 | self.inv_gamma = inv_gamma 37 | self.power = power 38 | self.min_value = min_value 39 | self.max_value = max_value 40 | 41 | self.decay = 0.0 42 | self.optimization_step = 0 43 | 44 | def get_decay(self, optimization_step): 45 | """ 46 | Compute the decay factor for the exponential moving average. 
47 | """ 48 | step = max(0, optimization_step - self.update_after_step - 1) 49 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 50 | 51 | if step <= 0: 52 | return 0.0 53 | 54 | return max(self.min_value, min(value, self.max_value)) 55 | 56 | @torch.no_grad() 57 | def step(self, new_model): 58 | self.decay = self.get_decay(self.optimization_step) 59 | 60 | # old_all_dataptrs = set() 61 | # for param in new_model.parameters(): 62 | # data_ptr = param.data_ptr() 63 | # if data_ptr != 0: 64 | # old_all_dataptrs.add(data_ptr) 65 | 66 | all_dataptrs = set() 67 | for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): 68 | for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): 69 | # iterative over immediate parameters only. 70 | if isinstance(param, dict): 71 | raise RuntimeError('Dict parameter not supported') 72 | 73 | # data_ptr = param.data_ptr() 74 | # if data_ptr != 0: 75 | # all_dataptrs.add(data_ptr) 76 | 77 | if isinstance(module, _BatchNorm): 78 | # skip batchnorms 79 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 80 | elif not param.requires_grad: 81 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 82 | else: 83 | ema_param.mul_(self.decay) 84 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 85 | 86 | # verify that iterating over module and then parameters is identical to parameters recursively. 87 | # assert old_all_dataptrs == all_dataptrs 88 | self.optimization_step += 1 89 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /diffusion_policy/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | return resnet_model 29 | -------------------------------------------------------------------------------- /diffusion_policy/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import 
ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/multi_camera_visualizer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import multiprocessing as mp 3 | import numpy as np 4 | import cv2 5 | from threadpoolctl import threadpool_limits 6 | from diffusion_policy.real_world.multi_realsense import MultiRealsense 7 | 8 | class MultiCameraVisualizer(mp.Process): 9 | def __init__(self, 10 | realsense: MultiRealsense, 11 | row, col, 12 | window_name='Multi Cam Vis', 13 | vis_fps=60, 14 | fill_value=0, 15 | rgb_to_bgr=True 16 | ): 17 | super().__init__() 18 | self.row = row 19 | self.col = col 20 | self.window_name = window_name 21 | self.vis_fps = vis_fps 22 | self.fill_value = fill_value 23 | self.rgb_to_bgr=rgb_to_bgr 24 | self.realsense = realsense 25 | # shared variables 26 | self.stop_event = mp.Event() 27 | 28 | def start(self, wait=False): 29 | super().start() 30 | 31 | def stop(self, wait=False): 32 | self.stop_event.set() 33 | if wait: 34 | self.stop_wait() 35 | 36 | def start_wait(self): 37 | pass 38 | 39 | def stop_wait(self): 40 | self.join() 41 | 42 | def run(self): 43 | cv2.setNumThreads(1) 44 | threadpool_limits(1) 45 | channel_slice = slice(None) 46 | if self.rgb_to_bgr: 47 | channel_slice = slice(None,None,-1) 48 | 49 | vis_data = None 50 | 
vis_img = None 51 | while not self.stop_event.is_set(): 52 | vis_data = self.realsense.get_vis(out=vis_data) 53 | color = vis_data['color'] 54 | N, H, W, C = color.shape 55 | assert C == 3 56 | oh = H * self.row 57 | ow = W * self.col 58 | if vis_img is None: 59 | vis_img = np.full((oh, ow, 3), 60 | fill_value=self.fill_value, dtype=np.uint8) 61 | for row in range(self.row): 62 | for col in range(self.col): 63 | idx = col + row * self.col 64 | h_start = H * row 65 | h_end = h_start + H 66 | w_start = W * col 67 | w_end = w_start + W 68 | if idx < N: 69 | # opencv uses bgr 70 | vis_img[h_start:h_end,w_start:w_end 71 | ] = color[idx,:,:,channel_slice] 72 | cv2.imshow(self.window_name, vis_img) 73 | cv2.pollKey() 74 | time.sleep(1 / self.vis_fps) 75 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from diffusion_policy.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/realsense_config/415_high_accuracy_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "aux-param-autoexposure-setpoint": "400", 3 | "aux-param-colorcorrection1": "0.461914", 4 | "aux-param-colorcorrection10": "-0.553711", 5 | "aux-param-colorcorrection11": "-0.553711", 6 | "aux-param-colorcorrection12": "0.0458984", 7 | "aux-param-colorcorrection2": "0.540039", 8 | "aux-param-colorcorrection3": "0.540039", 9 | "aux-param-colorcorrection4": "0.208008", 10 | "aux-param-colorcorrection5": "-0.332031", 11 | "aux-param-colorcorrection6": "-0.212891", 12 | "aux-param-colorcorrection7": "-0.212891", 13 | "aux-param-colorcorrection8": "0.68457", 14 | "aux-param-colorcorrection9": "0.930664", 15 | "aux-param-depthclampmax": "65535", 16 | "aux-param-depthclampmin": 
"0", 17 | "aux-param-disparityshift": "0", 18 | "controls-autoexposure-auto": "True", 19 | "controls-autoexposure-manual": "33000", 20 | "controls-color-autoexposure-auto": "True", 21 | "controls-color-autoexposure-manual": "100", 22 | "controls-color-backlight-compensation": "0", 23 | "controls-color-brightness": "0", 24 | "controls-color-contrast": "50", 25 | "controls-color-gain": "100", 26 | "controls-color-gamma": "300", 27 | "controls-color-hue": "0", 28 | "controls-color-power-line-frequency": "3", 29 | "controls-color-saturation": "64", 30 | "controls-color-sharpness": "50", 31 | "controls-color-white-balance-auto": "True", 32 | "controls-color-white-balance-manual": "4600", 33 | "controls-depth-gain": "16", 34 | "controls-depth-white-balance-auto": "False", 35 | "controls-laserpower": "150", 36 | "controls-laserstate": "on", 37 | "ignoreSAD": "0", 38 | "param-amplitude-factor": "0", 39 | "param-autoexposure-setpoint": "400", 40 | "param-censusenablereg-udiameter": "9", 41 | "param-censusenablereg-vdiameter": "3", 42 | "param-censususize": "9", 43 | "param-censusvsize": "3", 44 | "param-depthclampmax": "65535", 45 | "param-depthclampmin": "0", 46 | "param-depthunits": "1000", 47 | "param-disableraucolor": "0", 48 | "param-disablesadcolor": "0", 49 | "param-disablesadnormalize": "0", 50 | "param-disablesloleftcolor": "0", 51 | "param-disableslorightcolor": "1", 52 | "param-disparitymode": "0", 53 | "param-disparityshift": "0", 54 | "param-lambdaad": "751", 55 | "param-lambdacensus": "6", 56 | "param-leftrightthreshold": "10", 57 | "param-maxscorethreshb": "2893", 58 | "param-medianthreshold": "796", 59 | "param-minscorethresha": "4", 60 | "param-neighborthresh": "108", 61 | "param-raumine": "6", 62 | "param-rauminn": "3", 63 | "param-rauminnssum": "7", 64 | "param-raumins": "2", 65 | "param-rauminw": "2", 66 | "param-rauminwesum": "12", 67 | "param-regioncolorthresholdb": "0.785714", 68 | "param-regioncolorthresholdg": "0.565558", 69 | "param-regioncolorthresholdr": "0.985323", 70 | "param-regionshrinku": "3", 71 | "param-regionshrinkv": "0", 72 | "param-robbinsmonrodecrement": "25", 73 | "param-robbinsmonroincrement": "2", 74 | "param-rsmdiffthreshold": "1.65625", 75 | "param-rsmrauslodiffthreshold": "0.71875", 76 | "param-rsmremovethreshold": "0.809524", 77 | "param-scanlineedgetaub": "13", 78 | "param-scanlineedgetaug": "15", 79 | "param-scanlineedgetaur": "30", 80 | "param-scanlinep1": "155", 81 | "param-scanlinep1onediscon": "160", 82 | "param-scanlinep1twodiscon": "59", 83 | "param-scanlinep2": "190", 84 | "param-scanlinep2onediscon": "507", 85 | "param-scanlinep2twodiscon": "493", 86 | "param-secondpeakdelta": "647", 87 | "param-texturecountthresh": "0", 88 | "param-texturedifferencethresh": "1722", 89 | "param-usersm": "1", 90 | "param-zunits": "1000", 91 | "stream-depth-format": "Z16", 92 | "stream-fps": "30", 93 | "stream-height": "480", 94 | "stream-width": "640" 95 | } 96 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/realsense_config/435_high_accuracy_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "aux-param-autoexposure-setpoint": "1536", 3 | "aux-param-colorcorrection1": "0.298828", 4 | "aux-param-colorcorrection10": "-0", 5 | "aux-param-colorcorrection11": "-0", 6 | "aux-param-colorcorrection12": "-0", 7 | "aux-param-colorcorrection2": "0.293945", 8 | "aux-param-colorcorrection3": "0.293945", 9 | "aux-param-colorcorrection4": "0.114258", 10 | 
"aux-param-colorcorrection5": "-0", 11 | "aux-param-colorcorrection6": "-0", 12 | "aux-param-colorcorrection7": "-0", 13 | "aux-param-colorcorrection8": "-0", 14 | "aux-param-colorcorrection9": "-0", 15 | "aux-param-depthclampmax": "65536", 16 | "aux-param-depthclampmin": "0", 17 | "aux-param-disparityshift": "0", 18 | "controls-autoexposure-auto": "True", 19 | "controls-autoexposure-manual": "8500", 20 | "controls-color-autoexposure-auto": "True", 21 | "controls-color-autoexposure-manual": "100", 22 | "controls-color-backlight-compensation": "0", 23 | "controls-color-brightness": "0", 24 | "controls-color-contrast": "50", 25 | "controls-color-gain": "100", 26 | "controls-color-gamma": "300", 27 | "controls-color-hue": "0", 28 | "controls-color-power-line-frequency": "3", 29 | "controls-color-saturation": "64", 30 | "controls-color-sharpness": "50", 31 | "controls-color-white-balance-auto": "True", 32 | "controls-color-white-balance-manual": "4600", 33 | "controls-depth-gain": "16", 34 | "controls-laserpower": "150", 35 | "controls-laserstate": "on", 36 | "ignoreSAD": "0", 37 | "param-amplitude-factor": "0", 38 | "param-autoexposure-setpoint": "1536", 39 | "param-censusenablereg-udiameter": "9", 40 | "param-censusenablereg-vdiameter": "9", 41 | "param-censususize": "9", 42 | "param-censusvsize": "9", 43 | "param-depthclampmax": "65536", 44 | "param-depthclampmin": "0", 45 | "param-depthunits": "1000", 46 | "param-disableraucolor": "0", 47 | "param-disablesadcolor": "0", 48 | "param-disablesadnormalize": "0", 49 | "param-disablesloleftcolor": "0", 50 | "param-disableslorightcolor": "1", 51 | "param-disparitymode": "0", 52 | "param-disparityshift": "0", 53 | "param-lambdaad": "751", 54 | "param-lambdacensus": "6", 55 | "param-leftrightthreshold": "10", 56 | "param-maxscorethreshb": "2893", 57 | "param-medianthreshold": "796", 58 | "param-minscorethresha": "4", 59 | "param-neighborthresh": "108", 60 | "param-raumine": "6", 61 | "param-rauminn": "3", 62 | "param-rauminnssum": "7", 63 | "param-raumins": "2", 64 | "param-rauminw": "2", 65 | "param-rauminwesum": "12", 66 | "param-regioncolorthresholdb": "0.785714", 67 | "param-regioncolorthresholdg": "0.565558", 68 | "param-regioncolorthresholdr": "0.985323", 69 | "param-regionshrinku": "3", 70 | "param-regionshrinkv": "0", 71 | "param-robbinsmonrodecrement": "25", 72 | "param-robbinsmonroincrement": "2", 73 | "param-rsmdiffthreshold": "1.65625", 74 | "param-rsmrauslodiffthreshold": "0.71875", 75 | "param-rsmremovethreshold": "0.809524", 76 | "param-scanlineedgetaub": "13", 77 | "param-scanlineedgetaug": "15", 78 | "param-scanlineedgetaur": "30", 79 | "param-scanlinep1": "155", 80 | "param-scanlinep1onediscon": "160", 81 | "param-scanlinep1twodiscon": "59", 82 | "param-scanlinep2": "190", 83 | "param-scanlinep2onediscon": "507", 84 | "param-scanlinep2twodiscon": "493", 85 | "param-secondpeakdelta": "647", 86 | "param-texturecountthresh": "0", 87 | "param-texturedifferencethresh": "1722", 88 | "param-usersm": "1", 89 | "param-zunits": "1000", 90 | "stream-depth-format": "Z16", 91 | "stream-fps": "30", 92 | "stream-height": "480", 93 | "stream-width": "848" 94 | } -------------------------------------------------------------------------------- /diffusion_policy/real_world/spacemouse.py: -------------------------------------------------------------------------------- 1 | from spnav import spnav_open, spnav_poll_event, spnav_close, SpnavMotionEvent, SpnavButtonEvent 2 | from threading import Thread, Event 3 | from collections import defaultdict 4 | 
import numpy as np 5 | import time 6 | 7 | 8 | class Spacemouse(Thread): 9 | def __init__(self, max_value=500, deadzone=(0,0,0,0,0,0), dtype=np.float32): 10 | """ 11 | Continuously listen to 3Dconnexion space navigator events 12 | and update the latest state. 13 | 14 | max_value: {300, 500} 300 for wired version and 500 for wireless 15 | deadzone: number or tuple in [0,1]; axes whose value lies within (-deadzone, deadzone) are set to 0 16 | 17 | front 18 | z 19 | ^ _ 20 | | (O) space mouse 21 | | 22 | *----->x right 23 | y 24 | """ 25 | if np.issubdtype(type(deadzone), np.number): 26 | deadzone = np.full(6, fill_value=deadzone, dtype=dtype) 27 | else: 28 | deadzone = np.array(deadzone, dtype=dtype) 29 | assert (deadzone >= 0).all() 30 | 31 | super().__init__() 32 | self.stop_event = Event() 33 | self.max_value = max_value 34 | self.dtype = dtype 35 | self.deadzone = deadzone 36 | self.motion_event = SpnavMotionEvent([0,0,0], [0,0,0], 0) 37 | self.button_state = defaultdict(lambda: False) 38 | self.tx_zup_spnav = np.array([ 39 | [0,0,-1], 40 | [1,0,0], 41 | [0,1,0] 42 | ], dtype=dtype) 43 | 44 | def get_motion_state(self): 45 | me = self.motion_event 46 | state = np.array(me.translation + me.rotation, 47 | dtype=self.dtype) / self.max_value 48 | is_dead = (-self.deadzone < state) & (state < self.deadzone) 49 | state[is_dead] = 0 50 | return state 51 | 52 | def get_motion_state_transformed(self): 53 | """ 54 | Return the motion state in right-handed coordinates 55 | z 56 | *------>y right 57 | | _ 58 | | (O) space mouse 59 | v 60 | x 61 | back 62 | 63 | """ 64 | state = self.get_motion_state() 65 | tf_state = np.zeros_like(state) 66 | tf_state[:3] = self.tx_zup_spnav @ state[:3] 67 | tf_state[3:] = self.tx_zup_spnav @ state[3:] 68 | return tf_state 69 | 70 | def is_button_pressed(self, button_id): 71 | return self.button_state[button_id] 72 | 73 | def stop(self): 74 | self.stop_event.set() 75 | self.join() 76 | 77 | def __enter__(self): 78 | self.start() 79 | return self 80 | 81 | def __exit__(self, exc_type, exc_val, exc_tb): 82 | self.stop() 83 | 84 | def run(self): 85 | spnav_open() 86 | try: 87 | while not self.stop_event.is_set(): 88 | event = spnav_poll_event() 89 | if isinstance(event, SpnavMotionEvent): 90 | self.motion_event = event 91 | elif isinstance(event, SpnavButtonEvent): 92 | self.button_state[event.bnum] = event.press 93 | else: 94 | time.sleep(1/200) 95 | finally: 96 | spnav_close() 97 | 98 | 99 | def test(): 100 | with Spacemouse(deadzone=0.3) as sm: 101 | for i in range(2000): 102 | # print(sm.get_motion_state()) 103 | print(sm.get_motion_state_transformed()) 104 | print(sm.is_button_pressed(0)) 105 | time.sleep(1/100) 106 | 107 | if __name__ == '__main__': 108 | test() 109 | -------------------------------------------------------------------------------- /diffusion_policy/shared_memory/shared_memory_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from dataclasses import dataclass 3 | import numpy as np 4 | from multiprocessing.managers import SharedMemoryManager 5 | from atomics import atomicview, MemoryOrder, UINT 6 | 7 | @dataclass 8 | class ArraySpec: 9 | name: str 10 | shape: Tuple[int] 11 | dtype: np.dtype 12 | 13 | 14 | class SharedAtomicCounter: 15 | def __init__(self, 16 | shm_manager: SharedMemoryManager, 17 | size :int=8 # 64bit int 18 | ): 19 | shm = shm_manager.SharedMemory(size=size) 20 | self.shm = shm 21 | self.size = size 22 | self.store(0) # initialize 23 | 24 | @property 25 | def buf(self): 26 | return
self.shm.buf[:self.size] 27 | 28 | def load(self) -> int: 29 | with atomicview(buffer=self.buf, atype=UINT) as a: 30 | value = a.load(order=MemoryOrder.ACQUIRE) 31 | return value 32 | 33 | def store(self, value: int): 34 | with atomicview(buffer=self.buf, atype=UINT) as a: 35 | a.store(value, order=MemoryOrder.RELEASE) 36 | 37 | def add(self, value: int): 38 | with atomicview(buffer=self.buf, atype=UINT) as a: 39 | a.add(value, order=MemoryOrder.ACQ_REL) 40 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | python eval.py --checkpoint /path/to/ckpt -o /path/to/output_dir 4 | """ 5 | 6 | import sys 7 | # use line-buffering for both stdout and stderr 8 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 9 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 10 | 11 | import os 12 | import pathlib 13 | import click 14 | import hydra 15 | import torch 16 | import dill 17 | import wandb 18 | import json 19 | from diffusion_policy.workspace.base_workspace import BaseWorkspace 20 | import copy 21 | from omegaconf.omegaconf import open_dict 22 | import yaml 23 | 24 | taskid2cfg = { 25 | 0 :"config/tasks/square_d0.yaml" , 26 | 1 :"config/tasks/stack_d0.yaml" , 27 | 2 :"config/tasks/coffee_d0.yaml" , 28 | 3 :"config/tasks/hammer_cleanup_d0.yaml" , 29 | 4 :"config/tasks/mug_cleanup_d0.yaml" , 30 | 5 :"config/tasks/nut_assembly_d0.yaml" , 31 | 6 :"config/tasks/stack_three_d0.yaml" , 32 | 7: "config/tasks/threading_d0.yaml" , 33 | } 34 | 35 | 36 | 37 | @click.command() 38 | @click.option('-c', '--checkpoint', default='epoch=0299-test_mean_score=6.070.ckpt') 39 | @click.option('-o', '--output_dir', default='test_eval') 40 | @click.option('-d', '--device', default='cuda:0') 41 | def main(checkpoint, output_dir, device): 42 | if os.path.exists(output_dir): 43 | click.confirm(f"Output path {output_dir} already exists! 
Overwrite?", abort=True) 44 | pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) 45 | 46 | # load checkpoint 47 | payload = torch.load(open(checkpoint, 'rb'), pickle_module=dill) 48 | cfg = payload['cfg'] 49 | for i in range(cfg['task_num']): 50 | curr_cfg=taskid2cfg[i] 51 | with open(curr_cfg, "r") as f: 52 | task_cfg = yaml.safe_load(f) 53 | cfg[f"task{i}"]=task_cfg 54 | cls = hydra.utils.get_class(cfg._target_) 55 | workspace = cls(cfg, output_dir=output_dir) 56 | workspace: BaseWorkspace 57 | workspace.load_payload(payload, exclude_keys=None, include_keys=None) 58 | 59 | # run eval 60 | # configure env 61 | env_runners = [] 62 | # env_runner3: BaseImageRunner 63 | for i in range(cfg.task_num): 64 | env_runners.append(hydra.utils.instantiate(cfg[f'task{i}'].env_runner, output_dir=output_dir)) 65 | 66 | 67 | # get policy from workspace 68 | datasets= [] 69 | for i in range(cfg.task_num): 70 | datasets.append(hydra.utils.instantiate(cfg[f'task{i}'].dataset)) 71 | normalizers=[] 72 | for dataset in datasets: 73 | normalizers.append(dataset.get_normalizer()) 74 | workspace.model.set_normalizer(normalizers) 75 | 76 | policy = workspace.model 77 | if cfg.training.use_ema: 78 | workspace.ema_model.set_normalizer(normalizers) 79 | policy = workspace.ema_model 80 | device = torch.device(device) 81 | policy.to(device) 82 | for normalizer in policy.normalizers: 83 | normalizer.to(device) 84 | policy.eval() 85 | 86 | 87 | runner_logs = [] 88 | for i, env_runner in enumerate(env_runners): 89 | runner_log = env_runner.run(policy,task_id=torch.tensor([i], dtype=torch.int64).to(device)) 90 | runner_log = {key + f'_{i}': value for key, value in runner_log.items()} 91 | runner_logs.append(runner_log) 92 | 93 | # dump log to json 94 | for i,runner_log in enumerate(runner_logs): 95 | json_log = dict() 96 | for key, value in runner_log.items(): 97 | if isinstance(value, wandb.sdk.data_types.video.Video): 98 | json_log[key] = value._path 99 | else: 100 | json_log[key] = value 101 | out_path = os.path.join(output_dir, f'eval_log_{i}.json') 102 | json.dump(json_log, open(out_path, 'w'), indent=2, sort_keys=True) 103 | 104 | if __name__ == '__main__': 105 | os.environ["CUDA_VISIBLE_DEVICES"]='1,' 106 | os.environ["MUJOCO_GL"]="osmesa" 107 | main() 108 | -------------------------------------------------------------------------------- /mixture_of_experts/mixture_of_experts/__init__.py: -------------------------------------------------------------------------------- 1 | from mixture_of_experts.mixture_of_experts import MoE, HeirarchicalMoE, Experts 2 | -------------------------------------------------------------------------------- /mixture_of_experts/moe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnthonyHuo/SDP/9d70d48549f622c29f4f4935588467989835a46e/mixture_of_experts/moe.png -------------------------------------------------------------------------------- /mixture_of_experts/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name = 'mixture-of-experts', 5 | packages = find_packages(), 6 | version = '0.2.3', 7 | license='MIT', 8 | description = 'Sparsely-Gated Mixture of Experts for Pytorch', 9 | author = 'Phil Wang', 10 | author_email = 'lucidrains@gmail.com', 11 | url = 'https://github.com/lucidrains/mixture-of-experts', 12 | keywords = ['artificial intelligence', 'deep learning', 'transformers', 'mixture of experts'], 
13 | install_requires=[ 14 | 'torch' 15 | ], 16 | classifiers=[ 17 | 'Development Status :: 4 - Beta', 18 | 'Intended Audience :: Developers', 19 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Programming Language :: Python :: 3.6', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /parallel_linear/.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | build 35 | dist 36 | *.egg-info 37 | 38 | __pycache__ 39 | *.pyc 40 | 41 | *.json -------------------------------------------------------------------------------- /parallel_linear/README.md: -------------------------------------------------------------------------------- 1 | # Parallel Linears and Mixture of Experts 2 | 3 | ## update 2022.07.19 4 | The `MoE.forward()` is now a standard Mixture of Experts (FFD). The mixture of attention code is supposed to use the `MoE.map()` and `MoE.reduce()` functions. 5 | 6 | ## Mixture of Experts 7 | Mixture of Experts (MoE) is a map-reduce style function. 8 | The forward function maps different inputs to different experts. The reduce function sums these intermediate results together for each input. 9 | Parameters: 10 | 1. `input_size` - the size of input hidden states 11 | 2. `output_size` - the size of intermediate hidden states 12 | 3. `num_experts` - the number of total experts 13 | 4. `k` - the number of top-k selected experts for each input 14 | 5. `cvloss`, `switchloss`, `zloss` - different load balancing losses. 15 | 6. `activation` - the activation function for intermediate states of MoE (FFD). 16 | 17 | To install the package: 18 | ``` 19 | pip3 install . 20 | ``` 21 | or 22 | ``` 23 | python3 setup.py install 24 | ``` 25 | 26 | To use the class: 27 | ``` 28 | from parallel_experts import MoE 29 | 30 | moe = MoE() 31 | ``` 32 | The `MoE` class is a map-reduce style function. To use it, first map the input `x` with the map function: 33 | ``` 34 | mapped = moe.map(x) 35 | ``` 36 | Then you can pass the mapped and projected output through attention (for mixture of attention) or a non-linear activation (for mixture of FFD) to get the processed matrix `y`. 37 | Lastly, you feed `y` to the reduce function to get the output of the mixture of attention/FFD. 38 | ``` 39 | output = moe.reduce(y) 40 | ``` 41 | 42 | ## Parallel Linears 43 | Parallel linears are a part of MoE. 44 | Input to the function includes: 45 | 1. Input matrix $ X $, a $ B \times D_{in} $ matrix, where $B$ is the total number of input vectors. 46 | 2. Weight matrix $ W $, a $ N \times D_{out} \times D_{in} $ matrix, where $N$ is the number of linear kernels. 47 | 3. Routing vector $ R $, a $ B $ dimensional vector, where each element $ R_i $ ( $ 0 \leq R_i < N $ ) is the index of the weight matrix for the $ i $-th input vector. The input matrix and routing vector are sorted according to the weight index. For example, a valid routing vector is $ [0\ 0\ 0\ 1\ 1\ 2\ 3\ 3\ 3\ 3] $ 48 | 4.
Start indices vector $ S $, an $ N $ dimensional vector, where each element $ S_i $ is the starting index for inputs of the $ i $-th weight matrix. 49 | 5. End indices vector $ E $, an $ N $ dimensional vector, where each element $ E_i $ is the ending index for inputs of the $ i $-th weight matrix. 50 | 51 | The output of the function is $ \left[ \begin{matrix} W_{R_1} X_1, W_{R_2} X_2, ..., W_{R_B} X_B \end{matrix} \right] $. A pure-PyTorch sketch of this input convention is shown after parallel_linear.cc below. 52 | 53 | To run the test: 54 | ``` 55 | python test.py 56 | ``` -------------------------------------------------------------------------------- /parallel_linear/parallel_experts/__init__.py: -------------------------------------------------------------------------------- 1 | from .parallel_experts import ParallelExperts, ParallelLinear 2 | from parallel_experts.moe import MoE, RandomMoE, TaskMoE -------------------------------------------------------------------------------- /parallel_linear/parallel_linear.cc: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | torch::Tensor parallel_linear_fwd_interface(torch::Tensor, torch::Tensor, torch::Tensor); 4 | std::vector<torch::Tensor> parallel_linear_bwd_interface(torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor); 5 | 6 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") 7 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 8 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 9 | 10 | torch::Tensor parallel_linear_fwd(torch::Tensor input, torch::Tensor weight, torch::Tensor indices) { 11 | if(input.device().type() == torch::kCPU) { 12 | int bsz = input.size(0); 13 | torch::Tensor output = torch::zeros({bsz, weight.size(1)}); 14 | for (int i = 0; i < bsz; ++i) 15 | { 16 | output[i] = torch::mv(weight[indices[i]], input[i]); 17 | } 18 | return output; 19 | } 20 | else if (input.device().type() == torch::kCUDA){ 21 | CHECK_INPUT(input); 22 | CHECK_INPUT(weight); 23 | CHECK_INPUT(indices); 24 | TORCH_CHECK(indices.dtype() == torch::kInt64, 25 | "Indices Datatype not implemented"); 26 | 27 | return parallel_linear_fwd_interface(input, weight, indices); 28 | } 29 | AT_ERROR("No such device: ", input.device()); 30 | } 31 | 32 | std::vector<torch::Tensor> parallel_linear_bwd(torch::Tensor grad_out, torch::Tensor input, torch::Tensor weight, 33 | torch::Tensor indices, torch::Tensor start_indices, torch::Tensor end_indices) { 34 | if(input.device().type() == torch::kCPU) { 35 | int bsz = input.size(0); 36 | torch::Tensor d_input = torch::zeros_like(input); 37 | torch::Tensor d_weight = torch::zeros_like(weight); 38 | 39 | for (int i = 0; i < bsz; ++i) 40 | { 41 | d_input[i] = torch::mv(weight[indices[i]].transpose(0, 1), grad_out[i]); 42 | d_weight[indices[i]] += torch::outer(grad_out[i], input[i]); 43 | } 44 | return {d_input, d_weight}; 45 | } 46 | else if (input.device().type() == torch::kCUDA){ 47 | CHECK_INPUT(input); 48 | CHECK_INPUT(weight); 49 | CHECK_INPUT(indices); 50 | CHECK_INPUT(grad_out); 51 | TORCH_CHECK(indices.dtype() == torch::kInt64, 52 | "Indices Datatype not implemented"); 53 | 54 | return parallel_linear_bwd_interface(grad_out, input, weight, indices, start_indices, end_indices); 55 | } 56 | AT_ERROR("No such device: ", input.device()); 57 | } 58 | 59 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 60 | m.def("forward", &parallel_linear_fwd, "Parallel linear forward"); 61 | m.def("backward", &parallel_linear_bwd, "Parallel linear backward"); 62 | }
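Note on the input convention: the following is a minimal pure-PyTorch sketch of the I/O contract described in the README above and of the CPU fallback branch in parallel_linear.cc — inputs sorted by expert index, per-expert start/end indices, and one matrix multiply per routed row. The helper name and sizes are made up for illustration only; it mirrors the reference loop, not the fused CUDA kernel in parallel_linear_kernel.cu.

```
import torch
import torch.nn.functional as F

def parallel_linear_reference(x, weight, routing):
    # x:       (B, D_in) inputs, already sorted by expert index
    # weight:  (N, D_out, D_in), one weight matrix per expert
    # routing: (B,) sorted expert index R_i for each input row
    out = torch.zeros(x.size(0), weight.size(1), dtype=x.dtype)
    for i in range(x.size(0)):
        out[i] = weight[routing[i]] @ x[i]  # W_{R_i} X_i, as in the CPU branch of parallel_linear_fwd
    return out

# Illustrative sizes (hypothetical, chosen small for readability).
B, D_in, D_out, N = 10, 4, 3, 4
x = torch.randn(B, D_in)
weight = torch.randn(N, D_out, D_in)
routing, _ = torch.sort(torch.randint(N, (B,)))            # sorted routing vector R
expert_size = torch.bincount(routing, minlength=N)         # number of inputs per expert
end_indices = expert_size.cumsum(0)                        # E
start_indices = F.pad(end_indices[:-1], (1, 0), value=0)   # S

print(parallel_linear_reference(x, weight, routing).shape)  # torch.Size([10, 3])
```

Sorting the inputs by expert index is what lets the fused kernel treat each expert's rows as one contiguous slice delimited by the start/end indices.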
-------------------------------------------------------------------------------- /parallel_linear/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from torch.utils import cpp_extension 3 | 4 | setup(name='parallel_experts', 5 | packages=find_packages(), 6 | # ext_modules=[cpp_extension.CUDAExtension('parallel_linear', 7 | # ['parallel_linear.cc', 8 | # 'parallel_linear_kernel.cu' 9 | # ])], 10 | # cmdclass={'build_ext': cpp_extension.BuildExtension}, 11 | install_requires=[ 12 | 'torch' 13 | ]) -------------------------------------------------------------------------------- /parallel_linear/test.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | from parallel_experts import ParallelLinear 7 | 8 | assert torch.cuda.is_available() 9 | cuda_device = torch.device("cuda") 10 | 11 | NUM_EXPERTS=128 12 | INPUT_SIZE=512 13 | OUTPUT_SIZE=128 14 | BSZ=512 * 32 * 8 15 | 16 | 17 | def TorchParallelLinear(input, weight, bias, expert_size): 18 | output_list = [] 19 | expert_size_list = expert_size.tolist() 20 | input_list = input.split(expert_size_list, dim=0) 21 | for i in range(NUM_EXPERTS): 22 | output_list.append(torch.mm(input_list[i], weight[i]) + bias[i]) 23 | return torch.cat(output_list, dim=0) 24 | 25 | # output = torch.mm(input, weight[0]) 26 | # return output 27 | 28 | 29 | kernel_forward = 0 30 | kernel_backward = 0 31 | torch_forward = 0 32 | torch_backward = 0 33 | for t in range(200 + 1): 34 | weight = torch.rand((NUM_EXPERTS, INPUT_SIZE, OUTPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 35 | bias = torch.rand((NUM_EXPERTS, OUTPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 36 | input = torch.rand((BSZ, INPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 37 | experts = torch.randint(NUM_EXPERTS, (BSZ,), device=cuda_device, dtype=torch.long) 38 | output_vector = torch.rand((BSZ, OUTPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 39 | 40 | experts, _ = torch.sort(experts, dim=0) 41 | zeros = torch.zeros((BSZ, NUM_EXPERTS), device=cuda_device, dtype=torch.long) 42 | gates = zeros.scatter(1, experts[:, None], 1) 43 | expert_size = gates.sum(0) 44 | end_indices = expert_size.cumsum(0) 45 | start_indices = F.pad(end_indices[:-1], (1,0), value=0) 46 | 47 | torch.cuda.synchronize(cuda_device) 48 | 49 | start = time.time() 50 | function_output = ParallelLinear.apply(input, expert_size, weight, bias) 51 | function_output_sum = torch.einsum('bi,bi->b', function_output, output_vector).sum(0) 52 | torch.cuda.synchronize(cuda_device) 53 | forward_i = time.time() - start 54 | 55 | start = time.time() 56 | function_output_sum.backward() 57 | torch.cuda.synchronize(cuda_device) 58 | backward_i = time.time() - start 59 | 60 | if t > 0: 61 | kernel_forward += forward_i 62 | kernel_backward += backward_i 63 | print('Step {:2d} | K_Fwd: {:.3f} us | K_Bwd {:.3f} us'.format(t, forward_i * 1e6/1e5, backward_i * 1e6/1e5), end=' ') 64 | 65 | input_grad = input.grad 66 | weight_grad = weight.grad 67 | bias_grad = bias.grad 68 | 69 | input.grad = None 70 | weight.grad = None 71 | bias.grad = None 72 | 73 | torch.cuda.synchronize(cuda_device) 74 | 75 | start = time.time() 76 | output = TorchParallelLinear(input, weight, bias, expert_size) 77 | output_sum = torch.einsum('bi,bi->b', output, output_vector).sum(0) 78 | 
torch.cuda.synchronize(cuda_device) 79 | forward_i = time.time() - start 80 | 81 | start = time.time() 82 | output_sum.backward() 83 | torch.cuda.synchronize(cuda_device) 84 | backward_i = time.time() - start 85 | 86 | if t > 0: 87 | torch_forward += forward_i 88 | torch_backward += backward_i 89 | print('| T_Fwd: {:.3f} us | T_Bwd {:.3f} us'.format(forward_i * 1e6/1e5, backward_i * 1e6/1e5), end=' ') 90 | 91 | output_diff = torch.abs(output - function_output).max() 92 | input_grad_diff = torch.abs(input.grad - input_grad).max() 93 | weight_grad_diff = torch.abs(weight.grad - weight_grad).max() 94 | bias_grad_diff = torch.abs(bias.grad - bias_grad).max() 95 | 96 | if t > 0: 97 | print('| O_Diff: {:.3f} | Ig_Diff {:.3f} | Wg_Diff {:.3f} | bg_Diff {:.3f}'.format( 98 | output_diff, input_grad_diff, weight_grad_diff, bias_grad_diff)) 99 | 100 | input.grad = None 101 | weight.grad = None 102 | 103 | print('Kernel Forward: {:.3f} us | Kernel Backward {:.3f} us'.format(kernel_forward * 1e6/1e5, kernel_backward * 1e6/1e5)) 104 | print('Torch Forward: {:.3f} us | Torch Backward {:.3f} us'.format(torch_forward * 1e6/1e5, torch_backward * 1e6/1e5)) 105 | -------------------------------------------------------------------------------- /patch_moe/gate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | 6 | class Gate(nn.Module): 7 | 8 | def __init__(self, k, gating_kernel_size, strides=1, padding=0, 9 | gating_activation=None, gating_kernel_initializer=None): 10 | super(Gate, self).__init__() 11 | 12 | self.k = k 13 | self.gating_kernel_size = gating_kernel_size 14 | self.strides = strides 15 | self.padding = padding 16 | self.gating_activation = gating_activation 17 | 18 | self.gating_kernel = nn.Parameter(torch.empty(1, 3, 4, 4)) 19 | 20 | # Initialize with normal distribution 21 | init.normal_(self.gating_kernel, mean=0.0, std=0.0001) 22 | 23 | 24 | 25 | def forward(self, inputs): 26 | 27 | # Convolution 28 | #(b,3,76,76) 29 | gating_outputs = F.conv2d(inputs, self.gating_kernel, stride=self.strides, padding=self.padding) 30 | #(b,1,19,19) 31 | # Apply activation function if specified 32 | if self.gating_activation is not None: 33 | gating_outputs = self.gating_activation(gating_outputs) 34 | 35 | # Flatten and apply top-k 36 | b, c, h, w = gating_outputs.shape 37 | gating_outputs = gating_outputs.view(b, c, -1) 38 | #(b,1,361) 39 | values, indices = torch.topk(gating_outputs, self.k, dim=2, sorted=False) 40 | #(b,1,2) 41 | # Scatter values to original positions 42 | out_shape = (b, c, h * w) 43 | ret_flat = torch.zeros(b * c * h * w, device=inputs.device) 44 | #[[20,40][34,56]] 45 | indices_flat = indices.view(b*c,-1) + torch.arange(b * c, device=inputs.device).unsqueeze(-1) * h * w 46 | indices_flat = indices_flat.view(-1) 47 | ret_flat.scatter_add_(0, indices_flat, values.view(-1)) 48 | #[b,1,361] 49 | # Reshape and reorder 50 | new_gating_outputs = ret_flat.view(b, c, h, w) 51 | #[b,1,19,19] 52 | # Repeat and reshape the gating outputs 53 | new_gating_outputs = new_gating_outputs.repeat_interleave(self.gating_kernel_size[0], dim=2) 54 | new_gating_outputs = new_gating_outputs.repeat_interleave(self.gating_kernel_size[1], dim=3) 55 | new_gating_outputs = new_gating_outputs.repeat_interleave(self.gating_kernel.size(1), dim=1) 56 | #[b,48,19,19] 57 | # new_gating_outputs = new_gating_outputs.view(b, h, self.gating_kernel_size[0], w, 
self.gating_kernel_size[1],-1) 58 | # new_gating_outputs = new_gating_outputs.view(b, h * self.gating_kernel_size[0], w * self.gating_kernel_size[1],-1) 59 | # # new_gating_outputs = new_gating_outputs.permute(0, 3, 1, 2).contiguous() 60 | # repeat_factor = self.gating_kernel[0] * self.gating_kernel[1] * 3 61 | # new_gating_outputs = new_gating_outputs.repeat(1, 1, 1, 48) 62 | 63 | # # Step 2: Reshape new_gating_outputs 64 | # new_shape = (new_gating_outputs.size(0), new_gating_outputs.size(1), new_gating_outputs.size(2), 65 | # self.gating_kernel[0], self.gating_kernel[1], 3) 66 | # new_gating_outputs = new_gating_outputs.view(new_shape) 67 | 68 | # # Step 3: Transpose new_gating_outputs 69 | # new_gating_outputs = new_gating_outputs.permute(0, 1, 3, 2, 4, 5) 70 | 71 | # # Step 4: Final reshape 72 | # final_shape = (new_gating_outputs.size(0), new_gating_outputs.size(1) * new_gating_outputs.size(2), 73 | # new_gating_outputs.size(3) * new_gating_outputs.size(4), new_gating_outputs.size(5)) 74 | # new_gating_outputs = new_gating_outputs.view(final_shape) 75 | # Element-wise multiplication 76 | outputs = inputs * new_gating_outputs 77 | 78 | return outputs 79 | def test_gate_layer(): 80 | # Parameters for the gate layer 81 | k = 2 82 | gating_kernel_size = (4, 4) # Example kernel size 83 | strides = 4 84 | padding = 0 85 | 86 | # Initialize the Gate layer 87 | gate_layer = Gate(k, gating_kernel_size, strides, padding, gating_activation=torch.relu) 88 | 89 | # Create a random input tensor 90 | batch_size = 2 91 | in_channels = 3 92 | height, width = 16, 16 # Example dimensions 93 | input_tensor = torch.randn(batch_size, in_channels, height, width) 94 | 95 | # Forward pass through the Gate layer 96 | output = gate_layer(input_tensor) 97 | 98 | print("Input shape:", input_tensor.shape) 99 | print("Output shape:", output.shape) 100 | 101 | if __name__ == "__main__": 102 | test_gate_layer() -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": [ 3 | "data/**", 4 | "data_local/**", 5 | "outputs/**" 6 | ] 7 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml-include==1.4.1 -------------------------------------------------------------------------------- /resnet_moe/moe_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import autograd, nn as nn 3 | 4 | 5 | class GetMask(autograd.Function): 6 | @staticmethod 7 | def forward(ctx, scores): # binarization 8 | 9 | expert_pred = torch.argmax(scores, dim=1) # [bs] 10 | expert_pred_one_hot = torch.zeros_like(scores).scatter_(1, expert_pred.unsqueeze(-1), 1) 11 | 12 | return expert_pred, expert_pred_one_hot 13 | 14 | @staticmethod 15 | def backward(ctx, g1, g2): 16 | return g2 17 | 18 | 19 | def get_device(x): 20 | gpu_idx = x.get_device() 21 | return f"cuda:{gpu_idx}" if gpu_idx >= 0 else "cpu" 22 | 23 | 24 | class MoEBase(nn.Module): 25 | def __init__(self): 26 | super(MoEBase, self).__init__() 27 | self.scores = None 28 | self.router = None 29 | 30 | def set_score(self, scores): 31 | self.scores = scores 32 | for module in self.modules(): 33 | if hasattr(module, 'scores'): 34 | module.scores = self.scores 35 | 36 | 37 | class MoEConv(nn.Conv2d, MoEBase): 38 | def __init__(self, in_channels, 
out_channels, kernel_size, stride=1, padding=0, groups=1, dilation=1, bias=False, 39 | n_expert=8): 40 | super(MoEConv, self).__init__(in_channels, out_channels * n_expert, kernel_size, stride, padding, dilation, 41 | groups, bias, ) 42 | self.in_channels = in_channels 43 | self.out_channels = out_channels * n_expert 44 | self.expert_width = out_channels 45 | 46 | self.n_expert = n_expert 47 | assert self.n_expert >= 1 48 | self.layer_selection = torch.zeros([n_expert, self.out_channels]) 49 | for cluster_id in range(n_expert): 50 | start = cluster_id * self.expert_width 51 | end = (cluster_id + 1) * self.expert_width 52 | idx = torch.arange(start, end) 53 | self.layer_selection[cluster_id][idx] = 1 54 | self.scores = None 55 | 56 | def forward(self, x): 57 | if self.n_expert > 1: 58 | if self.scores is None: 59 | self.scores = self.router(x) 60 | expert_selection, expert_selection_one_hot = GetMask.apply(self.scores) 61 | mask = torch.matmul(expert_selection_one_hot, self.layer_selection.to(x)) # [bs, self.out_channels] 62 | out = super(MoEConv, self).forward(x) 63 | out = out * mask.unsqueeze(-1).unsqueeze(-1) 64 | index = torch.where(mask.view(-1) > 0)[0] 65 | shape = out.shape 66 | out_selected = out.view(shape[0] * shape[1], shape[2], shape[3])[index].view(shape[0], -1, shape[2], 67 | shape[3]) 68 | else: 69 | out_selected = super(MoEConv, self).forward(x) 70 | self.scores = None 71 | return out_selected -------------------------------------------------------------------------------- /resnet_moe/resnet_moe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.layers.moe_layer import MoEConv, MoEBase 6 | 7 | 8 | class BasicBlock(nn.Module): 9 | expansion = 1 10 | 11 | def __init__(self, in_planes, planes, conv_layer, stride=1, **kwargs): 12 | super(BasicBlock, self).__init__() 13 | self.conv1 = conv_layer( 14 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False, **kwargs 15 | ) 16 | self.bn1 = nn.BatchNorm2d(planes) 17 | self.conv2 = conv_layer( 18 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False, **kwargs 19 | ) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | 22 | self.shortcut = nn.Sequential() 23 | if stride != 1 or in_planes != self.expansion * planes: 24 | self.shortcut = nn.Sequential( 25 | nn.Conv2d( 26 | in_planes, 27 | self.expansion * planes, 28 | kernel_size=1, 29 | stride=stride, 30 | bias=False, 31 | ), 32 | nn.BatchNorm2d(self.expansion * planes), 33 | ) 34 | 35 | def forward(self, x): 36 | out = F.relu(self.bn1(self.conv1(x))) 37 | out = self.bn2(self.conv2(out)) 38 | out += self.shortcut(x) 39 | out = F.relu(out) 40 | return out 41 | 42 | 43 | class Bottleneck(nn.Module): 44 | expansion = 4 45 | 46 | def __init__(self, in_planes, planes, conv_layer, stride=1, **kwargs): 47 | super(Bottleneck, self).__init__() 48 | self.conv1 = conv_layer(in_planes, planes, kernel_size=1, bias=False, **kwargs 49 | ) 50 | self.bn1 = nn.BatchNorm2d(planes) 51 | self.conv2 = conv_layer( 52 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False, **kwargs 53 | ) 54 | self.bn2 = nn.BatchNorm2d(planes) 55 | self.conv3 = conv_layer( 56 | planes, self.expansion * planes, kernel_size=1, bias=False, **kwargs 57 | ) 58 | self.bn3 = nn.BatchNorm2d(self.expansion * planes) 59 | 60 | self.shortcut = nn.Sequential() 61 | if stride != 1 or in_planes != self.expansion * planes: 62 | self.shortcut = nn.Sequential( 63 | nn.Conv2d( 64 
| in_planes, 65 | self.expansion * planes, 66 | kernel_size=1, 67 | stride=stride, 68 | bias=False, 69 | ), 70 | nn.BatchNorm2d(self.expansion * planes), 71 | ) 72 | 73 | def forward(self, x): 74 | out = F.relu(self.bn1(self.conv1(x))) 75 | out = F.relu(self.bn2(self.conv2(out))) 76 | out = self.bn3(self.conv3(out)) 77 | out += self.shortcut(x) 78 | out = F.relu(out) 79 | return out 80 | 81 | 82 | def percentile(t, q): 83 | k = 1 + round(.01 * float(q) * (t.numel() - 1)) 84 | return t.view(-1).kthvalue(k).values.item() 85 | 86 | 87 | class ResNet(MoEBase): 88 | def __init__(self, block, num_blocks, n_expert=8, ratio=1.0): 89 | super(ResNet, self).__init__() 90 | self.ratio = ratio 91 | self.in_planes = 64 92 | self.conv_layer = MoEConv 93 | self.num_blocks = num_blocks 94 | self.normalize = None 95 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | 98 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, n_expert=n_expert) 99 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, n_expert=n_expert) 100 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, n_expert=n_expert) 101 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, n_expert=n_expert) 102 | 103 | 104 | def _make_layer(self, block, planes, num_blocks, stride, **kwargs): 105 | planes = int(self.ratio * planes) 106 | strides = [stride] + [1] * (num_blocks - 1) 107 | layers = [] 108 | for stride in strides: 109 | layers.append(block(self.in_planes, planes, self.conv_layer, stride, **kwargs)) 110 | self.in_planes = planes * block.expansion 111 | return nn.Sequential(*layers) 112 | 113 | def forward(self, x): 114 | if self.normalize is not None: 115 | x = self.normalize(x) 116 | if self.router is not None: 117 | self.set_score(self.router(x)) 118 | out = F.relu(self.bn1(self.conv1(x))) 119 | out = self.layer1(out) 120 | out = self.layer2(out) 121 | out = self.layer3(out) 122 | out = self.layer4(out) 123 | 124 | 125 | return out 126 | 127 | 128 | def resnet18_cifar_moe(**kwargs): 129 | return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 130 | 131 | 132 | def resnet34_cifar_moe(**kwargs): 133 | return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 134 | 135 | 136 | def resnet50_cifar_moe(**kwargs): 137 | return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 138 | 139 | 140 | def resnet101_cifar_moe(**kwargs): 141 | return ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 142 | 143 | 144 | def resnet152_cifar_moe(**kwargs): 145 | return ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) -------------------------------------------------------------------------------- /resnet_moe/router.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LambdaLayer(nn.Module): 6 | def __init__(self, lambd): 7 | super(LambdaLayer, self).__init__() 8 | self.lambd = lambd 9 | 10 | def forward(self, x): 11 | return self.lambd(x) 12 | 13 | 14 | class Block(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, conv_layer, stride=1): 18 | super(Block, self).__init__() 19 | self.conv1 = conv_layer(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn1 = nn.BatchNorm2d(planes) 21 | self.conv2 = conv_layer(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != 
planes: 26 | diff = planes - in_planes 27 | self.shortcut = LambdaLayer( 28 | lambda x: F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, int(diff * 0.5), int((diff + 1) * 0.5)), "constant", 0)) 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | out += self.shortcut(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | class Router(nn.Module): 38 | def __init__(self, block, num_blocks, num_experts=2): 39 | super(Router, self).__init__() 40 | self.in_planes = 16 41 | self.conv_layer = nn.Conv2d 42 | 43 | self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=3, stride=1, padding=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(self.in_planes) 45 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 46 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 47 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 48 | self.fc = nn.Linear(64, num_experts) 49 | 50 | def _make_layer(self, block, planes, num_blocks, stride): 51 | planes = planes 52 | strides = [stride] + [1] * (num_blocks - 1) 53 | layers = [] 54 | for stride in strides: 55 | layers.append(block(self.in_planes, planes, self.conv_layer, stride)) 56 | self.in_planes = planes * block.expansion 57 | 58 | return nn.Sequential(*layers) 59 | 60 | def forward(self, x): 61 | out = F.relu(self.bn1(self.conv1(x))) 62 | out = self.layer1(out) 63 | out = self.layer2(out) 64 | out = self.layer3(out) 65 | out = F.avg_pool2d(out, out.size()[3]) 66 | out = out.view(out.size(0), -1) 67 | out = self.fc(out) 68 | return out 69 | 70 | 71 | def build_router(**kwargs): 72 | return Router(Block, [3, 3, 3], **kwargs) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name = 'diffusion_policy', 5 | packages = find_packages(), 6 | ) 7 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | Training: 4 | python train.py --config-name=train_diffusion_lowdim_workspace 5 | """ 6 | 7 | import sys 8 | # use line-buffering for both stdout and stderr 9 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 10 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 11 | # import os 12 | import hydra 13 | from omegaconf import OmegaConf 14 | import pathlib 15 | from diffusion_policy.workspace.base_workspace import BaseWorkspace 16 | import os 17 | 18 | # allows arbitrary python code execution in configs using the ${eval:''} resolver 19 | OmegaConf.register_new_resolver("eval", eval, replace=True) 20 | 21 | @hydra.main( 22 | version_base=None, 23 | config_path=str(pathlib.Path(__file__).parent.joinpath( 24 | 'config', 'tmp')), 25 | config_name="full.yaml", 26 | ) 27 | def main(cfg: OmegaConf): 28 | # resolve immediately so all the ${now:} resolvers 29 | # will use the same time. 
30 | OmegaConf.resolve(cfg) 31 | 32 | cls = hydra.utils.get_class(cfg._target_) 33 | workspace: BaseWorkspace = cls(cfg) 34 | workspace.run() 35 | 36 | if __name__ == "__main__": 37 | os.environ["CUDA_VISIBLE_DEVICES"]='0,' 38 | os.environ["MUJOCO_GL"]="osmesa" 39 | from utils.recursive_yaml import read_yaml, write_yaml 40 | data = read_yaml('config/base.yaml') 41 | write_yaml(data, 'config/tmp/full.yaml') 42 | main() 43 | -------------------------------------------------------------------------------- /utils/recursive_yaml.py: -------------------------------------------------------------------------------- 1 | import yaml, os 2 | from yamlinclude import YamlIncludeConstructor 3 | fpath = os.path.dirname(os.path.dirname(__file__)) 4 | Path = lambda p:os.path.join(fpath,p) 5 | YamlIncludeConstructor.add_to_loader_class(loader_class=yaml.FullLoader) 6 | def read_yaml(path): 7 | p = Path(path) 8 | with open(p) as f: 9 | data = yaml.load(f, Loader=yaml.FullLoader) 10 | return data 11 | def write_yaml(data, path): 12 | p = Path(path) 13 | with open(p, 'w') as f: 14 | yaml.dump(data, f) 15 | 16 | if __name__ == '__main__': 17 | path = 'config/base.yaml' 18 | t = read_yaml(path) 19 | # write to yaml 20 | with open('config/tmp/full.yaml', 'w') as f: 21 | yaml.dump(t, f) --------------------------------------------------------------------------------
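Usage note: recursive_yaml relies on pyyaml-include (pinned in requirements.txt) to expand `!include` directives at load time, so the YAML written back out is fully flattened. Below is a minimal sketch of the flow train.py runs before handing the result to Hydra; the base.yaml keys shown in the comments (task_num, task0, task1) follow the fields eval.py reads but are otherwise assumptions about the config contents.

```
# Hypothetical include layout (key names are illustrative only):
#
#   config/base.yaml
#     task_num: 2
#     task0: !include tasks/square_d0.yaml
#     task1: !include tasks/stack_d0.yaml
#
from utils.recursive_yaml import read_yaml, write_yaml

data = read_yaml('config/base.yaml')       # !include directives are expanded while parsing
write_yaml(data, 'config/tmp/full.yaml')   # flattened config that Hydra loads as full.yaml
```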