├── .gitignore ├── LICENSE ├── README.md ├── conda_environment.yaml ├── config ├── base.yaml ├── tasks │ ├── coffee_d0.yaml │ ├── coffee_preparation_d0.yaml │ ├── hammer_cleanup_d0.yaml │ ├── kitchen_d0.yaml │ ├── mug_cleanup_d0.yaml │ ├── nut_assembly_d0.yaml │ ├── pick_place_d0.yaml │ ├── square_d0.yaml │ ├── stack_d0.yaml │ ├── stack_three_d0.yaml │ ├── threading_d0.yaml │ └── three_piece_assembly_d0.yaml └── tmp │ └── full.yaml ├── diffusion_policy ├── codecs │ └── imagecodecs_numcodecs.py ├── common │ ├── checkpoint_util.py │ ├── cv2_util.py │ ├── env_util.py │ ├── json_logger.py │ ├── nested_dict_util.py │ ├── normalize_util.py │ ├── pose_trajectory_interpolator.py │ ├── precise_sleep.py │ ├── pymunk_override.py │ ├── pymunk_util.py │ ├── pytorch_util.py │ ├── replay_buffer.py │ ├── robomimic_config_util.py │ ├── robomimic_util.py │ ├── sampler.py │ └── timestamp_accumulator.py ├── config │ ├── task │ │ ├── blockpush_lowdim_seed.yaml │ │ ├── blockpush_lowdim_seed_abs.yaml │ │ ├── can_image.yaml │ │ ├── can_image_abs.yaml │ │ ├── can_lowdim.yaml │ │ ├── can_lowdim_abs.yaml │ │ ├── kitchen_lowdim.yaml │ │ ├── kitchen_lowdim_abs.yaml │ │ ├── lift_image.yaml │ │ ├── lift_image_abs.yaml │ │ ├── lift_lowdim.yaml │ │ ├── lift_lowdim_abs.yaml │ │ ├── pusht_image.yaml │ │ ├── pusht_lowdim.yaml │ │ ├── real_pusht_image.yaml │ │ ├── square_image.yaml │ │ ├── square_image_abs.yaml │ │ ├── square_lowdim.yaml │ │ ├── square_lowdim_abs.yaml │ │ ├── tool_hang_image.yaml │ │ ├── tool_hang_image_abs.yaml │ │ ├── tool_hang_lowdim.yaml │ │ ├── tool_hang_lowdim_abs.yaml │ │ ├── transport_image.yaml │ │ ├── transport_image_abs.yaml │ │ ├── transport_lowdim.yaml │ │ └── transport_lowdim_abs.yaml │ ├── train_bet_lowdim_workspace.yaml │ ├── train_diffusion_transformer_hybrid_workspace.yaml │ └── train_robomimic_image_workspace.yaml ├── dataset │ ├── base_dataset.py │ ├── multitask_dataset.py │ ├── robomimic_replay_image_dataset.py │ └── robomimic_replay_lowdim_dataset.py ├── env │ └── robomimic │ │ ├── robomimic_image_wrapper.py │ │ └── robomimic_lowdim_wrapper.py ├── env_runner │ ├── base_image_runner.py │ └── robomimic_image_runner.py ├── gym_util │ ├── async_vector_env.py │ ├── multistep_wrapper.py │ ├── sync_vector_env.py │ ├── video_recording_wrapper.py │ └── video_wrapper.py ├── model │ ├── bet │ │ ├── action_ae │ │ │ └── __init__.py │ │ ├── latent_generators │ │ │ ├── latent_generator.py │ │ │ ├── mingpt.py │ │ │ └── transformer.py │ │ ├── libraries │ │ │ └── loss_fn.py │ │ └── utils.py │ ├── common │ │ ├── dict_of_tensor_mixin.py │ │ ├── lr_scheduler.py │ │ ├── module_attr_mixin.py │ │ ├── normalizer.py │ │ ├── rotation_transformer.py │ │ ├── shape_util.py │ │ └── tensor_util.py │ ├── diffusion │ │ ├── conditional_unet1d.py │ │ ├── conv1d_components.py │ │ ├── ema_model.py │ │ ├── mask_generator.py │ │ ├── positional_embedding.py │ │ └── transformer_for_diffusion.py │ └── vision │ │ ├── crop_randomizer.py │ │ ├── model_getter.py │ │ └── multi_image_obs_encoder.py ├── policy │ ├── base_image_policy.py │ └── diffusion_transformer_hybrid_image_policy.py ├── real_world │ ├── keystroke_counter.py │ ├── multi_camera_visualizer.py │ ├── multi_realsense.py │ ├── real_data_conversion.py │ ├── real_env.py │ ├── real_inference_util.py │ ├── realsense_config │ │ ├── 415_high_accuracy_mode.json │ │ └── 435_high_accuracy_mode.json │ ├── rtde_interpolation_controller.py │ ├── single_realsense.py │ ├── spacemouse.py │ ├── spacemouse_shared_memory.py │ └── video_recorder.py ├── shared_memory │ ├── 
shared_memory_queue.py │ ├── shared_memory_ring_buffer.py │ ├── shared_memory_util.py │ └── shared_ndarray.py └── workspace │ ├── base_workspace.py │ └── train_diffusion_transformer_hybrid_workspace.py ├── eval.py ├── mixture_of_experts ├── mixture_of_experts │ └── __init__.py ├── moe.png ├── moe.py ├── setup.py └── task_moe.py ├── moe └── code │ ├── moe │ ├── configs │ │ ├── davit_base_moe_lamb_16nodes.py │ │ ├── davit_small_moe_lamb_16nodes.py │ │ └── davit_tiny_moe_lamb_16nodes.py │ └── davit_moe.py │ └── mtl │ ├── configs │ ├── davit_base_lamb_16nodes.py │ ├── davit_small_lamb_16nodes.py │ └── davit_tiny_lamb_16nodes.py │ └── davit.py ├── parallel_linear ├── .gitignore ├── README.md ├── parallel_experts │ ├── __init__.py │ ├── moe.py │ ├── parallel_experts.py │ └── task_moe.py ├── parallel_linear.cc ├── parallel_linear_kernel.cu ├── setup.py └── test.py ├── patch_moe ├── encoder.py ├── gate.py ├── resnet.py └── test.py ├── pyrightconfig.json ├── requirements.txt ├── resnet_moe ├── moe_layer.py ├── resnet_moe.py └── router.py ├── setup.py ├── train.py └── utils └── recursive_yaml.py /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | logs 3 | wandb 4 | outputs 5 | data 6 | data_local 7 | .vscode 8 | _wandb 9 | test_eval 10 | *.ckpt 11 | **/.DS_Store 12 | 13 | fuse.cfg 14 | 15 | *.ai 16 | 17 | # Generation results 18 | results/ 19 | 20 | ray/auth.json 21 | 22 | # Byte-compiled / optimized / DLL files 23 | __pycache__/ 24 | *.py[cod] 25 | *$py.class 26 | 27 | # C extensions 28 | *.so 29 | 30 | # Distribution / packaging 31 | .Python 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | wheels/ 44 | pip-wheel-metadata/ 45 | share/python-wheels/ 46 | *.egg-info/ 47 | .installed.cfg 48 | *.egg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # pipenv 109 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 110 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 111 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 112 | # install all needed dependencies. 113 | #Pipfile.lock 114 | 115 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Spyder project settings 126 | .spyderproject 127 | .spyproject 128 | 129 | # Rope project settings 130 | .ropeproject 131 | 132 | # mkdocs documentation 133 | /site 134 | 135 | # mypy 136 | .mypy_cache/ 137 | .dmypy.json 138 | dmypy.json 139 | 140 | # Pyre type checker 141 | .pyre/ 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Columbia Artificial Intelligence and Robotics Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CoRL2024: Sparse Diffusion Policy 2 | 3 | ### Dataset Download 4 | 5 | Using Direct Download Links 6 | 7 | You can download the datasets manually from Google Drive. Each folder corresponds to one of the MimicGen dataset types. 8 | 9 | **Google Drive folder with all mimicgen datasets:** [link](https://drive.google.com/drive/folders/14e9kkHGfApuQ709LBEbXrXVI1Lp5Ax7p?usp=drive_link) 10 | 11 | Then place the downloaded `core` folder at `robomimic/core`, so that each task's `.hdf5` file is available under that path. 12 |
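The per-task configs under `config/tasks/` expect each dataset at `robomimic/core/<task_name>.hdf5`. As an illustration, the eight tasks referenced by `config/base.yaml` give a layout like the following (the exact set of files depends on which tasks you download):

```
robomimic/
└── core/
    ├── coffee_d0.hdf5
    ├── hammer_cleanup_d0.hdf5
    ├── mug_cleanup_d0.hdf5
    ├── nut_assembly_d0.hdf5
    ├── square_d0.hdf5
    ├── stack_d0.hdf5
    ├── stack_three_d0.hdf5
    └── threading_d0.hdf5
```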
13 | ### 🛠️ Installation 14 | #### 🖥️ Simulation 15 | To reproduce our simulation benchmark results, install our conda environment on a Linux machine with an Nvidia GPU. On Ubuntu 20.04 you need to install the following apt packages for mujoco: 16 | ```console 17 | $ sudo apt install -y libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf 18 | ``` 19 | 20 | We recommend [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge) instead of the standard anaconda distribution for faster installation: 21 | ```console 22 | $ mamba env create -f conda_environment.yaml 23 | ``` 24 | 25 | but you can use conda as well: 26 | ```console 27 | $ conda env create -f conda_environment.yaml 28 | ``` 29 | ### Installation 30 | 31 | Next, activate the environment and install the packages needed for MimicGen: 32 | 33 | ```sh 34 | conda activate sdp 35 | ``` 36 | 37 | You can install most of the dependencies by cloning the repository and then installing from source: 38 | 39 | ```sh 40 | cd 41 | git clone https://github.com/NVlabs/mimicgen_environments.git 42 | cd mimicgen_environments 43 | pip install -e . 44 | ``` 45 | 46 | There are some additional dependencies that we list below. These are installed from source: 47 | 48 | - [robosuite](https://robosuite.ai/) 49 | - **Installation** 50 | ```sh 51 | cd 52 | git clone https://github.com/ARISE-Initiative/robosuite.git 53 | cd robosuite 54 | git checkout b9d8d3de5e3dfd1724f4a0e6555246c460407daa 55 | pip install -e . 56 | ``` 57 | - **Note**: the git checkout command corresponds to the commit we used for testing our policy learning results. In general the `master` branch (`v1.4+`) should be fine. 58 | - For more detailed instructions, see [here](https://robosuite.ai/docs/installation.html) 59 | - [robomimic](https://robomimic.github.io/) 60 | - **Installation** 61 | ```sh 62 | cd 63 | git clone https://github.com/ARISE-Initiative/robomimic.git 64 | cd robomimic 65 | git checkout ab6c3dcb8506f7f06b43b41365e5b3288c858520 66 | pip install -e . 67 | ``` 68 | - **Note**: the git checkout command corresponds to the commit we used for testing our policy learning results. In general the `master` branch (`v0.3+`) should be fine. 69 | - For more detailed instructions, see [here](https://robomimic.github.io/docs/introduction/installation.html) 70 | - [robosuite_task_zoo](https://github.com/ARISE-Initiative/robosuite-task-zoo) 71 | - **Note**: This is optional and only needed for the Kitchen and Hammer Cleanup environments / datasets. 72 | - **Installation** 73 | ```sh 74 | cd 75 | git clone https://github.com/ARISE-Initiative/robosuite-task-zoo 76 | cd robosuite-task-zoo 77 | git checkout 74eab7f88214c21ca1ae8617c2b2f8d19718a9ed 78 | pip install -e . 79 | ``` 80 | 81 | Lastly, **please downgrade MuJoCo to 2.3.2**: 82 | ```sh 83 | pip install mujoco==2.3.2 84 | ``` 85 | 86 | **Note**: This MuJoCo version (`2.3.2`) is important -- in our testing, we found that other versions of MuJoCo could be problematic, especially for the Sawyer arm datasets (e.g. `2.3.5` causes problems with rendering and `2.3.7` changes the dynamics of the robot arm significantly from the collected datasets). 87 | 88 | The `conda_environment_macos.yaml` file is only for development on MacOS and does not have full support for benchmarks. 89 | 90 | ### Training 91 | ```console 92 | $ python train.py 93 | ``` 94 | The results in our paper are evaluated every 50 epochs; after about 100 epochs you should obtain results similar to those reported in the paper.
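Training is driven by the multi-task configuration in `config/base.yaml`, which composes the eight per-task configs under `config/tasks/` with a custom `!include` YAML tag (presumably implemented in `utils/recursive_yaml.py`). The snippet below is only a minimal sketch of how such a tag can be registered with PyYAML; it is not the repository's actual loader, and it assumes the include paths resolve relative to the repository root:

```python
import yaml


class IncludeLoader(yaml.SafeLoader):
    """SafeLoader subclass so the custom tag does not leak into the global loader."""
    pass


def _include(loader: IncludeLoader, node: yaml.Node):
    # The scalar value of the node is the path of the YAML file to include,
    # e.g. "config/tasks/square_d0.yaml" as used in config/base.yaml.
    path = loader.construct_scalar(node)
    with open(path, "r") as f:
        # Recursively load the included file so nested !include tags also work.
        return yaml.load(f, IncludeLoader)


IncludeLoader.add_constructor("!include", _include)

if __name__ == "__main__":
    with open("config/base.yaml", "r") as f:
        cfg = yaml.load(f, IncludeLoader)
    # Each taskN entry is now the fully expanded per-task config dict.
    print(cfg["task0"]["name"])  # expected: square_d0, per config/tasks/square_d0.yaml
```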
95 | 96 | ### Training Checkpoints 97 | 98 | Within the `outputs` folder, each experiment directory is organized as follows: 99 | ``` 100 | 101 | ├── config.yaml 102 | ├── metrics 103 | │   └── logs.json.txt 104 | ├── train 105 | │   ├── checkpoints 106 | │   │   ├── epoch=0299-test_mean_score=6.070.ckpt 107 | │   │   └── latest.ckpt 108 | │   └── logs.json.txt 109 | 110 | ``` 111 | 112 | ### Checkpoints 113 | 114 | You can download our SDP checkpoints manually from Google Drive. 115 | 116 | **Google Drive folder with our checkpoints:** [link](https://drive.google.com/file/d/1So-byi2hNXIrPLsMT1KLaSbJTpRM1pil/view) 117 | 118 | If the link does not work, try reloading it. 119 | 120 | Save the checkpoints to a location of your choice, referred to below as `/path/to/ckpt`. 121 | 122 | ### Evaluation 123 | ```console 124 | $ python eval.py --checkpoint /path/to/ckpt 125 | ``` 126 | 127 | This should reproduce multi-task results similar to those reported in our paper. 128 | -------------------------------------------------------------------------------- /conda_environment.yaml: -------------------------------------------------------------------------------- 1 | name: sdp 2 | channels: 3 | - pytorch 4 | - pytorch3d 5 | - nvidia 6 | - conda-forge 7 | dependencies: 8 | - python=3.9 9 | - pip=22.2.2 10 | - cudatoolkit=11.6 11 | - pytorch=1.12.1 12 | - torchvision=0.13.1 13 | - pytorch3d=0.7.0 14 | - numpy=1.23.3 15 | - numba==0.56.4 16 | - scipy==1.9.1 17 | - py-opencv=4.6.0 18 | - cffi=1.15.1 19 | - ipykernel=6.16 20 | - matplotlib=3.6.1 21 | - zarr=2.12.0 22 | - numcodecs=0.10.2 23 | - h5py=3.7.0 24 | - hydra-core=1.2.0 25 | - einops=0.4.1 26 | - tqdm=4.64.1 27 | - dill=0.3.5.1 28 | - scikit-video=1.1.11 29 | - scikit-image=0.19.3 30 | - gym=0.21.0 31 | - pymunk=6.2.1 32 | - wandb=0.13.3 33 | - threadpoolctl=3.1.0 34 | - shapely=1.8.4 35 | - cython=0.29.32 36 | - imageio=2.22.0 37 | - imageio-ffmpeg=0.4.7 38 | - termcolor=2.0.1 39 | - tensorboard=2.10.1 40 | - tensorboardx=2.5.1 41 | - psutil=5.9.2 42 | - click=8.0.4 43 | - boto3=1.24.96 44 | - accelerate=0.13.2 45 | - datasets=2.6.1 46 | - diffusers=0.11.1 47 | - av=10.0.0 48 | - cmake=3.24.3 49 | # trick to avoid cpu affinity issue described in https://github.com/pytorch/pytorch/issues/99625 50 | - llvm-openmp=14 51 | # trick to force reinstall imagecodecs via pip 52 | - imagecodecs==2022.8.8 53 | - pip: 54 | - ray[default,tune]==2.2.0 55 | # requires mujoco py dependencies libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf 56 | - free-mujoco-py==2.1.6 57 | - pygame==2.1.2 58 | - pybullet-svl==3.1.6.4 59 | - robosuite @ https://github.com/cheng-chi/robosuite/archive/277ab9588ad7a4f4b55cf75508b44aa67ec171f0.tar.gz 60 | - robomimic==0.2.0 61 | - pytorchvideo==0.1.5 62 | # pip package required for jpeg-xl 63 | - imagecodecs==2022.9.26 64 | - r3m @ https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz 65 | - dm-control==1.0.9 66 | -------------------------------------------------------------------------------- /config/base.yaml: -------------------------------------------------------------------------------- 1 | _target_: diffusion_policy.workspace.train_diffusion_transformer_hybrid_workspace.TrainDiffusionTransformerHybridWorkspace 2 | checkpoint: 3 | save_last_ckpt: true 4 | save_last_snapshot: false 5 | topk: 6 | format_str: epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt 7 | k: 5 8 | mode: max 9 | monitor_key: test_mean_score 10 | dataloader: 11 | batch_size: 64 12 | num_workers: 8 13 | persistent_workers: false 14 | pin_memory: true 15 | shuffle: true 16 |
dataset_obs_steps: 2 17 | ema: 18 | _target_: diffusion_policy.model.diffusion.ema_model.EMAModel 19 | inv_gamma: 1.0 20 | max_value: 0.9999 21 | min_value: 0.0 22 | power: 0.75 23 | update_after_step: 0 24 | exp_name: default 25 | horizon: 10 26 | keypoint_visible_rate: 1.0 27 | logging: 28 | group: null 29 | id: null 30 | mode: online 31 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 32 | project: diffusion_policy_debug 33 | resume: true 34 | tags: 35 | - train_diffusion_transformer_hybrid 36 | - can_image 37 | - default 38 | multi_run: 39 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 40 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 41 | n_action_steps: 8 42 | n_latency_steps: 0 43 | n_obs_steps: 2 44 | name: train_diffusion_transformer_hybrid 45 | obs_as_cond: true 46 | optimizer: 47 | betas: 48 | - 0.9 49 | - 0.95 50 | learning_rate: 0.0001 51 | obs_encoder_weight_decay: 1.0e-06 52 | transformer_weight_decay: 0.001 53 | past_action_visible: false 54 | policy: 55 | _target_: diffusion_policy.policy.diffusion_transformer_hybrid_image_policy.DiffusionTransformerHybridImagePolicy 56 | causal_attn: true 57 | crop_shape: 58 | - 80 #76 59 | - 80 #76 60 | eval_fixed_crop: true 61 | horizon: 10 62 | n_tasks: 8 63 | n_action_steps: 8 64 | n_cond_layers: 0 65 | n_emb: 512 66 | n_head: 4 67 | n_layer: 12 68 | n_obs_steps: 2 69 | noise_scheduler: 70 | _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler 71 | beta_end: 0.02 72 | beta_schedule: squaredcos_cap_v2 73 | beta_start: 0.0001 74 | clip_sample: true 75 | num_train_timesteps: 100 76 | prediction_type: epsilon 77 | variance_type: fixed_small 78 | num_inference_steps: 100 79 | obs_as_cond: true 80 | obs_encoder_group_norm: true 81 | p_drop_attn: 0.3 82 | p_drop_emb: 0.0 83 | shape_meta: 84 | action: 85 | shape: 86 | - 7 87 | obs: 88 | agentview_image: 89 | shape: 90 | - 3 91 | - 84 92 | - 84 93 | type: rgb 94 | robot0_eef_pos: 95 | shape: 96 | - 3 97 | robot0_eef_quat: 98 | shape: 99 | - 4 100 | robot0_eye_in_hand_image: 101 | shape: 102 | - 3 103 | - 84 104 | - 84 105 | type: rgb 106 | robot0_gripper_qpos: 107 | shape: 108 | - 2 109 | time_as_cond: true 110 | shape_meta: 111 | action: 112 | shape: 113 | - 7 114 | obs: 115 | agentview_image: 116 | shape: 117 | - 3 118 | - 84 119 | - 84 120 | type: rgb 121 | robot0_eef_pos: 122 | shape: 123 | - 3 124 | robot0_eef_quat: 125 | shape: 126 | - 4 127 | robot0_eye_in_hand_image: 128 | shape: 129 | - 3 130 | - 84 131 | - 84 132 | type: rgb 133 | robot0_gripper_qpos: 134 | shape: 135 | - 2 136 | task_name: multi-task 137 | training: 138 | checkpoint_every: 1 139 | debug: false 140 | device: cuda:0 141 | gradient_accumulate_every: 1 142 | lr_scheduler: cosine 143 | lr_warmup_steps: 100 144 | max_train_steps: null 145 | max_val_steps: null 146 | num_epochs: 3500 147 | resume: false 148 | rollout_every: 50 149 | sample_every: 10 150 | seed: 42 151 | tqdm_interval_sec: 1.0 152 | use_ema: true 153 | val_every: 10 154 | val_dataloader: 155 | batch_size: 64 156 | num_workers: 4 157 | persistent_workers: false 158 | pin_memory: true 159 | shuffle: false 160 | 161 | task_num: 8 162 | task0: !include "config/tasks/square_d0.yaml" #2334 163 | task1: !include "config/tasks/stack_d0.yaml" #1632 164 | task2: !include "config/tasks/coffee_d0.yaml" #3402 165 | task3: !include "config/tasks/hammer_cleanup_d0.yaml" #4356 166 | task4: !include "config/tasks/mug_cleanup_d0.yaml" #5162 167 | task5: !include "config/tasks/nut_assembly_d0.yaml" #5476 168 | 
task6: !include "config/tasks/stack_three_d0.yaml" #3888 169 | task7: !include "config/tasks/threading_d0.yaml" #3424 -------------------------------------------------------------------------------- /config/tasks/coffee_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/coffee_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/coffee_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/coffee_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: coffee_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: coffee_d0 114 | -------------------------------------------------------------------------------- /config/tasks/coffee_preparation_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/coffee_preparation_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/coffee_preparation_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/coffee_preparation_d0.hdf5 46 | 
fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: coffee_preparation_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: coffee_preparation_d0 114 | -------------------------------------------------------------------------------- /config/tasks/hammer_cleanup_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/hammer_cleanup_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/hammer_cleanup_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/hammer_cleanup_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: hammer_cleanup_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: hammer_cleanup_d0 114 | -------------------------------------------------------------------------------- /config/tasks/kitchen_d0.yaml: 
-------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/kitchen_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/core/kitchen_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/kitchen_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: kitchen_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: kitchen_d0 114 | -------------------------------------------------------------------------------- /config/tasks/mug_cleanup_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/mug_cleanup_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/mug_cleanup_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/mug_cleanup_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | 
action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: mug_cleanup_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: mug_cleanup_d0 114 | -------------------------------------------------------------------------------- /config/tasks/nut_assembly_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/nut_assembly_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/nut_assembly_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/nut_assembly_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: nut_assembly_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: nut_assembly_d0 114 | -------------------------------------------------------------------------------- /config/tasks/pick_place_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/pick_place_d0.hdf5 6 | horizon: 10 7 | 
n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/pick_place_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/pick_place_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: pick_place_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: pick_place_d0 114 | -------------------------------------------------------------------------------- /config/tasks/square_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/square_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/square_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/square_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | 
shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: square_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: square_d0 114 | -------------------------------------------------------------------------------- /config/tasks/stack_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/stack_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/stack_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/stack_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: stack_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: stack_d0 114 | -------------------------------------------------------------------------------- /config/tasks/stack_three_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/stack_three_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 
84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/stack_three_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/stack_three_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: stack_three_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: stack_three_d0 114 | -------------------------------------------------------------------------------- /config/tasks/threading_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/threading_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/threading_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/threading_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: threading_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | 
robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: threading_d0 114 | -------------------------------------------------------------------------------- /config/tasks/three_piece_assembly_d0.yaml: -------------------------------------------------------------------------------- 1 | abs_action: false 2 | dataset: 3 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 4 | abs_action: false 5 | dataset_path: robomimic/core/three_piece_assembly_d0.hdf5 6 | horizon: 10 7 | n_obs_steps: 2 8 | pad_after: 7 9 | pad_before: 1 10 | rotation_rep: rotation_6d 11 | seed: 42 12 | shape_meta: 13 | action: 14 | shape: 15 | - 7 16 | obs: 17 | agentview_image: 18 | shape: 19 | - 3 20 | - 84 21 | - 84 22 | type: rgb 23 | robot0_eef_pos: 24 | shape: 25 | - 3 26 | robot0_eef_quat: 27 | shape: 28 | - 4 29 | robot0_eye_in_hand_image: 30 | shape: 31 | - 3 32 | - 84 33 | - 84 34 | type: rgb 35 | robot0_gripper_qpos: 36 | shape: 37 | - 2 38 | use_cache: true 39 | val_ratio: 0.02 40 | dataset_path: robomimic/core/three_piece_assembly_d0.hdf5 41 | env_runner: 42 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 43 | abs_action: false 44 | crf: 22 45 | dataset_path: robomimic/core/three_piece_assembly_d0.hdf5 46 | fps: 10 47 | max_steps: 400 48 | n_action_steps: 8 49 | n_envs: 28 50 | n_obs_steps: 2 51 | n_test: 50 52 | n_test_vis: 4 53 | n_train: 6 54 | n_train_vis: 2 55 | past_action: false 56 | render_obs_key: agentview_image 57 | shape_meta: 58 | action: 59 | shape: 60 | - 7 61 | obs: 62 | agentview_image: 63 | shape: 64 | - 3 65 | - 84 66 | - 84 67 | type: rgb 68 | robot0_eef_pos: 69 | shape: 70 | - 3 71 | robot0_eef_quat: 72 | shape: 73 | - 4 74 | robot0_eye_in_hand_image: 75 | shape: 76 | - 3 77 | - 84 78 | - 84 79 | type: rgb 80 | robot0_gripper_qpos: 81 | shape: 82 | - 2 83 | test_start_seed: 100000 84 | tqdm_interval_sec: 1.0 85 | train_start_idx: 0 86 | name: three_piece_assembly_d0 87 | shape_meta: 88 | action: 89 | shape: 90 | - 7 91 | obs: 92 | agentview_image: 93 | shape: 94 | - 3 95 | - 84 96 | - 84 97 | type: rgb 98 | robot0_eef_pos: 99 | shape: 100 | - 3 101 | robot0_eef_quat: 102 | shape: 103 | - 4 104 | robot0_eye_in_hand_image: 105 | shape: 106 | - 3 107 | - 84 108 | - 84 109 | type: rgb 110 | robot0_gripper_qpos: 111 | shape: 112 | - 2 113 | task_name: three_piece_assembly_d0 114 | -------------------------------------------------------------------------------- /diffusion_policy/common/checkpoint_util.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import os 3 | 4 | class TopKCheckpointManager: 5 | def __init__(self, 6 | save_dir, 7 | monitor_key: str, 8 | mode='min', 9 | k=1, 10 | format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' 11 | ): 12 | assert mode in ['max', 'min'] 13 | assert k >= 0 14 | 15 | self.save_dir = save_dir 16 | self.monitor_key = monitor_key 17 | self.mode = mode 18 | self.k = k 19 | self.format_str = format_str 20 | self.path_value_map = dict() 21 | 22 | def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: 23 | if self.k == 0: 24 | return None 25 | 26 | value = data[self.monitor_key] 27 | ckpt_path = os.path.join( 28 | self.save_dir, self.format_str.format(**data)) 29 | 30 | if len(self.path_value_map) < self.k: 31 | # under-capacity 32 | self.path_value_map[ckpt_path] = value 33 | return ckpt_path 
34 | 35 | # at capacity 36 | sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) 37 | min_path, min_value = sorted_map[0] 38 | max_path, max_value = sorted_map[-1] 39 | 40 | delete_path = None 41 | if self.mode == 'max': 42 | if value > min_value: 43 | delete_path = min_path 44 | else: 45 | if value < max_value: 46 | delete_path = max_path 47 | 48 | if delete_path is None: 49 | return None 50 | else: 51 | del self.path_value_map[delete_path] 52 | self.path_value_map[ckpt_path] = value 53 | 54 | if not os.path.exists(self.save_dir): 55 | os.mkdir(self.save_dir) 56 | 57 | if os.path.exists(delete_path): 58 | os.remove(delete_path) 59 | return ckpt_path 60 | -------------------------------------------------------------------------------- /diffusion_policy/common/cv2_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | import math 3 | import cv2 4 | import numpy as np 5 | 6 | def draw_reticle(img, u, v, label_color): 7 | """ 8 | Draws a reticle (cross-hair) on the image at the given position on top of 9 | the original image. 10 | @param img (In/Out) uint8 3 channel image 11 | @param u X coordinate (width) 12 | @param v Y coordinate (height) 13 | @param label_color tuple of 3 ints for RGB color used for drawing. 14 | """ 15 | # Cast to int. 16 | u = int(u) 17 | v = int(v) 18 | 19 | white = (255, 255, 255) 20 | cv2.circle(img, (u, v), 10, label_color, 1) 21 | cv2.circle(img, (u, v), 11, white, 1) 22 | cv2.circle(img, (u, v), 12, label_color, 1) 23 | cv2.line(img, (u, v + 1), (u, v + 3), white, 1) 24 | cv2.line(img, (u + 1, v), (u + 3, v), white, 1) 25 | cv2.line(img, (u, v - 1), (u, v - 3), white, 1) 26 | cv2.line(img, (u - 1, v), (u - 3, v), white, 1) 27 | 28 | 29 | def draw_text( 30 | img, 31 | *, 32 | text, 33 | uv_top_left, 34 | color=(255, 255, 255), 35 | fontScale=0.5, 36 | thickness=1, 37 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 38 | outline_color=(0, 0, 0), 39 | line_spacing=1.5, 40 | ): 41 | """ 42 | Draws multiline with an outline. 
43 | """ 44 | assert isinstance(text, str) 45 | 46 | uv_top_left = np.array(uv_top_left, dtype=float) 47 | assert uv_top_left.shape == (2,) 48 | 49 | for line in text.splitlines(): 50 | (w, h), _ = cv2.getTextSize( 51 | text=line, 52 | fontFace=fontFace, 53 | fontScale=fontScale, 54 | thickness=thickness, 55 | ) 56 | uv_bottom_left_i = uv_top_left + [0, h] 57 | org = tuple(uv_bottom_left_i.astype(int)) 58 | 59 | if outline_color is not None: 60 | cv2.putText( 61 | img, 62 | text=line, 63 | org=org, 64 | fontFace=fontFace, 65 | fontScale=fontScale, 66 | color=outline_color, 67 | thickness=thickness * 3, 68 | lineType=cv2.LINE_AA, 69 | ) 70 | cv2.putText( 71 | img, 72 | text=line, 73 | org=org, 74 | fontFace=fontFace, 75 | fontScale=fontScale, 76 | color=color, 77 | thickness=thickness, 78 | lineType=cv2.LINE_AA, 79 | ) 80 | 81 | uv_top_left += [0, h * line_spacing] 82 | 83 | 84 | def get_image_transform( 85 | input_res: Tuple[int,int]=(1280,720), 86 | output_res: Tuple[int,int]=(640,480), 87 | bgr_to_rgb: bool=False): 88 | 89 | iw, ih = input_res 90 | ow, oh = output_res 91 | rw, rh = None, None 92 | interp_method = cv2.INTER_AREA 93 | 94 | if (iw/ih) >= (ow/oh): 95 | # input is wider 96 | rh = oh 97 | rw = math.ceil(rh / ih * iw) 98 | if oh > ih: 99 | interp_method = cv2.INTER_LINEAR 100 | else: 101 | rw = ow 102 | rh = math.ceil(rw / iw * ih) 103 | if ow > iw: 104 | interp_method = cv2.INTER_LINEAR 105 | 106 | w_slice_start = (rw - ow) // 2 107 | w_slice = slice(w_slice_start, w_slice_start + ow) 108 | h_slice_start = (rh - oh) // 2 109 | h_slice = slice(h_slice_start, h_slice_start + oh) 110 | c_slice = slice(None) 111 | if bgr_to_rgb: 112 | c_slice = slice(None, None, -1) 113 | 114 | def transform(img: np.ndarray): 115 | assert img.shape == ((ih,iw,3)) 116 | # resize 117 | img = cv2.resize(img, (rw, rh), interpolation=interp_method) 118 | # crop 119 | img = img[h_slice, w_slice, c_slice] 120 | return img 121 | return transform 122 | 123 | def optimal_row_cols( 124 | n_cameras, 125 | in_wh_ratio, 126 | max_resolution=(1920, 1080) 127 | ): 128 | out_w, out_h = max_resolution 129 | out_wh_ratio = out_w / out_h 130 | 131 | n_rows = np.arange(n_cameras,dtype=np.int64) + 1 132 | n_cols = np.ceil(n_cameras / n_rows).astype(np.int64) 133 | cat_wh_ratio = in_wh_ratio * (n_cols / n_rows) 134 | ratio_diff = np.abs(out_wh_ratio - cat_wh_ratio) 135 | best_idx = np.argmin(ratio_diff) 136 | best_n_row = n_rows[best_idx] 137 | best_n_col = n_cols[best_idx] 138 | best_cat_wh_ratio = cat_wh_ratio[best_idx] 139 | 140 | rw, rh = None, None 141 | if best_cat_wh_ratio >= out_wh_ratio: 142 | # cat is wider 143 | rw = math.floor(out_w / best_n_col) 144 | rh = math.floor(rw / in_wh_ratio) 145 | else: 146 | rh = math.floor(out_h / best_n_row) 147 | rw = math.floor(rh * in_wh_ratio) 148 | 149 | # crop_resolution = (rw, rh) 150 | return rw, rh, best_n_col, best_n_row 151 | -------------------------------------------------------------------------------- /diffusion_policy/common/env_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def render_env_video(env, states, actions=None): 6 | observations = states 7 | imgs = list() 8 | for i in range(len(observations)): 9 | state = observations[i] 10 | env.set_state(state) 11 | if i == 0: 12 | env.set_state(state) 13 | img = env.render() 14 | # draw action 15 | if actions is not None: 16 | action = actions[i] 17 | coord = (action / 512 * 96).astype(np.int32) 18 | cv2.drawMarker(img, 
coord, 19 | color=(255,0,0), markerType=cv2.MARKER_CROSS, 20 | markerSize=8, thickness=1) 21 | imgs.append(img) 22 | imgs = np.array(imgs) 23 | return imgs 24 | -------------------------------------------------------------------------------- /diffusion_policy/common/json_logger.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Callable, Any, Sequence 2 | import os 3 | import copy 4 | import json 5 | import numbers 6 | import pandas as pd 7 | 8 | 9 | def read_json_log(path: str, 10 | required_keys: Sequence[str]=tuple(), 11 | **kwargs) -> pd.DataFrame: 12 | """ 13 | Read json-per-line file, with potentially incomplete lines. 14 | kwargs passed to pd.read_json 15 | """ 16 | lines = list() 17 | with open(path, 'r') as f: 18 | while True: 19 | # one json per line 20 | line = f.readline() 21 | if len(line) == 0: 22 | # EOF 23 | break 24 | elif not line.endswith('\n'): 25 | # incomplete line 26 | break 27 | is_relevant = False 28 | for k in required_keys: 29 | if k in line: 30 | is_relevant = True 31 | break 32 | if is_relevant: 33 | lines.append(line) 34 | if len(lines) < 1: 35 | return pd.DataFrame() 36 | json_buf = f'[{",".join([line for line in (line.strip() for line in lines) if line])}]' 37 | df = pd.read_json(json_buf, **kwargs) 38 | return df 39 | 40 | class JsonLogger: 41 | def __init__(self, path: str, 42 | filter_fn: Optional[Callable[[str,Any],bool]]=None): 43 | if filter_fn is None: 44 | filter_fn = lambda k,v: isinstance(v, numbers.Number) 45 | 46 | # default to append mode 47 | self.path = path 48 | self.filter_fn = filter_fn 49 | self.file = None 50 | self.last_log = None 51 | 52 | def start(self): 53 | # use line buffering 54 | try: 55 | self.file = file = open(self.path, 'r+', buffering=1) 56 | except FileNotFoundError: 57 | self.file = file = open(self.path, 'w+', buffering=1) 58 | 59 | # Move the pointer (similar to a cursor in a text editor) to the end of the file 60 | pos = file.seek(0, os.SEEK_END) 61 | 62 | # Read each character in the file one at a time from the last 63 | # character going backwards, searching for a newline character 64 | # If we find a new line, exit the search 65 | while pos > 0 and file.read(1) != "\n": 66 | pos -= 1 67 | file.seek(pos, os.SEEK_SET) 68 | # now the file pointer is at one past the last '\n' 69 | # and pos is at the last '\n'. 
70 | last_line_end = file.tell() 71 | 72 | # find the start of second last line 73 | pos = max(0, pos-1) 74 | file.seek(pos, os.SEEK_SET) 75 | while pos > 0 and file.read(1) != "\n": 76 | pos -= 1 77 | file.seek(pos, os.SEEK_SET) 78 | # now the file pointer is at one past the second last '\n' 79 | last_line_start = file.tell() 80 | 81 | if last_line_start < last_line_end: 82 | # has last line of json 83 | last_line = file.readline() 84 | self.last_log = json.loads(last_line) 85 | 86 | # remove the last incomplete line 87 | file.seek(last_line_end) 88 | file.truncate() 89 | 90 | def stop(self): 91 | self.file.close() 92 | self.file = None 93 | 94 | def __enter__(self): 95 | self.start() 96 | return self 97 | 98 | def __exit__(self, exc_type, exc_val, exc_tb): 99 | self.stop() 100 | 101 | def log(self, data: dict): 102 | filtered_data = dict( 103 | filter(lambda x: self.filter_fn(*x), data.items())) 104 | # save current as last log 105 | self.last_log = filtered_data 106 | for k, v in filtered_data.items(): 107 | if isinstance(v, numbers.Integral): 108 | filtered_data[k] = int(v) 109 | elif isinstance(v, numbers.Number): 110 | filtered_data[k] = float(v) 111 | buf = json.dumps(filtered_data) 112 | # ensure one line per json 113 | buf = buf.replace('\n','') + '\n' 114 | self.file.write(buf) 115 | 116 | def get_last_log(self): 117 | return copy.deepcopy(self.last_log) 118 | -------------------------------------------------------------------------------- /diffusion_policy/common/nested_dict_util.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | def nested_dict_map(f, x): 4 | """ 5 | Map f over all leaf of nested dict x 6 | """ 7 | 8 | if not isinstance(x, dict): 9 | return f(x) 10 | y = dict() 11 | for key, value in x.items(): 12 | y[key] = nested_dict_map(f, value) 13 | return y 14 | 15 | def nested_dict_reduce(f, x): 16 | """ 17 | Map f over all values of nested dict x, and reduce to a single value 18 | """ 19 | if not isinstance(x, dict): 20 | return x 21 | 22 | reduced_values = list() 23 | for value in x.values(): 24 | reduced_values.append(nested_dict_reduce(f, value)) 25 | y = functools.reduce(f, reduced_values) 26 | return y 27 | 28 | 29 | def nested_dict_check(f, x): 30 | bool_dict = nested_dict_map(f, x) 31 | result = nested_dict_reduce(lambda x, y: x and y, bool_dict) 32 | return result 33 | -------------------------------------------------------------------------------- /diffusion_policy/common/precise_sleep.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): 4 | """ 5 | Use hybrid of time.sleep and spinning to minimize jitter. 6 | Sleep dt - slack_time seconds first, then spin for the rest. 
7 | """ 8 | t_start = time_func() 9 | if dt > slack_time: 10 | time.sleep(dt - slack_time) 11 | t_end = t_start + dt 12 | while time_func() < t_end: 13 | pass 14 | return 15 | 16 | def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic): 17 | t_start = time_func() 18 | t_wait = t_end - t_start 19 | if t_wait > 0: 20 | t_sleep = t_wait - slack_time 21 | if t_sleep > 0: 22 | time.sleep(t_sleep) 23 | while time_func() < t_end: 24 | pass 25 | return 26 | -------------------------------------------------------------------------------- /diffusion_policy/common/pymunk_util.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pymunk 3 | import pymunk.pygame_util 4 | import numpy as np 5 | 6 | COLLTYPE_DEFAULT = 0 7 | COLLTYPE_MOUSE = 1 8 | COLLTYPE_BALL = 2 9 | 10 | def get_body_type(static=False): 11 | body_type = pymunk.Body.DYNAMIC 12 | if static: 13 | body_type = pymunk.Body.STATIC 14 | return body_type 15 | 16 | 17 | def create_rectangle(space, 18 | pos_x,pos_y,width,height, 19 | density=3,static=False): 20 | body = pymunk.Body(body_type=get_body_type(static)) 21 | body.position = (pos_x,pos_y) 22 | shape = pymunk.Poly.create_box(body,(width,height)) 23 | shape.density = density 24 | space.add(body,shape) 25 | return body, shape 26 | 27 | 28 | def create_rectangle_bb(space, 29 | left, bottom, right, top, 30 | **kwargs): 31 | pos_x = (left + right) / 2 32 | pos_y = (top + bottom) / 2 33 | height = top - bottom 34 | width = right - left 35 | return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) 36 | 37 | def create_circle(space, pos_x, pos_y, radius, density=3, static=False): 38 | body = pymunk.Body(body_type=get_body_type(static)) 39 | body.position = (pos_x, pos_y) 40 | shape = pymunk.Circle(body, radius=radius) 41 | shape.density = density 42 | shape.collision_type = COLLTYPE_BALL 43 | space.add(body, shape) 44 | return body, shape 45 | 46 | def get_body_state(body): 47 | state = np.zeros(6, dtype=np.float32) 48 | state[:2] = body.position 49 | state[2] = body.angle 50 | state[3:5] = body.velocity 51 | state[5] = body.angular_velocity 52 | return state 53 | -------------------------------------------------------------------------------- /diffusion_policy/common/pytorch_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, List 2 | import collections 3 | import torch 4 | import torch.nn as nn 5 | 6 | def dict_apply( 7 | x: Dict[str, torch.Tensor], 8 | func: Callable[[torch.Tensor], torch.Tensor] 9 | ) -> Dict[str, torch.Tensor]: 10 | result = dict() 11 | for key, value in x.items(): 12 | if isinstance(value, dict): 13 | result[key] = dict_apply(value, func) 14 | else: 15 | result[key] = func(value) 16 | return result 17 | 18 | def pad_remaining_dims(x, target): 19 | assert x.shape == target.shape[:len(x.shape)] 20 | return x.reshape(x.shape + (1,)*(len(target.shape) - len(x.shape))) 21 | 22 | def dict_apply_split( 23 | x: Dict[str, torch.Tensor], 24 | split_func: Callable[[torch.Tensor], Dict[str, torch.Tensor]] 25 | ) -> Dict[str, torch.Tensor]: 26 | results = collections.defaultdict(dict) 27 | for key, value in x.items(): 28 | result = split_func(value) 29 | for k, v in result.items(): 30 | results[k][key] = v 31 | return results 32 | 33 | def dict_apply_reduce( 34 | x: List[Dict[str, torch.Tensor]], 35 | reduce_func: Callable[[List[torch.Tensor]], torch.Tensor] 36 | ) -> Dict[str, torch.Tensor]: 37 | result = 
dict() 38 | for key in x[0].keys(): 39 | result[key] = reduce_func([x_[key] for x_ in x]) 40 | return result 41 | 42 | 43 | def replace_submodules( 44 | root_module: nn.Module, 45 | predicate: Callable[[nn.Module], bool], 46 | func: Callable[[nn.Module], nn.Module]) -> nn.Module: 47 | """ 48 | predicate: Return true if the module is to be replaced. 49 | func: Return new module to use. 50 | """ 51 | if predicate(root_module): 52 | return func(root_module) 53 | 54 | bn_list = [k.split('.') for k, m 55 | in root_module.named_modules(remove_duplicate=True) 56 | if predicate(m)] 57 | for *parent, k in bn_list: 58 | parent_module = root_module 59 | if len(parent) > 0: 60 | parent_module = root_module.get_submodule('.'.join(parent)) 61 | if isinstance(parent_module, nn.Sequential): 62 | src_module = parent_module[int(k)] 63 | else: 64 | src_module = getattr(parent_module, k) 65 | tgt_module = func(src_module) 66 | if isinstance(parent_module, nn.Sequential): 67 | parent_module[int(k)] = tgt_module 68 | else: 69 | setattr(parent_module, k, tgt_module) 70 | # verify that all BN are replaced 71 | bn_list = [k.split('.') for k, m 72 | in root_module.named_modules(remove_duplicate=True) 73 | if predicate(m)] 74 | assert len(bn_list) == 0 75 | return root_module 76 | 77 | def optimizer_to(optimizer, device): 78 | for state in optimizer.state.values(): 79 | for k, v in state.items(): 80 | if isinstance(v, torch.Tensor): 81 | state[k] = v.to(device=device) 82 | return optimizer 83 | -------------------------------------------------------------------------------- /diffusion_policy/common/robomimic_config_util.py: -------------------------------------------------------------------------------- 1 | from omegaconf import OmegaConf 2 | from robomimic.config import config_factory 3 | import robomimic.scripts.generate_paper_configs as gpc 4 | from robomimic.scripts.generate_paper_configs import ( 5 | modify_config_for_default_image_exp, 6 | modify_config_for_default_low_dim_exp, 7 | modify_config_for_dataset, 8 | ) 9 | 10 | def get_robomimic_config( 11 | algo_name='bc_rnn', 12 | hdf5_type='low_dim', 13 | task_name='square', 14 | dataset_type='ph' 15 | ): 16 | base_dataset_dir = '/tmp/null' 17 | filter_key = None 18 | 19 | # decide whether to use low-dim or image training defaults 20 | modifier_for_obs = modify_config_for_default_image_exp 21 | if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: 22 | modifier_for_obs = modify_config_for_default_low_dim_exp 23 | 24 | algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name 25 | config = config_factory(algo_name=algo_config_name) 26 | # turn into default config for observation modalities (e.g.: low-dim or rgb) 27 | config = modifier_for_obs(config) 28 | # add in config based on the dataset 29 | config = modify_config_for_dataset( 30 | config=config, 31 | task_name=task_name, 32 | dataset_type=dataset_type, 33 | hdf5_type=hdf5_type, 34 | base_dataset_dir=base_dataset_dir, 35 | filter_key=filter_key, 36 | ) 37 | # add in algo hypers based on dataset 38 | algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') 39 | config = algo_config_modifier( 40 | config=config, 41 | task_name=task_name, 42 | dataset_type=dataset_type, 43 | hdf5_type=hdf5_type, 44 | ) 45 | return config 46 | 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed.yaml: -------------------------------------------------------------------------------- 1 | name: 
blockpush_lowdim_seed 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: False 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml: -------------------------------------------------------------------------------- 1 | name: blockpush_lowdim_seed_abs 2 | 3 | obs_dim: 16 4 | action_dim: 2 5 | keypoint_dim: 2 6 | obs_eef_target: True 7 | 8 | env_runner: 9 | _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | test_start_seed: 100000 16 | max_steps: 350 17 | n_obs_steps: ${n_obs_steps} 18 | n_action_steps: ${n_action_steps} 19 | fps: 5 20 | past_action: ${past_action_visible} 21 | abs_action: True 22 | obs_eef_target: ${task.obs_eef_target} 23 | n_envs: null 24 | 25 | dataset: 26 | _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset 27 | zarr_path: data/block_pushing/multimodal_push_seed_abs.zarr 28 | horizon: ${horizon} 29 | pad_before: ${eval:'${n_obs_steps}-1'} 30 | pad_after: ${eval:'${n_action_steps}-1'} 31 | obs_eef_target: ${task.obs_eef_target} 32 | use_manual_normalizer: False 33 | seed: 42 34 | val_ratio: 0.02 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | 
render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name can 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/can_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: can_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name can 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: 
diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | dataset_dir: &dataset_dir data/kitchen 8 | 9 | env_runner: 10 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 11 | dataset_dir: *dataset_dir 12 | n_train: 6 13 | n_train_vis: 2 14 | train_start_seed: 0 15 | n_test: 50 16 | n_test_vis: 4 17 | test_start_seed: 100000 18 | max_steps: 280 19 | n_obs_steps: ${n_obs_steps} 20 | n_action_steps: ${n_action_steps} 21 | render_hw: [240, 360] 22 | fps: 12.5 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.kitchen_lowdim_dataset.KitchenLowdimDataset 28 | dataset_dir: *dataset_dir 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/kitchen_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: kitchen_lowdim 2 | 3 | obs_dim: 60 4 | action_dim: 9 5 | keypoint_dim: 3 6 | 7 | abs_action: True 8 | robot_noise_ratio: 0.1 9 | 10 | env_runner: 11 | _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner 12 | dataset_dir: data/kitchen 13 | n_train: 6 14 | n_train_vis: 2 15 | train_start_seed: 0 16 | n_test: 50 17 | n_test_vis: 4 18 | test_start_seed: 100000 19 | max_steps: 280 20 | n_obs_steps: ${n_obs_steps} 21 | n_action_steps: ${n_action_steps} 22 | render_hw: [240, 360] 23 | fps: 12.5 24 | past_action: ${past_action_visible} 25 | abs_action: ${task.abs_action} 26 | robot_noise_ratio: ${task.robot_noise_ratio} 27 | n_envs: null 28 | 29 | dataset: 30 | _target_: diffusion_policy.dataset.kitchen_mjl_lowdim_dataset.KitchenMjlLowdimDataset 31 | dataset_dir: data/kitchen/kitchen_demos_multitask 32 | horizon: ${horizon} 33 | pad_before: ${eval:'${n_obs_steps}-1'} 34 | pad_after: ${eval:'${n_action_steps}-1'} 35 | abs_action: ${task.abs_action} 36 | robot_noise_ratio: ${task.robot_noise_ratio} 37 | seed: 42 38 | val_ratio: 0.02 39 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path 
data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 1 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 3 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name lift 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | n_train: 6 32 | n_train_vis: 2 33 | train_start_idx: 0 34 | n_test: 50 35 | n_test_vis: 4 36 | test_start_seed: 100000 37 | # use python's eval function as resolver, single-quoted string as argument 38 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'agentview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/lift_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: lift_lowdim 2 | 3 | obs_dim: 19 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name lift 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 3 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | 
tqdm_interval_sec: 1.0 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | rotation_rep: rotation_6d 46 | seed: 42 47 | val_ratio: 0.02 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_image 2 | 3 | image_shape: &image_shape [3, 96, 96] 4 | shape_meta: &shape_meta 5 | # acceptable types: rgb, low_dim 6 | obs: 7 | image: 8 | shape: *image_shape 9 | type: rgb 10 | agent_pos: 11 | shape: [2] 12 | type: low_dim 13 | action: 14 | shape: [2] 15 | 16 | env_runner: 17 | _target_: diffusion_policy.env_runner.pusht_image_runner.PushTImageRunner 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_seed: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | legacy_test: True 24 | test_start_seed: 100000 25 | max_steps: 300 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | fps: 10 29 | past_action: ${past_action_visible} 30 | n_envs: null 31 | 32 | dataset: 33 | _target_: diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset 34 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 35 | horizon: ${horizon} 36 | pad_before: ${eval:'${n_obs_steps}-1'} 37 | pad_after: ${eval:'${n_action_steps}-1'} 38 | seed: 42 39 | val_ratio: 0.02 40 | max_train_episodes: 90 41 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/pusht_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: pusht_lowdim 2 | 3 | obs_dim: 20 # 9*2 keypoints + 2 state 4 | action_dim: 2 5 | keypoint_dim: 2 6 | 7 | env_runner: 8 | _target_: diffusion_policy.env_runner.pusht_keypoints_runner.PushTKeypointsRunner 9 | keypoint_visible_rate: ${keypoint_visible_rate} 10 | n_train: 6 11 | n_train_vis: 2 12 | train_start_seed: 0 13 | n_test: 50 14 | n_test_vis: 4 15 | legacy_test: True 16 | test_start_seed: 100000 17 | max_steps: 300 18 | n_obs_steps: ${n_obs_steps} 19 | n_action_steps: ${n_action_steps} 20 | n_latency_steps: ${n_latency_steps} 21 | fps: 10 22 | agent_keypoints: False 23 | past_action: ${past_action_visible} 24 | n_envs: null 25 | 26 | dataset: 27 | _target_: diffusion_policy.dataset.pusht_dataset.PushTLowdimDataset 28 | zarr_path: data/pusht/pusht_cchi_v7_replay.zarr 29 | horizon: ${horizon} 30 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 31 | pad_after: ${eval:'${n_action_steps}-1'} 32 | seed: 42 33 | val_ratio: 0.02 34 | max_train_episodes: 90 35 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/real_pusht_image.yaml: -------------------------------------------------------------------------------- 1 | name: real_image 2 | 3 | image_shape: [3, 240, 320] 4 | dataset_path: data/pusht_real/real_pusht_20230105 5 | 6 | shape_meta: &shape_meta 7 | # acceptable types: rgb, low_dim 8 | obs: 9 | # camera_0: 10 | # shape: ${task.image_shape} 11 | # type: rgb 12 | camera_1: 13 | shape: ${task.image_shape} 14 | type: rgb 15 | # camera_2: 16 | # shape: ${task.image_shape} 17 | # type: rgb 18 | camera_3: 19 | shape: 
${task.image_shape} 20 | type: rgb 21 | # camera_4: 22 | # shape: ${task.image_shape} 23 | # type: rgb 24 | robot_eef_pose: 25 | shape: [2] 26 | type: low_dim 27 | action: 28 | shape: [2] 29 | 30 | env_runner: 31 | _target_: diffusion_policy.env_runner.real_pusht_image_runner.RealPushTImageRunner 32 | 33 | dataset: 34 | _target_: diffusion_policy.dataset.real_pusht_image_dataset.RealPushTImageDataset 35 | shape_meta: *shape_meta 36 | dataset_path: ${task.dataset_path} 37 | horizon: ${horizon} 38 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 39 | pad_after: ${eval:'${n_action_steps}-1'} 40 | n_obs_steps: ${dataset_obs_steps} 41 | n_latency_steps: ${n_latency_steps} 42 | use_cache: True 43 | seed: 42 44 | val_ratio: 0.00 45 | max_train_episodes: null 46 | delta_action: False 47 | 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | agentview_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name square 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | # use python's eval function as resolver, single-quoted string as argument 39 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 40 | n_obs_steps: ${n_obs_steps} 41 | n_action_steps: ${n_action_steps} 42 | render_obs_key: 'agentview_image' 43 | fps: 10 44 | crf: 22 45 | past_action: ${past_action_visible} 46 | abs_action: *abs_action 47 | tqdm_interval_sec: 1.0 48 | n_envs: 28 49 | # evaluation at this config requires a 16 core 64GB instance. 
50 | 51 | dataset: 52 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 53 | shape_meta: *shape_meta 54 | dataset_path: *dataset_path 55 | horizon: ${horizon} 56 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 57 | pad_after: ${eval:'${n_action_steps}-1'} 58 | n_obs_steps: ${dataset_obs_steps} 59 | abs_action: *abs_action 60 | rotation_rep: 'rotation_6d' 61 | use_legacy_normalizer: False 62 | use_cache: True 63 | seed: 42 64 | val_ratio: 0.02 65 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | # use python's eval function as resolver, single-quoted string as argument 24 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 25 | n_obs_steps: ${n_obs_steps} 26 | n_action_steps: ${n_action_steps} 27 | n_latency_steps: ${n_latency_steps} 28 | render_hw: [128,128] 29 | fps: 10 30 | crf: 22 31 | past_action: ${past_action_visible} 32 | abs_action: *abs_action 33 | n_envs: 28 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | max_train_episodes: null 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/square_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: square_lowdim 2 | 3 | obs_dim: 23 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name square 9 | dataset_type: &dataset_type ph 10 | abs_action: &abs_action True 11 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 12 | 13 | 14 | env_runner: 15 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 16 | dataset_path: *dataset_path 17 | obs_keys: *obs_keys 18 | n_train: 6 19 | n_train_vis: 2 20 | train_start_idx: 0 21 | n_test: 50 22 | n_test_vis: 4 23 | test_start_seed: 100000 24 | # use python's eval function as resolver, single-quoted string as argument 25 | max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} 26 | n_obs_steps: ${n_obs_steps} 27 | n_action_steps: ${n_action_steps} 28 | n_latency_steps: ${n_latency_steps} 29 | render_hw: [128,128] 30 | fps: 10 31 | crf: 22 32 | past_action: ${past_action_visible} 33 | abs_action: 
*abs_action 34 | n_envs: 28 35 | 36 | dataset: 37 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 38 | dataset_path: *dataset_path 39 | horizon: ${horizon} 40 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 41 | pad_after: ${eval:'${n_action_steps}-1'} 42 | obs_keys: *obs_keys 43 | abs_action: *abs_action 44 | use_legacy_normalizer: False 45 | seed: 42 46 | val_ratio: 0.02 47 | max_train_episodes: null 48 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [7] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 25 | abs_action: &abs_action False 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_image_abs 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | sideview_image: 7 | shape: [3, 240, 240] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 240, 240] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | action: 20 | shape: [10] 21 | 22 | task_name: &task_name tool_hang 23 | dataset_type: &dataset_type ph 24 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 25 | abs_action: &abs_action True 26 | 27 | env_runner: 28 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 29 | dataset_path: *dataset_path 30 | shape_meta: *shape_meta 31 | # costs 1GB per env 32 | n_train: 6 33 | n_train_vis: 2 34 | train_start_idx: 0 35 | n_test: 50 36 | n_test_vis: 4 37 | test_start_seed: 100000 38 | max_steps: 700 39 | n_obs_steps: ${n_obs_steps} 40 | n_action_steps: ${n_action_steps} 41 | render_obs_key: 'sideview_image' 42 | fps: 10 43 | crf: 22 44 | past_action: ${past_action_visible} 45 | abs_action: *abs_action 46 | tqdm_interval_sec: 1.0 47 | n_envs: 28 48 | # evaluation at this config requires a 16 core 64GB instance. 
49 | 50 | dataset: 51 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 52 | shape_meta: *shape_meta 53 | dataset_path: *dataset_path 54 | horizon: ${horizon} 55 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 56 | pad_after: ${eval:'${n_action_steps}-1'} 57 | n_obs_steps: ${dataset_obs_steps} 58 | abs_action: *abs_action 59 | rotation_rep: 'rotation_6d' 60 | use_legacy_normalizer: False 61 | use_cache: True 62 | seed: 42 63 | val_ratio: 0.02 64 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 7 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 11 | abs_action: &abs_action False 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | seed: 42 45 | val_ratio: 0.02 46 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/tool_hang_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: tool_hang_lowdim 2 | 3 | obs_dim: 53 4 | action_dim: 10 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] 8 | task_name: &task_name tool_hang 9 | dataset_type: &dataset_type ph 10 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 11 | abs_action: &abs_action True 12 | 13 | env_runner: 14 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 15 | dataset_path: *dataset_path 16 | obs_keys: *obs_keys 17 | n_train: 6 18 | n_train_vis: 2 19 | train_start_idx: 0 20 | n_test: 50 21 | n_test_vis: 4 22 | test_start_seed: 100000 23 | max_steps: 700 24 | n_obs_steps: ${n_obs_steps} 25 | n_action_steps: ${n_action_steps} 26 | n_latency_steps: ${n_latency_steps} 27 | render_hw: [128,128] 28 | fps: 10 29 | crf: 22 30 | past_action: ${past_action_visible} 31 | abs_action: *abs_action 32 | n_envs: 28 33 | # seed 42 will crash MuJoCo for some reason. 
34 | 35 | dataset: 36 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 37 | dataset_path: *dataset_path 38 | horizon: ${horizon} 39 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 40 | pad_after: ${eval:'${n_action_steps}-1'} 41 | obs_keys: *obs_keys 42 | abs_action: *abs_action 43 | use_legacy_normalizer: False 44 | rotation_rep: rotation_6d 45 | seed: 42 46 | val_ratio: 0.02 47 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [14] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 38 | abs_action: &abs_action False 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_image_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_image 2 | 3 | shape_meta: &shape_meta 4 | # acceptable types: rgb, low_dim 5 | obs: 6 | shouldercamera0_image: 7 | shape: [3, 84, 84] 8 | type: rgb 9 | robot0_eye_in_hand_image: 10 | shape: [3, 84, 84] 11 | type: rgb 12 | robot0_eef_pos: 13 | shape: [3] 14 | # type default: low_dim 15 | robot0_eef_quat: 16 | shape: [4] 17 | robot0_gripper_qpos: 18 | shape: [2] 19 | shouldercamera1_image: 20 | shape: [3, 84, 84] 21 | type: rgb 22 | robot1_eye_in_hand_image: 23 | shape: [3, 84, 84] 24 | type: rgb 25 | robot1_eef_pos: 26 | shape: [3] 27 | # type default: low_dim 28 | robot1_eef_quat: 29 | shape: [4] 30 | robot1_gripper_qpos: 31 | shape: [2] 32 | action: 33 | shape: [20] 34 | 35 | task_name: &task_name transport 36 | dataset_type: &dataset_type ph 37 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 38 | abs_action: &abs_action True 39 | 40 | env_runner: 41 | _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner 42 | dataset_path: *dataset_path 43 | shape_meta: *shape_meta 44 | n_train: 6 45 | n_train_vis: 2 46 | train_start_idx: 0 47 | n_test: 50 48 | n_test_vis: 4 49 | test_start_seed: 100000 50 | max_steps: 700 51 | n_obs_steps: ${n_obs_steps} 52 | n_action_steps: ${n_action_steps} 53 | render_obs_key: 'shouldercamera0_image' 54 | fps: 10 55 | crf: 22 56 | past_action: ${past_action_visible} 57 | abs_action: *abs_action 58 | tqdm_interval_sec: 1.0 59 | n_envs: 28 60 | # evaluation at this config requires a 16 core 64GB instance. 
61 | 62 | dataset: 63 | _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset 64 | shape_meta: *shape_meta 65 | dataset_path: *dataset_path 66 | horizon: ${horizon} 67 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 68 | pad_after: ${eval:'${n_action_steps}-1'} 69 | n_obs_steps: ${dataset_obs_steps} 70 | abs_action: *abs_action 71 | rotation_rep: 'rotation_6d' 72 | use_legacy_normalizer: False 73 | use_cache: True 74 | seed: 42 75 | val_ratio: 0.02 76 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 14 # 7*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 15 | abs_action: &abs_action False 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 5 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 
38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/task/transport_lowdim_abs.yaml: -------------------------------------------------------------------------------- 1 | name: transport_lowdim 2 | 3 | obs_dim: 59 # 41+(3+4+2)*2 4 | action_dim: 20 # 10*2 5 | keypoint_dim: 3 6 | 7 | obs_keys: &obs_keys [ 8 | 'object', 9 | 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', 10 | 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' 11 | ] 12 | task_name: &task_name transport 13 | dataset_type: &dataset_type ph 14 | dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 15 | abs_action: &abs_action True 16 | 17 | env_runner: 18 | _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner 19 | dataset_path: *dataset_path 20 | obs_keys: *obs_keys 21 | n_train: 6 22 | n_train_vis: 2 23 | train_start_idx: 0 24 | n_test: 50 25 | n_test_vis: 4 26 | test_start_seed: 100000 27 | max_steps: 700 28 | n_obs_steps: ${n_obs_steps} 29 | n_action_steps: ${n_action_steps} 30 | n_latency_steps: ${n_latency_steps} 31 | render_hw: [128,128] 32 | fps: 10 33 | crf: 22 34 | past_action: ${past_action_visible} 35 | abs_action: *abs_action 36 | n_envs: 28 37 | # evaluation at this config requires a 16 core 64GB instance. 38 | 39 | dataset: 40 | _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset 41 | dataset_path: *dataset_path 42 | horizon: ${horizon} 43 | pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} 44 | pad_after: ${eval:'${n_action_steps}-1'} 45 | obs_keys: *obs_keys 46 | abs_action: *abs_action 47 | use_legacy_normalizer: False 48 | seed: 42 49 | val_ratio: 0.02 50 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_bet_lowdim_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: blockpush_lowdim_seed 4 | 5 | name: train_bet_lowdim 6 | _target_: diffusion_policy.workspace.train_bet_lowdim_workspace.TrainBETLowdimWorkspace 7 | 8 | obs_dim: ${task.obs_dim} 9 | action_dim: ${task.action_dim} 10 | keypoint_dim: ${task.keypoint_dim} 11 | task_name: ${task.name} 12 | exp_name: "default" 13 | 14 | horizon: 3 15 | n_obs_steps: 3 16 | n_action_steps: 1 17 | n_latency_steps: 0 18 | past_action_visible: False 19 | keypoint_visible_rate: 1.0 20 | obs_as_local_cond: False 21 | obs_as_global_cond: False 22 | pred_action_steps_only: False 23 | 24 | policy: 25 | _target_: diffusion_policy.policy.bet_lowdim_policy.BETLowdimPolicy 26 | 27 | action_ae: 28 | _target_: diffusion_policy.model.bet.action_ae.discretizers.k_means.KMeansDiscretizer 29 | num_bins: 24 30 | action_dim: ${action_dim} 31 | predict_offsets: True 32 | 33 | obs_encoding_net: 34 | _target_: torch.nn.Identity 35 | output_dim: ${obs_dim} 36 | 37 | state_prior: 38 | _target_: diffusion_policy.model.bet.latent_generators.mingpt.MinGPT 39 | 40 | discrete_input: false 41 | input_dim: ${obs_dim} 42 | 43 | vocab_size: 
${policy.action_ae.num_bins} 44 | 45 | # Architecture details 46 | n_layer: 4 47 | n_head: 4 48 | n_embd: 72 49 | 50 | block_size: ${horizon} # Length of history/context 51 | predict_offsets: True 52 | offset_loss_scale: 1000.0 # actions are very small 53 | focal_loss_gamma: 2.0 54 | action_dim: ${action_dim} 55 | 56 | horizon: ${horizon} 57 | n_obs_steps: ${n_obs_steps} 58 | n_action_steps: ${n_action_steps} 59 | 60 | dataloader: 61 | batch_size: 256 62 | num_workers: 1 63 | shuffle: True 64 | pin_memory: True 65 | persistent_workers: False 66 | 67 | val_dataloader: 68 | batch_size: 256 69 | num_workers: 1 70 | shuffle: False 71 | pin_memory: True 72 | persistent_workers: False 73 | 74 | optimizer: 75 | learning_rate: 0.0001 # 1e-4 76 | weight_decay: 0.1 77 | betas: [0.9, 0.95] 78 | 79 | training: 80 | device: "cuda:0" 81 | seed: 42 82 | debug: False 83 | resume: True 84 | # optimization 85 | lr_scheduler: cosine 86 | lr_warmup_steps: 500 87 | num_epochs: 5000 88 | gradient_accumulate_every: 1 89 | grad_norm_clip: 1.0 90 | enable_normalizer: True 91 | # training loop control 92 | # in epochs 93 | rollout_every: 50 94 | checkpoint_every: 50 95 | val_every: 1 96 | sample_every: 5 97 | # steps per epoch 98 | max_train_steps: null 99 | max_val_steps: null 100 | # misc 101 | tqdm_interval_sec: 1.0 102 | 103 | logging: 104 | project: diffusion_policy_debug 105 | resume: True 106 | mode: online 107 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 108 | tags: ["${name}", "${task_name}", "${exp_name}"] 109 | id: null 110 | group: null 111 | 112 | checkpoint: 113 | topk: 114 | monitor_key: test_mean_score 115 | mode: max 116 | k: 5 117 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 118 | save_last_ckpt: True 119 | save_last_snapshot: False 120 | 121 | multi_run: 122 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 123 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 124 | 125 | hydra: 126 | job: 127 | override_dirname: ${name} 128 | run: 129 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 130 | sweep: 131 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 132 | subdir: ${hydra.job.num} 133 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_diffusion_transformer_hybrid_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: lift_image_abs 4 | 5 | name: train_diffusion_transformer_hybrid 6 | _target_: diffusion_policy.workspace.train_diffusion_transformer_hybrid_workspace.TrainDiffusionTransformerHybridWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: 10 13 | n_obs_steps: 2 14 | n_action_steps: 8 15 | n_latency_steps: 0 16 | dataset_obs_steps: ${n_obs_steps} 17 | past_action_visible: False 18 | keypoint_visible_rate: 1.0 19 | obs_as_cond: True 20 | 21 | policy: 22 | _target_: diffusion_policy.policy.diffusion_transformer_hybrid_image_policy.DiffusionTransformerHybridImagePolicy 23 | 24 | shape_meta: ${shape_meta} 25 | 26 | noise_scheduler: 27 | _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler 28 | num_train_timesteps: 100 29 | beta_start: 0.0001 30 | beta_end: 0.02 31 | beta_schedule: squaredcos_cap_v2 32 | variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan 33 | clip_sample: True # required when 
predict_epsilon=False 34 | prediction_type: epsilon # or sample 35 | 36 | horizon: ${horizon} 37 | n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} 38 | n_obs_steps: ${n_obs_steps} 39 | num_inference_steps: 100 40 | 41 | crop_shape: [76, 76] 42 | obs_encoder_group_norm: True 43 | eval_fixed_crop: True 44 | 45 | n_layer: 8 46 | n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP 47 | n_head: 4 48 | n_emb: 256 49 | p_drop_emb: 0.0 50 | p_drop_attn: 0.3 51 | causal_attn: True 52 | time_as_cond: True # if false, use BERT like encoder only arch, time as input 53 | obs_as_cond: ${obs_as_cond} 54 | 55 | # scheduler.step params 56 | # predict_epsilon: True 57 | 58 | ema: 59 | _target_: diffusion_policy.model.diffusion.ema_model.EMAModel 60 | update_after_step: 0 61 | inv_gamma: 1.0 62 | power: 0.75 63 | min_value: 0.0 64 | max_value: 0.9999 65 | 66 | dataloader: 67 | batch_size: 64 68 | num_workers: 8 69 | shuffle: True 70 | pin_memory: True 71 | persistent_workers: False 72 | 73 | val_dataloader: 74 | batch_size: 64 75 | num_workers: 8 76 | shuffle: False 77 | pin_memory: True 78 | persistent_workers: False 79 | 80 | optimizer: 81 | transformer_weight_decay: 1.0e-3 82 | obs_encoder_weight_decay: 1.0e-6 83 | learning_rate: 1.0e-4 84 | betas: [0.9, 0.95] 85 | 86 | training: 87 | device: "cuda:0" 88 | seed: 42 89 | debug: False 90 | resume: True 91 | # optimization 92 | lr_scheduler: cosine 93 | # Transformer needs LR warmup 94 | lr_warmup_steps: 1000 95 | num_epochs: 3050 96 | gradient_accumulate_every: 1 97 | # EMA destroys performance when used with BatchNorm 98 | # replace BatchNorm with GroupNorm. 99 | use_ema: True 100 | # training loop control 101 | # in epochs 102 | rollout_every: 50 103 | checkpoint_every: 50 104 | val_every: 1 105 | sample_every: 5 106 | # steps per epoch 107 | max_train_steps: null 108 | max_val_steps: null 109 | # misc 110 | tqdm_interval_sec: 1.0 111 | 112 | logging: 113 | project: diffusion_policy_debug 114 | resume: True 115 | mode: online 116 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 117 | tags: ["${name}", "${task_name}", "${exp_name}"] 118 | id: null 119 | group: null 120 | 121 | checkpoint: 122 | topk: 123 | monitor_key: test_mean_score 124 | mode: max 125 | k: 5 126 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 127 | save_last_ckpt: True 128 | save_last_snapshot: False 129 | 130 | multi_run: 131 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 132 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 133 | 134 | hydra: 135 | job: 136 | override_dirname: ${name} 137 | run: 138 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 139 | sweep: 140 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 141 | subdir: ${hydra.job.num} 142 | -------------------------------------------------------------------------------- /diffusion_policy/config/train_robomimic_image_workspace.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - task: lift_image 4 | 5 | name: train_robomimic_image 6 | _target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace 7 | 8 | task_name: ${task.name} 9 | shape_meta: ${task.shape_meta} 10 | exp_name: "default" 11 | 12 | horizon: &horizon 10 13 | n_obs_steps: 1 14 | n_action_steps: 1 15 | n_latency_steps: 0 16 | dataset_obs_steps: *horizon 17 | past_action_visible: False 18 | 
keypoint_visible_rate: 1.0 19 | 20 | policy: 21 | _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy 22 | shape_meta: ${shape_meta} 23 | algo_name: bc_rnn 24 | obs_type: image 25 | # oc.select resolver: key, default 26 | task_name: ${oc.select:task.task_name,lift} 27 | dataset_type: ${oc.select:task.dataset_type,ph} 28 | crop_shape: [76,76] 29 | 30 | dataloader: 31 | batch_size: 64 32 | num_workers: 16 33 | shuffle: True 34 | pin_memory: True 35 | persistent_workers: False 36 | 37 | val_dataloader: 38 | batch_size: 64 39 | num_workers: 16 40 | shuffle: False 41 | pin_memory: True 42 | persistent_workers: False 43 | 44 | training: 45 | device: "cuda:0" 46 | seed: 42 47 | debug: False 48 | resume: True 49 | # optimization 50 | num_epochs: 3050 51 | # training loop control 52 | # in epochs 53 | rollout_every: 50 54 | checkpoint_every: 50 55 | val_every: 1 56 | sample_every: 5 57 | # steps per epoch 58 | max_train_steps: null 59 | max_val_steps: null 60 | # misc 61 | tqdm_interval_sec: 1.0 62 | 63 | logging: 64 | project: diffusion_policy_debug 65 | resume: True 66 | mode: online 67 | name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 68 | tags: ["${name}", "${task_name}", "${exp_name}"] 69 | id: null 70 | group: null 71 | 72 | checkpoint: 73 | topk: 74 | monitor_key: test_mean_score 75 | mode: max 76 | k: 5 77 | format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' 78 | save_last_ckpt: True 79 | save_last_snapshot: False 80 | 81 | multi_run: 82 | run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 83 | wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} 84 | 85 | hydra: 86 | job: 87 | override_dirname: ${name} 88 | run: 89 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 90 | sweep: 91 | dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} 92 | subdir: ${hydra.job.num} 93 | -------------------------------------------------------------------------------- /diffusion_policy/dataset/base_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import torch 4 | import torch.nn 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseLowdimDataset(torch.utils.data.Dataset): 8 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 9 | # return an empty dataset by default 10 | return BaseLowdimDataset() 11 | 12 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 13 | raise NotImplementedError() 14 | 15 | def get_all_actions(self) -> torch.Tensor: 16 | raise NotImplementedError() 17 | 18 | def __len__(self) -> int: 19 | return 0 20 | 21 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 22 | """ 23 | output: 24 | obs: T, Do 25 | action: T, Da 26 | """ 27 | raise NotImplementedError() 28 | 29 | 30 | class BaseImageDataset(torch.utils.data.Dataset): 31 | def get_validation_dataset(self) -> 'BaseLowdimDataset': 32 | # return an empty dataset by default 33 | return BaseImageDataset() 34 | 35 | def get_normalizer(self, **kwargs) -> LinearNormalizer: 36 | raise NotImplementedError() 37 | 38 | def get_all_actions(self) -> torch.Tensor: 39 | raise NotImplementedError() 40 | 41 | def __len__(self) -> int: 42 | return 0 43 | 44 | def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: 45 | """ 46 | output: 47 | obs: 48 | key: T, * 49 | action: T, Da 50 | """ 51 | raise NotImplementedError() 52 | 
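The two abstract base classes above only pin down an interface; a concrete dataset supplies the data and the normalizer. The sketch below is illustrative and not part of the repository: ToyLowdimDataset, its synthetic arrays, and the exact LinearNormalizer.fit() arguments are assumptions chosen to mirror how the replay datasets appear to use the API.

import numpy as np
import torch
from diffusion_policy.model.common.normalizer import LinearNormalizer
from diffusion_policy.dataset.base_dataset import BaseLowdimDataset

class ToyLowdimDataset(BaseLowdimDataset):
    # Hypothetical example: random (N, T, D) trajectories standing in for demonstrations.
    def __init__(self, n_episodes=16, horizon=10, obs_dim=20, action_dim=7, seed=0):
        rng = np.random.default_rng(seed)
        self.obs = rng.standard_normal((n_episodes, horizon, obs_dim)).astype(np.float32)
        self.action = rng.standard_normal((n_episodes, horizon, action_dim)).astype(np.float32)

    def get_normalizer(self, **kwargs) -> LinearNormalizer:
        normalizer = LinearNormalizer()
        # fit() arguments assumed; fits per-key min/max statistics over the last dimension
        normalizer.fit(
            data={
                'obs': self.obs.reshape(-1, self.obs.shape[-1]),
                'action': self.action.reshape(-1, self.action.shape[-1]),
            },
            last_n_dims=1, mode='limits')
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.action.reshape(-1, self.action.shape[-1]))

    def __len__(self) -> int:
        return len(self.obs)

    def __getitem__(self, idx):
        # matches the documented contract: obs is (T, Do), action is (T, Da)
        return {
            'obs': torch.from_numpy(self.obs[idx]),
            'action': torch.from_numpy(self.action[idx]),
        }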
-------------------------------------------------------------------------------- /diffusion_policy/dataset/multitask_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import pynvml 4 | import psutil 5 | 6 | class MultiDataLoader: 7 | def __init__(self, data_loaders): 8 | self.dataloaders=data_loaders 9 | self.data_loaders = [ 10 | iter(data_loader) for data_loader in data_loaders 11 | ] 12 | self.num_loaders = len(data_loaders) 13 | self.max_loader_length = max(len(loader) for loader in data_loaders) 14 | self.current_batch_idx = 0 15 | 16 | 17 | def __iter__(self): 18 | return self 19 | 20 | def __len__(self): 21 | return self.max_loader_length 22 | 23 | def get_memory_usage(self): 24 | mem=psutil.virtual_memory() 25 | print('current available memory is' +' : '+ str(round(mem.used/1024**2)) +' MIB') 26 | return round(mem.used/1024**2) 27 | 28 | def reset(self): 29 | # delete the current data loaders and reinitialize them 30 | del self.data_loaders 31 | self.data_loaders = [ 32 | iter(data_loader) for data_loader in self.dataloaders 33 | ] 34 | self.current_batch_idx = 0 35 | self.get_memory_usage() 36 | 37 | def __next__(self): 38 | if self.current_batch_idx >= self.max_loader_length: 39 | raise StopIteration 40 | self.loader_idx = self.current_batch_idx % self.num_loaders 41 | data_loader = self.data_loaders[self.loader_idx] 42 | try: 43 | batch = next(data_loader) 44 | self.current_batch_idx = self.current_batch_idx + 1 45 | return batch 46 | except StopIteration: 47 | self.current_batch_idx = self.current_batch_idx + 1 48 | return None 49 | 50 | if __name__ == "__main__": 51 | 52 | class SubDataset(Dataset): 53 | def __init__(self, data): 54 | self.data = data 55 | 56 | def __len__(self): 57 | return len(self.data) 58 | 59 | def __getitem__(self, idx): 60 | return self.data[idx] 61 | 62 | # Create some example datasets 63 | data1 = [torch.tensor([1]),] 64 | data2 = [torch.tensor([4]), torch.tensor([5]), torch.tensor([6])] 65 | 66 | # Create sub datasets and corresponding data loaders 67 | sub_dataset1 = SubDataset(data1) 68 | sub_dataset2 = SubDataset(data2) 69 | 70 | sub_data_loader1 = DataLoader(sub_dataset1, batch_size=1, shuffle=True) 71 | sub_data_loader2 = DataLoader(sub_dataset2, batch_size=1, shuffle=True) 72 | 73 | # Create the MultiDataLoader 74 | multi_data_loader = MultiDataLoader([sub_data_loader1, sub_data_loader2]) 75 | 76 | # Iterate through batches 77 | print(len(multi_data_loader)) 78 | for epoch in range(2): 79 | for batch_idx, batch in enumerate(multi_data_loader): 80 | print(f"Batch {batch_idx}: {batch}") 81 | multi_data_loader.reset() 82 | -------------------------------------------------------------------------------- /diffusion_policy/env/robomimic/robomimic_lowdim_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | import numpy as np 3 | import gym 4 | from gym.spaces import Box 5 | from robomimic.envs.env_robosuite import EnvRobosuite 6 | 7 | class RobomimicLowdimWrapper(gym.Env): 8 | def __init__(self, 9 | env: EnvRobosuite, 10 | obs_keys: List[str]=[ 11 | 'object', 12 | 'robot0_eef_pos', 13 | 'robot0_eef_quat', 14 | 'robot0_gripper_qpos'], 15 | init_state: Optional[np.ndarray]=None, 16 | render_hw=(256,256), 17 | render_camera_name='agentview' 18 | ): 19 | 20 | self.env = env 21 | self.obs_keys = obs_keys 22 | self.init_state = init_state 23 | 
self.render_hw = render_hw 24 | self.render_camera_name = render_camera_name 25 | self.seed_state_map = dict() 26 | self._seed = None 27 | 28 | # setup spaces 29 | low = np.full(env.action_dimension, fill_value=-1) 30 | high = np.full(env.action_dimension, fill_value=1) 31 | self.action_space = Box( 32 | low=low, 33 | high=high, 34 | shape=low.shape, 35 | dtype=low.dtype 36 | ) 37 | obs_example = self.get_observation() 38 | low = np.full_like(obs_example, fill_value=-1) 39 | high = np.full_like(obs_example, fill_value=1) 40 | self.observation_space = Box( 41 | low=low, 42 | high=high, 43 | shape=low.shape, 44 | dtype=low.dtype 45 | ) 46 | 47 | def get_observation(self): 48 | raw_obs = self.env.get_observation() 49 | obs = np.concatenate([ 50 | raw_obs[key] for key in self.obs_keys 51 | ], axis=0) 52 | return obs 53 | 54 | def seed(self, seed=None): 55 | np.random.seed(seed=seed) 56 | self._seed = seed 57 | 58 | def reset(self): 59 | if self.init_state is not None: 60 | # always reset to the same state 61 | # to be compatible with gym 62 | self.env.reset_to({'states': self.init_state}) 63 | elif self._seed is not None: 64 | # reset to a specific seed 65 | seed = self._seed 66 | if seed in self.seed_state_map: 67 | # env.reset is expensive, use cache 68 | self.env.reset_to({'states': self.seed_state_map[seed]}) 69 | else: 70 | # robosuite's initializes all use numpy global random state 71 | np.random.seed(seed=seed) 72 | self.env.reset() 73 | state = self.env.get_state()['states'] 74 | self.seed_state_map[seed] = state 75 | self._seed = None 76 | else: 77 | # random reset 78 | self.env.reset() 79 | 80 | # return obs 81 | obs = self.get_observation() 82 | return obs 83 | 84 | def step(self, action): 85 | raw_obs, reward, done, info = self.env.step(action) 86 | obs = np.concatenate([ 87 | raw_obs[key] for key in self.obs_keys 88 | ], axis=0) 89 | return obs, reward, done, info 90 | 91 | def render(self, mode='rgb_array'): 92 | h, w = self.render_hw 93 | return self.env.render(mode=mode, 94 | height=h, width=w, 95 | camera_name=self.render_camera_name) 96 | 97 | 98 | def test(): 99 | import robomimic.utils.file_utils as FileUtils 100 | import robomimic.utils.env_utils as EnvUtils 101 | from matplotlib import pyplot as plt 102 | 103 | dataset_path = '/home/cchi/dev/diffusion_policy/data/robomimic/datasets/square/ph/low_dim.hdf5' 104 | env_meta = FileUtils.get_env_metadata_from_dataset( 105 | dataset_path) 106 | 107 | env = EnvUtils.create_env_from_metadata( 108 | env_meta=env_meta, 109 | render=False, 110 | render_offscreen=False, 111 | use_image_obs=False, 112 | ) 113 | wrapper = RobomimicLowdimWrapper( 114 | env=env, 115 | obs_keys=[ 116 | 'object', 117 | 'robot0_eef_pos', 118 | 'robot0_eef_quat', 119 | 'robot0_gripper_qpos' 120 | ] 121 | ) 122 | 123 | states = list() 124 | for _ in range(2): 125 | wrapper.seed(0) 126 | wrapper.reset() 127 | states.append(wrapper.env.get_state()['states']) 128 | assert np.allclose(states[0], states[1]) 129 | 130 | img = wrapper.render() 131 | plt.imshow(img) 132 | # wrapper.seed() 133 | # states.append(wrapper.env.get_state()['states']) 134 | -------------------------------------------------------------------------------- /diffusion_policy/env_runner/base_image_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from diffusion_policy.policy.base_image_policy import BaseImagePolicy 3 | 4 | class BaseImageRunner: 5 | def __init__(self, output_dir): 6 | self.output_dir = output_dir 7 | 8 | 
def run(self, policy: BaseImagePolicy) -> Dict: 9 | raise NotImplementedError() 10 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_recording_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from diffusion_policy.real_world.video_recorder import VideoRecorder 4 | 5 | class VideoRecordingWrapper(gym.Wrapper): 6 | def __init__(self, 7 | env, 8 | video_recoder: VideoRecorder, 9 | mode='rgb_array', 10 | file_path=None, 11 | steps_per_render=1, 12 | **kwargs 13 | ): 14 | """ 15 | When file_path is None, don't record. 16 | """ 17 | super().__init__(env) 18 | 19 | self.mode = mode 20 | self.render_kwargs = kwargs 21 | self.steps_per_render = steps_per_render 22 | self.file_path = file_path 23 | self.video_recoder = video_recoder 24 | 25 | self.step_count = 0 26 | 27 | def reset(self, **kwargs): 28 | obs = super().reset(**kwargs) 29 | self.frames = list() 30 | self.step_count = 1 31 | self.video_recoder.stop() 32 | return obs 33 | 34 | def step(self, action): 35 | result = super().step(action) 36 | self.step_count += 1 37 | if self.file_path is not None \ 38 | and ((self.step_count % self.steps_per_render) == 0): 39 | if not self.video_recoder.is_ready(): 40 | self.video_recoder.start(self.file_path) 41 | 42 | frame = self.env.render( 43 | mode=self.mode, **self.render_kwargs) 44 | assert frame.dtype == np.uint8 45 | self.video_recoder.write_frame(frame) 46 | return result 47 | 48 | def render(self, mode='rgb_array', **kwargs): 49 | if self.video_recoder.is_ready(): 50 | self.video_recoder.stop() 51 | return self.file_path 52 | -------------------------------------------------------------------------------- /diffusion_policy/gym_util/video_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | class VideoWrapper(gym.Wrapper): 5 | def __init__(self, 6 | env, 7 | mode='rgb_array', 8 | enabled=True, 9 | steps_per_render=1, 10 | **kwargs 11 | ): 12 | super().__init__(env) 13 | 14 | self.mode = mode 15 | self.enabled = enabled 16 | self.render_kwargs = kwargs 17 | self.steps_per_render = steps_per_render 18 | 19 | self.frames = list() 20 | self.step_count = 0 21 | 22 | def reset(self, **kwargs): 23 | obs = super().reset(**kwargs) 24 | self.frames = list() 25 | self.step_count = 1 26 | if self.enabled: 27 | frame = self.env.render( 28 | mode=self.mode, **self.render_kwargs) 29 | assert frame.dtype == np.uint8 30 | self.frames.append(frame) 31 | return obs 32 | 33 | def step(self, action): 34 | result = super().step(action) 35 | self.step_count += 1 36 | if self.enabled and ((self.step_count % self.steps_per_render) == 0): 37 | frame = self.env.render( 38 | mode=self.mode, **self.render_kwargs) 39 | assert frame.dtype == np.uint8 40 | self.frames.append(frame) 41 | return result 42 | 43 | def render(self, mode='rgb_array', **kwargs): 44 | return self.frames 45 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/action_ae/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.utils.data import DataLoader 4 | import abc 5 | 6 | from typing import Optional, Union 7 | 8 | import diffusion_policy.model.bet.utils as utils 9 | 10 | 11 | class AbstractActionAE(utils.SaveModule, abc.ABC): 12 | @abc.abstractmethod 13 | def 
fit_model( 14 | self, 15 | input_dataloader: DataLoader, 16 | eval_dataloader: DataLoader, 17 | obs_encoding_net: Optional[nn.Module] = None, 18 | ) -> None: 19 | pass 20 | 21 | @abc.abstractmethod 22 | def encode_into_latent( 23 | self, 24 | input_action: torch.Tensor, 25 | input_rep: Optional[torch.Tensor], 26 | ) -> torch.Tensor: 27 | """ 28 | Given the input action, discretize it. 29 | 30 | Inputs: 31 | input_action (shape: ... x action_dim): The input action to discretize. This can be in a batch, 32 | and is generally assumed that the last dimnesion is the action dimension. 33 | 34 | Outputs: 35 | discretized_action (shape: ... x num_tokens): The discretized action. 36 | """ 37 | raise NotImplementedError 38 | 39 | @abc.abstractmethod 40 | def decode_actions( 41 | self, 42 | latent_action_batch: Optional[torch.Tensor], 43 | input_rep_batch: Optional[torch.Tensor] = None, 44 | ) -> torch.Tensor: 45 | """ 46 | Given a discretized action, convert it to a continuous action. 47 | 48 | Inputs: 49 | latent_action_batch (shape: ... x num_tokens): The discretized action 50 | generated by the discretizer. 51 | 52 | Outputs: 53 | continuous_action (shape: ... x action_dim): The continuous action. 54 | """ 55 | raise NotImplementedError 56 | 57 | @property 58 | @abc.abstractmethod 59 | def num_latents(self) -> Union[int, float]: 60 | """ 61 | Number of possible latents for this generator, useful for state priors that use softmax. 62 | """ 63 | return float("inf") 64 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/latent_generators/latent_generator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from typing import Tuple, Optional 4 | 5 | import diffusion_policy.model.bet.utils as utils 6 | 7 | 8 | class AbstractLatentGenerator(abc.ABC, utils.SaveModule): 9 | """ 10 | Abstract class for a generative model that can generate latents given observation representations. 11 | 12 | In the probabilisitc sense, this model fits and samples from P(latent|observation) given some observation. 13 | """ 14 | 15 | @abc.abstractmethod 16 | def get_latent_and_loss( 17 | self, 18 | obs_rep: torch.Tensor, 19 | target_latents: torch.Tensor, 20 | seq_masks: Optional[torch.Tensor] = None, 21 | ) -> Tuple[torch.Tensor, torch.Tensor]: 22 | """ 23 | Given a set of observation representation and generated latents, get the encoded latent and the loss. 24 | 25 | Inputs: 26 | input_action: Batch of the actions taken in the multimodal demonstrations. 27 | target_latents: Batch of the latents that the generator should learn to generate the actions from. 28 | seq_masks: Batch of masks that indicate which timesteps are valid. 29 | 30 | Outputs: 31 | latent: The sampled latent from the observation. 32 | loss: The loss of the latent generator. 33 | """ 34 | pass 35 | 36 | @abc.abstractmethod 37 | def generate_latents( 38 | self, seq_obses: torch.Tensor, seq_masks: torch.Tensor 39 | ) -> torch.Tensor: 40 | """ 41 | Given a batch of sequences of observations, generate a batch of sequences of latents. 42 | 43 | Inputs: 44 | seq_obses: Batch of sequences of observations, of shape seq x batch x dim, following the transformer convention. 45 | seq_masks: Batch of sequences of masks, of shape seq x batch, following the transformer convention. 46 | 47 | Outputs: 48 | seq_latents: Batch of sequences of latents of shape seq x batch x latent_dim. 
49 | """ 50 | pass 51 | 52 | def get_optimizer( 53 | self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] 54 | ) -> torch.optim.Optimizer: 55 | """ 56 | Default optimizer class. Override this if you want to use a different optimizer. 57 | """ 58 | return torch.optim.Adam( 59 | self.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=betas 60 | ) 61 | 62 | 63 | class LatentGeneratorDataParallel(torch.nn.DataParallel): 64 | def get_latent_and_loss(self, *args, **kwargs): 65 | return self.module.get_latent_and_loss(*args, **kwargs) # type: ignore 66 | 67 | def generate_latents(self, *args, **kwargs): 68 | return self.module.generate_latents(*args, **kwargs) # type: ignore 69 | 70 | def get_optimizer(self, *args, **kwargs): 71 | return self.module.get_optimizer(*args, **kwargs) # type: ignore 72 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/latent_generators/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import einops 5 | import diffusion_policy.model.bet.latent_generators.latent_generator as latent_generator 6 | 7 | from diffusion_policy.model.diffusion.transformer_for_diffusion import TransformerForDiffusion 8 | from diffusion_policy.model.bet.libraries.loss_fn import FocalLoss, soft_cross_entropy 9 | 10 | from typing import Optional, Tuple 11 | 12 | class Transformer(latent_generator.AbstractLatentGenerator): 13 | def __init__( 14 | self, 15 | input_dim: int, 16 | num_bins: int, 17 | action_dim: int, 18 | horizon: int, 19 | focal_loss_gamma: float, 20 | offset_loss_scale: float, 21 | **kwargs 22 | ): 23 | super().__init__() 24 | self.model = TransformerForDiffusion( 25 | input_dim=input_dim, 26 | output_dim=num_bins * (1 + action_dim), 27 | horizon=horizon, 28 | **kwargs 29 | ) 30 | self.vocab_size = num_bins 31 | self.focal_loss_gamma = focal_loss_gamma 32 | self.offset_loss_scale = offset_loss_scale 33 | self.action_dim = action_dim 34 | 35 | def get_optimizer(self, **kwargs) -> torch.optim.Optimizer: 36 | return self.model.configure_optimizers(**kwargs) 37 | 38 | def get_latent_and_loss(self, 39 | obs_rep: torch.Tensor, 40 | target_latents: torch.Tensor, 41 | return_loss_components=True, 42 | ) -> Tuple[torch.Tensor, torch.Tensor]: 43 | target_latents, target_offsets = target_latents 44 | target_latents = target_latents.view(-1) 45 | criterion = FocalLoss(gamma=self.focal_loss_gamma) 46 | 47 | t = torch.tensor(0, device=self.model.device) 48 | output = self.model(obs_rep, t) 49 | logits = output[:, :, : self.vocab_size] 50 | offsets = output[:, :, self.vocab_size :] 51 | batch = logits.shape[0] 52 | seq = logits.shape[1] 53 | offsets = einops.rearrange( 54 | offsets, 55 | "N T (V A) -> (N T) V A", # N = batch, T = seq 56 | V=self.vocab_size, 57 | A=self.action_dim, 58 | ) 59 | # calculate (optionally soft) cross entropy and offset losses 60 | class_loss = criterion(logits.view(-1, logits.size(-1)), target_latents) 61 | # offset loss is only calculated on the target class 62 | # if soft targets, argmax is considered the target class 63 | selected_offsets = offsets[ 64 | torch.arange(offsets.size(0)), 65 | target_latents.view(-1), 66 | ] 67 | offset_loss = self.offset_loss_scale * F.mse_loss( 68 | selected_offsets, target_offsets.view(-1, self.action_dim) 69 | ) 70 | loss = offset_loss + class_loss 71 | logits = einops.rearrange(logits, "batch seq classes -> 
seq batch classes") 72 | offsets = einops.rearrange( 73 | offsets, 74 | "(N T) V A -> T N V A", # ? N, T order? Anyway does not affect loss and training (might affect visualization) 75 | N=batch, 76 | T=seq, 77 | ) 78 | return ( 79 | (logits, offsets), 80 | loss, 81 | {"offset": offset_loss, "class": class_loss, "total": loss}, 82 | ) 83 | 84 | def generate_latents( 85 | self, obs_rep: torch.Tensor 86 | ) -> torch.Tensor: 87 | t = torch.tensor(0, device=self.model.device) 88 | output = self.model(obs_rep, t) 89 | logits = output[:, :, : self.vocab_size] 90 | offsets = output[:, :, self.vocab_size :] 91 | offsets = einops.rearrange( 92 | offsets, 93 | "N T (V A) -> (N T) V A", # N = batch, T = seq 94 | V=self.vocab_size, 95 | A=self.action_dim, 96 | ) 97 | 98 | probs = F.softmax(logits, dim=-1) 99 | batch, seq, choices = probs.shape 100 | # Sample from the multinomial distribution, one per row. 101 | sampled_data = torch.multinomial(probs.view(-1, choices), num_samples=1) 102 | sampled_data = einops.rearrange( 103 | sampled_data, "(batch seq) 1 -> batch seq 1", batch=batch, seq=seq 104 | ) 105 | sampled_offsets = offsets[ 106 | torch.arange(offsets.shape[0]), sampled_data.flatten() 107 | ].view(batch, seq, self.action_dim) 108 | return (sampled_data, sampled_offsets) 109 | -------------------------------------------------------------------------------- /diffusion_policy/model/bet/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from collections import OrderedDict 4 | from typing import List, Optional 5 | 6 | import einops 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from torch.utils.data import random_split 12 | import wandb 13 | 14 | 15 | def mlp(input_dim, hidden_dim, output_dim, hidden_depth, output_mod=None): 16 | if hidden_depth == 0: 17 | mods = [nn.Linear(input_dim, output_dim)] 18 | else: 19 | mods = [nn.Linear(input_dim, hidden_dim), nn.ReLU(inplace=True)] 20 | for i in range(hidden_depth - 1): 21 | mods += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU(inplace=True)] 22 | mods.append(nn.Linear(hidden_dim, output_dim)) 23 | if output_mod is not None: 24 | mods.append(output_mod) 25 | trunk = nn.Sequential(*mods) 26 | return trunk 27 | 28 | 29 | class eval_mode: 30 | def __init__(self, *models, no_grad=False): 31 | self.models = models 32 | self.no_grad = no_grad 33 | self.no_grad_context = torch.no_grad() 34 | 35 | def __enter__(self): 36 | self.prev_states = [] 37 | for model in self.models: 38 | self.prev_states.append(model.training) 39 | model.train(False) 40 | if self.no_grad: 41 | self.no_grad_context.__enter__() 42 | 43 | def __exit__(self, *args): 44 | if self.no_grad: 45 | self.no_grad_context.__exit__(*args) 46 | for model, state in zip(self.models, self.prev_states): 47 | model.train(state) 48 | return False 49 | 50 | 51 | def freeze_module(module: nn.Module) -> nn.Module: 52 | for param in module.parameters(): 53 | param.requires_grad = False 54 | module.eval() 55 | return module 56 | 57 | 58 | def set_seed_everywhere(seed): 59 | torch.manual_seed(seed) 60 | if torch.cuda.is_available(): 61 | torch.cuda.manual_seed_all(seed) 62 | np.random.seed(seed) 63 | random.seed(seed) 64 | 65 | 66 | def shuffle_along_axis(a, axis): 67 | idx = np.random.rand(*a.shape).argsort(axis=axis) 68 | return np.take_along_axis(a, idx, axis=axis) 69 | 70 | 71 | def transpose_batch_timestep(*args): 72 | return (einops.rearrange(arg, "b t ... 
-> t b ...") for arg in args) 73 | 74 | 75 | class TrainWithLogger: 76 | def reset_log(self): 77 | self.log_components = OrderedDict() 78 | 79 | def log_append(self, log_key, length, loss_components): 80 | for key, value in loss_components.items(): 81 | key_name = f"{log_key}/{key}" 82 | count, sum = self.log_components.get(key_name, (0, 0.0)) 83 | self.log_components[key_name] = ( 84 | count + length, 85 | sum + (length * value.detach().cpu().item()), 86 | ) 87 | 88 | def flush_log(self, epoch, iterator=None): 89 | log_components = OrderedDict() 90 | iterator_log_component = OrderedDict() 91 | for key, value in self.log_components.items(): 92 | count, sum = value 93 | to_log = sum / count 94 | log_components[key] = to_log 95 | # Set the iterator status 96 | log_key, name_key = key.split("/") 97 | iterator_log_name = f"{log_key[0]}{name_key[0]}".upper() 98 | iterator_log_component[iterator_log_name] = to_log 99 | postfix = ",".join( 100 | "{}:{:.2e}".format(key, iterator_log_component[key]) 101 | for key in iterator_log_component.keys() 102 | ) 103 | if iterator is not None: 104 | iterator.set_postfix_str(postfix) 105 | wandb.log(log_components, step=epoch) 106 | self.log_components = OrderedDict() 107 | 108 | 109 | class SaveModule(nn.Module): 110 | def set_snapshot_path(self, path): 111 | self.snapshot_path = path 112 | print(f"Setting snapshot path to {self.snapshot_path}") 113 | 114 | def save_snapshot(self): 115 | os.makedirs(self.snapshot_path, exist_ok=True) 116 | torch.save(self.state_dict(), self.snapshot_path / "snapshot.pth") 117 | 118 | def load_snapshot(self): 119 | self.load_state_dict(torch.load(self.snapshot_path / "snapshot.pth")) 120 | 121 | 122 | def split_datasets(dataset, train_fraction=0.95, random_seed=42): 123 | dataset_length = len(dataset) 124 | lengths = [ 125 | int(train_fraction * dataset_length), 126 | dataset_length - int(train_fraction * dataset_length), 127 | ] 128 | train_set, val_set = random_split( 129 | dataset, lengths, generator=torch.Generator().manual_seed(random_seed) 130 | ) 131 | return train_set, val_set 132 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/dict_of_tensor_mixin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class DictOfTensorMixin(nn.Module): 5 | def __init__(self, params_dict=None): 6 | super().__init__() 7 | if params_dict is None: 8 | params_dict = nn.ParameterDict() 9 | self.params_dict = params_dict 10 | 11 | @property 12 | def device(self): 13 | return next(iter(self.parameters())).device 14 | 15 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): 16 | def dfs_add(dest, keys, value: torch.Tensor): 17 | if len(keys) == 1: 18 | dest[keys[0]] = value 19 | return 20 | 21 | if keys[0] not in dest: 22 | dest[keys[0]] = nn.ParameterDict() 23 | dfs_add(dest[keys[0]], keys[1:], value) 24 | 25 | def load_dict(state_dict, prefix): 26 | out_dict = nn.ParameterDict() 27 | for key, value in state_dict.items(): 28 | value: torch.Tensor 29 | if key.startswith(prefix): 30 | param_keys = key[len(prefix):].split('.')[1:] 31 | # if len(param_keys) == 0: 32 | # import pdb; pdb.set_trace() 33 | dfs_add(out_dict, param_keys, value.clone()) 34 | return out_dict 35 | 36 | self.params_dict = load_dict(state_dict, prefix + 'params_dict') 37 | self.params_dict.requires_grad_(False) 38 | return 39 | 
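DictOfTensorMixin above exists so that a nested nn.ParameterDict (for example per-key normalization statistics) survives a state_dict round trip even though its keys are only created at fit time; the overridden _load_from_state_dict rebuilds the nesting via dfs_add. A minimal usage sketch, not part of the repository, assuming a PyTorch version whose ParameterDict accepts nested ParameterDicts (as the repository's pinned version does):

import torch
import torch.nn as nn
from diffusion_policy.model.common.dict_of_tensor_mixin import DictOfTensorMixin

m = DictOfTensorMixin()
# keys created after construction, e.g. by a normalizer's fit()
m.params_dict['obs'] = nn.ParameterDict({
    'scale': nn.Parameter(torch.ones(3)),
    'offset': nn.Parameter(torch.zeros(3)),
})

state = m.state_dict()            # flat keys such as 'params_dict.obs.scale'
restored = DictOfTensorMixin()    # starts with an empty params_dict
restored.load_state_dict(state)   # nesting is rebuilt by the custom loader
assert torch.allclose(restored.params_dict['obs']['scale'], torch.ones(3))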
-------------------------------------------------------------------------------- /diffusion_policy/model/common/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from diffusers.optimization import ( 2 | Union, SchedulerType, Optional, 3 | Optimizer, TYPE_TO_SCHEDULER_FUNCTION 4 | ) 5 | 6 | def get_scheduler( 7 | name: Union[str, SchedulerType], 8 | optimizer: Optimizer, 9 | num_warmup_steps: Optional[int] = None, 10 | num_training_steps: Optional[int] = None, 11 | **kwargs 12 | ): 13 | """ 14 | Added kwargs vs diffuser's original implementation 15 | 16 | Unified API to get any scheduler from its name. 17 | 18 | Args: 19 | name (`str` or `SchedulerType`): 20 | The name of the scheduler to use. 21 | optimizer (`torch.optim.Optimizer`): 22 | The optimizer that will be used during training. 23 | num_warmup_steps (`int`, *optional*): 24 | The number of warmup steps to do. This is not required by all schedulers (hence the argument being 25 | optional), the function will raise an error if it's unset and the scheduler type requires it. 26 | num_training_steps (`int``, *optional*): 27 | The number of training steps to do. This is not required by all schedulers (hence the argument being 28 | optional), the function will raise an error if it's unset and the scheduler type requires it. 29 | """ 30 | name = SchedulerType(name) 31 | schedule_func = TYPE_TO_SCHEDULER_FUNCTION[name] 32 | if name == SchedulerType.CONSTANT: 33 | return schedule_func(optimizer, **kwargs) 34 | 35 | # All other schedulers require `num_warmup_steps` 36 | if num_warmup_steps is None: 37 | raise ValueError(f"{name} requires `num_warmup_steps`, please provide that argument.") 38 | 39 | if name == SchedulerType.CONSTANT_WITH_WARMUP: 40 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, **kwargs) 41 | 42 | # All other schedulers require `num_training_steps` 43 | if num_training_steps is None: 44 | raise ValueError(f"{name} requires `num_training_steps`, please provide that argument.") 45 | 46 | return schedule_func(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, **kwargs) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/module_attr_mixin.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class ModuleAttrMixin(nn.Module): 4 | def __init__(self): 5 | super().__init__() 6 | self._dummy_variable = nn.Parameter() 7 | 8 | @property 9 | def device(self): 10 | return next(iter(self.parameters())).device 11 | 12 | @property 13 | def dtype(self): 14 | return next(iter(self.parameters())).dtype 15 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/rotation_transformer.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import pytorch3d.transforms as pt 3 | import torch 4 | import numpy as np 5 | import functools 6 | 7 | class RotationTransformer: 8 | valid_reps = [ 9 | 'axis_angle', 10 | 'euler_angles', 11 | 'quaternion', 12 | 'rotation_6d', 13 | 'matrix' 14 | ] 15 | 16 | def __init__(self, 17 | from_rep='axis_angle', 18 | to_rep='rotation_6d', 19 | from_convention=None, 20 | to_convention=None): 21 | """ 22 | Valid representations 23 | 24 | Always use matrix as intermediate representation. 
25 | """ 26 | assert from_rep != to_rep 27 | assert from_rep in self.valid_reps 28 | assert to_rep in self.valid_reps 29 | if from_rep == 'euler_angles': 30 | assert from_convention is not None 31 | if to_rep == 'euler_angles': 32 | assert to_convention is not None 33 | 34 | forward_funcs = list() 35 | inverse_funcs = list() 36 | 37 | if from_rep != 'matrix': 38 | funcs = [ 39 | getattr(pt, f'{from_rep}_to_matrix'), 40 | getattr(pt, f'matrix_to_{from_rep}') 41 | ] 42 | if from_convention is not None: 43 | funcs = [functools.partial(func, convernsion=from_convention) 44 | for func in funcs] 45 | forward_funcs.append(funcs[0]) 46 | inverse_funcs.append(funcs[1]) 47 | 48 | if to_rep != 'matrix': 49 | funcs = [ 50 | getattr(pt, f'matrix_to_{to_rep}'), 51 | getattr(pt, f'{to_rep}_to_matrix') 52 | ] 53 | if to_convention is not None: 54 | funcs = [functools.partial(func, convernsion=to_convention) 55 | for func in funcs] 56 | forward_funcs.append(funcs[0]) 57 | inverse_funcs.append(funcs[1]) 58 | 59 | inverse_funcs = inverse_funcs[::-1] 60 | 61 | self.forward_funcs = forward_funcs 62 | self.inverse_funcs = inverse_funcs 63 | 64 | @staticmethod 65 | def _apply_funcs(x: Union[np.ndarray, torch.Tensor], funcs: list) -> Union[np.ndarray, torch.Tensor]: 66 | x_ = x 67 | if isinstance(x, np.ndarray): 68 | x_ = torch.from_numpy(x) 69 | x_: torch.Tensor 70 | for func in funcs: 71 | x_ = func(x_) 72 | y = x_ 73 | if isinstance(x, np.ndarray): 74 | y = x_.numpy() 75 | return y 76 | 77 | def forward(self, x: Union[np.ndarray, torch.Tensor] 78 | ) -> Union[np.ndarray, torch.Tensor]: 79 | return self._apply_funcs(x, self.forward_funcs) 80 | 81 | def inverse(self, x: Union[np.ndarray, torch.Tensor] 82 | ) -> Union[np.ndarray, torch.Tensor]: 83 | return self._apply_funcs(x, self.inverse_funcs) 84 | 85 | 86 | def test(): 87 | tf = RotationTransformer() 88 | 89 | rotvec = np.random.uniform(-2*np.pi,2*np.pi,size=(1000,3)) 90 | rot6d = tf.forward(rotvec) 91 | new_rotvec = tf.inverse(rot6d) 92 | 93 | from scipy.spatial.transform import Rotation 94 | diff = Rotation.from_rotvec(rotvec) * Rotation.from_rotvec(new_rotvec).inv() 95 | dist = diff.magnitude() 96 | assert dist.max() < 1e-7 97 | 98 | tf = RotationTransformer('rotation_6d', 'matrix') 99 | rot6d_wrong = rot6d + np.random.normal(scale=0.1, size=rot6d.shape) 100 | mat = tf.forward(rot6d_wrong) 101 | mat_det = np.linalg.det(mat) 102 | assert np.allclose(mat_det, 1) 103 | # rotaiton_6d will be normalized to rotation matrix 104 | -------------------------------------------------------------------------------- /diffusion_policy/model/common/shape_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple, Callable 2 | import torch 3 | import torch.nn as nn 4 | 5 | def get_module_device(m: nn.Module): 6 | device = torch.device('cpu') 7 | try: 8 | param = next(iter(m.parameters())) 9 | device = param.device 10 | except StopIteration: 11 | pass 12 | return device 13 | 14 | @torch.no_grad() 15 | def get_output_shape( 16 | input_shape: Tuple[int], 17 | net: Callable[[torch.Tensor], torch.Tensor] 18 | ): 19 | device = get_module_device(net) 20 | test_input = torch.zeros((1,)+tuple(input_shape), device=device) 21 | test_output = net(test_input) 22 | output_shape = tuple(test_output.shape[1:]) 23 | return output_shape 24 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/conv1d_components.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | # from einops.layers.torch import Rearrange 5 | 6 | 7 | class Downsample1d(nn.Module): 8 | def __init__(self, dim): 9 | super().__init__() 10 | self.conv = nn.Conv1d(dim, dim, 3, 2, 1) 11 | 12 | def forward(self, x): 13 | return self.conv(x) 14 | 15 | class Upsample1d(nn.Module): 16 | def __init__(self, dim): 17 | super().__init__() 18 | self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) 19 | 20 | def forward(self, x): 21 | return self.conv(x) 22 | 23 | class Conv1dBlock(nn.Module): 24 | ''' 25 | Conv1d --> GroupNorm --> Mish 26 | ''' 27 | 28 | def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): 29 | super().__init__() 30 | 31 | self.block = nn.Sequential( 32 | nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), 33 | # Rearrange('batch channels horizon -> batch channels 1 horizon'), 34 | nn.GroupNorm(n_groups, out_channels), 35 | # Rearrange('batch channels 1 horizon -> batch channels horizon'), 36 | nn.Mish(), 37 | ) 38 | 39 | def forward(self, x): 40 | return self.block(x) 41 | 42 | 43 | def test(): 44 | cb = Conv1dBlock(256, 128, kernel_size=3) 45 | x = torch.zeros((1,256,16)) 46 | o = cb(x) 47 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/ema_model.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from torch.nn.modules.batchnorm import _BatchNorm 4 | 5 | class EMAModel: 6 | """ 7 | Exponential Moving Average of models weights 8 | """ 9 | 10 | def __init__( 11 | self, 12 | model, 13 | update_after_step=0, 14 | inv_gamma=1.0, 15 | power=2 / 3, 16 | min_value=0.0, 17 | max_value=0.9999 18 | ): 19 | """ 20 | @crowsonkb's notes on EMA Warmup: 21 | If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan 22 | to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), 23 | gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 24 | at 215.4k steps). 25 | Args: 26 | inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. 27 | power (float): Exponential factor of EMA warmup. Default: 2/3. 28 | min_value (float): The minimum EMA decay rate. Default: 0. 29 | """ 30 | 31 | self.averaged_model = model 32 | self.averaged_model.eval() 33 | self.averaged_model.requires_grad_(False) 34 | 35 | self.update_after_step = update_after_step 36 | self.inv_gamma = inv_gamma 37 | self.power = power 38 | self.min_value = min_value 39 | self.max_value = max_value 40 | 41 | self.decay = 0.0 42 | self.optimization_step = 0 43 | 44 | def get_decay(self, optimization_step): 45 | """ 46 | Compute the decay factor for the exponential moving average. 
47 | """ 48 | step = max(0, optimization_step - self.update_after_step - 1) 49 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 50 | 51 | if step <= 0: 52 | return 0.0 53 | 54 | return max(self.min_value, min(value, self.max_value)) 55 | 56 | @torch.no_grad() 57 | def step(self, new_model): 58 | self.decay = self.get_decay(self.optimization_step) 59 | 60 | # old_all_dataptrs = set() 61 | # for param in new_model.parameters(): 62 | # data_ptr = param.data_ptr() 63 | # if data_ptr != 0: 64 | # old_all_dataptrs.add(data_ptr) 65 | 66 | all_dataptrs = set() 67 | for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): 68 | for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): 69 | # iterative over immediate parameters only. 70 | if isinstance(param, dict): 71 | raise RuntimeError('Dict parameter not supported') 72 | 73 | # data_ptr = param.data_ptr() 74 | # if data_ptr != 0: 75 | # all_dataptrs.add(data_ptr) 76 | 77 | if isinstance(module, _BatchNorm): 78 | # skip batchnorms 79 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 80 | elif not param.requires_grad: 81 | ema_param.copy_(param.to(dtype=ema_param.dtype).data) 82 | else: 83 | ema_param.mul_(self.decay) 84 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 85 | 86 | # verify that iterating over module and then parameters is identical to parameters recursively. 87 | # assert old_all_dataptrs == all_dataptrs 88 | self.optimization_step += 1 89 | -------------------------------------------------------------------------------- /diffusion_policy/model/diffusion/positional_embedding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | class SinusoidalPosEmb(nn.Module): 6 | def __init__(self, dim): 7 | super().__init__() 8 | self.dim = dim 9 | 10 | def forward(self, x): 11 | device = x.device 12 | half_dim = self.dim // 2 13 | emb = math.log(10000) / (half_dim - 1) 14 | emb = torch.exp(torch.arange(half_dim, device=device) * -emb) 15 | emb = x[:, None] * emb[None, :] 16 | emb = torch.cat((emb.sin(), emb.cos()), dim=-1) 17 | return emb 18 | -------------------------------------------------------------------------------- /diffusion_policy/model/vision/model_getter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | def get_resnet(name, weights=None, **kwargs): 5 | """ 6 | name: resnet18, resnet34, resnet50 7 | weights: "IMAGENET1K_V1", "r3m" 8 | """ 9 | # load r3m weights 10 | if (weights == "r3m") or (weights == "R3M"): 11 | return get_r3m(name=name, **kwargs) 12 | 13 | func = getattr(torchvision.models, name) 14 | resnet = func(weights=weights, **kwargs) 15 | resnet.fc = torch.nn.Identity() 16 | return resnet 17 | 18 | def get_r3m(name, **kwargs): 19 | """ 20 | name: resnet18, resnet34, resnet50 21 | """ 22 | import r3m 23 | r3m.device = 'cpu' 24 | model = r3m.load_r3m(name) 25 | r3m_model = model.module 26 | resnet_model = r3m_model.convnet 27 | resnet_model = resnet_model.to('cpu') 28 | return resnet_model 29 | -------------------------------------------------------------------------------- /diffusion_policy/policy/base_image_policy.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import torch 3 | import torch.nn as nn 4 | from diffusion_policy.model.common.module_attr_mixin import 
ModuleAttrMixin 5 | from diffusion_policy.model.common.normalizer import LinearNormalizer 6 | 7 | class BaseImagePolicy(ModuleAttrMixin): 8 | # init accepts keyword argument shape_meta, see config/task/*_image.yaml 9 | 10 | def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: 11 | """ 12 | obs_dict: 13 | str: B,To,* 14 | return: B,Ta,Da 15 | """ 16 | raise NotImplementedError() 17 | 18 | # reset state for stateful policies 19 | def reset(self): 20 | pass 21 | 22 | # ========== training =========== 23 | # no standard training interface except setting normalizer 24 | def set_normalizer(self, normalizer: LinearNormalizer): 25 | raise NotImplementedError() 26 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/keystroke_counter.py: -------------------------------------------------------------------------------- 1 | from pynput.keyboard import Key, KeyCode, Listener 2 | from collections import defaultdict 3 | from threading import Lock 4 | 5 | class KeystrokeCounter(Listener): 6 | def __init__(self): 7 | self.key_count_map = defaultdict(lambda:0) 8 | self.key_press_list = list() 9 | self.lock = Lock() 10 | super().__init__(on_press=self.on_press, on_release=self.on_release) 11 | 12 | def on_press(self, key): 13 | with self.lock: 14 | self.key_count_map[key] += 1 15 | self.key_press_list.append(key) 16 | 17 | def on_release(self, key): 18 | pass 19 | 20 | def clear(self): 21 | with self.lock: 22 | self.key_count_map = defaultdict(lambda:0) 23 | self.key_press_list = list() 24 | 25 | def __getitem__(self, key): 26 | with self.lock: 27 | return self.key_count_map[key] 28 | 29 | def get_press_events(self): 30 | with self.lock: 31 | events = list(self.key_press_list) 32 | self.key_press_list = list() 33 | return events 34 | 35 | if __name__ == '__main__': 36 | import time 37 | with KeystrokeCounter() as counter: 38 | try: 39 | while True: 40 | print('Space:', counter[Key.space]) 41 | print('q:', counter[KeyCode(char='q')]) 42 | time.sleep(1/60) 43 | except KeyboardInterrupt: 44 | events = counter.get_press_events() 45 | print(events) 46 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/multi_camera_visualizer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import multiprocessing as mp 3 | import numpy as np 4 | import cv2 5 | from threadpoolctl import threadpool_limits 6 | from diffusion_policy.real_world.multi_realsense import MultiRealsense 7 | 8 | class MultiCameraVisualizer(mp.Process): 9 | def __init__(self, 10 | realsense: MultiRealsense, 11 | row, col, 12 | window_name='Multi Cam Vis', 13 | vis_fps=60, 14 | fill_value=0, 15 | rgb_to_bgr=True 16 | ): 17 | super().__init__() 18 | self.row = row 19 | self.col = col 20 | self.window_name = window_name 21 | self.vis_fps = vis_fps 22 | self.fill_value = fill_value 23 | self.rgb_to_bgr=rgb_to_bgr 24 | self.realsense = realsense 25 | # shared variables 26 | self.stop_event = mp.Event() 27 | 28 | def start(self, wait=False): 29 | super().start() 30 | 31 | def stop(self, wait=False): 32 | self.stop_event.set() 33 | if wait: 34 | self.stop_wait() 35 | 36 | def start_wait(self): 37 | pass 38 | 39 | def stop_wait(self): 40 | self.join() 41 | 42 | def run(self): 43 | cv2.setNumThreads(1) 44 | threadpool_limits(1) 45 | channel_slice = slice(None) 46 | if self.rgb_to_bgr: 47 | channel_slice = slice(None,None,-1) 48 | 49 | vis_data = None 50 | 
vis_img = None 51 | while not self.stop_event.is_set(): 52 | vis_data = self.realsense.get_vis(out=vis_data) 53 | color = vis_data['color'] 54 | N, H, W, C = color.shape 55 | assert C == 3 56 | oh = H * self.row 57 | ow = W * self.col 58 | if vis_img is None: 59 | vis_img = np.full((oh, ow, 3), 60 | fill_value=self.fill_value, dtype=np.uint8) 61 | for row in range(self.row): 62 | for col in range(self.col): 63 | idx = col + row * self.col 64 | h_start = H * row 65 | h_end = h_start + H 66 | w_start = W * col 67 | w_end = w_start + W 68 | if idx < N: 69 | # opencv uses bgr 70 | vis_img[h_start:h_end,w_start:w_end 71 | ] = color[idx,:,:,channel_slice] 72 | cv2.imshow(self.window_name, vis_img) 73 | cv2.pollKey() 74 | time.sleep(1 / self.vis_fps) 75 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/real_inference_util.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Callable, Tuple 2 | import numpy as np 3 | from diffusion_policy.common.cv2_util import get_image_transform 4 | 5 | def get_real_obs_dict( 6 | env_obs: Dict[str, np.ndarray], 7 | shape_meta: dict, 8 | ) -> Dict[str, np.ndarray]: 9 | obs_dict_np = dict() 10 | obs_shape_meta = shape_meta['obs'] 11 | for key, attr in obs_shape_meta.items(): 12 | type = attr.get('type', 'low_dim') 13 | shape = attr.get('shape') 14 | if type == 'rgb': 15 | this_imgs_in = env_obs[key] 16 | t,hi,wi,ci = this_imgs_in.shape 17 | co,ho,wo = shape 18 | assert ci == co 19 | out_imgs = this_imgs_in 20 | if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8): 21 | tf = get_image_transform( 22 | input_res=(wi,hi), 23 | output_res=(wo,ho), 24 | bgr_to_rgb=False) 25 | out_imgs = np.stack([tf(x) for x in this_imgs_in]) 26 | if this_imgs_in.dtype == np.uint8: 27 | out_imgs = out_imgs.astype(np.float32) / 255 28 | # THWC to TCHW 29 | obs_dict_np[key] = np.moveaxis(out_imgs,-1,1) 30 | elif type == 'low_dim': 31 | this_data_in = env_obs[key] 32 | if 'pose' in key and shape == (2,): 33 | # take X,Y coordinates 34 | this_data_in = this_data_in[...,[0,1]] 35 | obs_dict_np[key] = this_data_in 36 | return obs_dict_np 37 | 38 | 39 | def get_real_obs_resolution( 40 | shape_meta: dict 41 | ) -> Tuple[int, int]: 42 | out_res = None 43 | obs_shape_meta = shape_meta['obs'] 44 | for key, attr in obs_shape_meta.items(): 45 | type = attr.get('type', 'low_dim') 46 | shape = attr.get('shape') 47 | if type == 'rgb': 48 | co,ho,wo = shape 49 | if out_res is None: 50 | out_res = (wo, ho) 51 | assert out_res == (wo, ho) 52 | return out_res 53 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/realsense_config/415_high_accuracy_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "aux-param-autoexposure-setpoint": "400", 3 | "aux-param-colorcorrection1": "0.461914", 4 | "aux-param-colorcorrection10": "-0.553711", 5 | "aux-param-colorcorrection11": "-0.553711", 6 | "aux-param-colorcorrection12": "0.0458984", 7 | "aux-param-colorcorrection2": "0.540039", 8 | "aux-param-colorcorrection3": "0.540039", 9 | "aux-param-colorcorrection4": "0.208008", 10 | "aux-param-colorcorrection5": "-0.332031", 11 | "aux-param-colorcorrection6": "-0.212891", 12 | "aux-param-colorcorrection7": "-0.212891", 13 | "aux-param-colorcorrection8": "0.68457", 14 | "aux-param-colorcorrection9": "0.930664", 15 | "aux-param-depthclampmax": "65535", 16 | "aux-param-depthclampmin": 
"0", 17 | "aux-param-disparityshift": "0", 18 | "controls-autoexposure-auto": "True", 19 | "controls-autoexposure-manual": "33000", 20 | "controls-color-autoexposure-auto": "True", 21 | "controls-color-autoexposure-manual": "100", 22 | "controls-color-backlight-compensation": "0", 23 | "controls-color-brightness": "0", 24 | "controls-color-contrast": "50", 25 | "controls-color-gain": "100", 26 | "controls-color-gamma": "300", 27 | "controls-color-hue": "0", 28 | "controls-color-power-line-frequency": "3", 29 | "controls-color-saturation": "64", 30 | "controls-color-sharpness": "50", 31 | "controls-color-white-balance-auto": "True", 32 | "controls-color-white-balance-manual": "4600", 33 | "controls-depth-gain": "16", 34 | "controls-depth-white-balance-auto": "False", 35 | "controls-laserpower": "150", 36 | "controls-laserstate": "on", 37 | "ignoreSAD": "0", 38 | "param-amplitude-factor": "0", 39 | "param-autoexposure-setpoint": "400", 40 | "param-censusenablereg-udiameter": "9", 41 | "param-censusenablereg-vdiameter": "3", 42 | "param-censususize": "9", 43 | "param-censusvsize": "3", 44 | "param-depthclampmax": "65535", 45 | "param-depthclampmin": "0", 46 | "param-depthunits": "1000", 47 | "param-disableraucolor": "0", 48 | "param-disablesadcolor": "0", 49 | "param-disablesadnormalize": "0", 50 | "param-disablesloleftcolor": "0", 51 | "param-disableslorightcolor": "1", 52 | "param-disparitymode": "0", 53 | "param-disparityshift": "0", 54 | "param-lambdaad": "751", 55 | "param-lambdacensus": "6", 56 | "param-leftrightthreshold": "10", 57 | "param-maxscorethreshb": "2893", 58 | "param-medianthreshold": "796", 59 | "param-minscorethresha": "4", 60 | "param-neighborthresh": "108", 61 | "param-raumine": "6", 62 | "param-rauminn": "3", 63 | "param-rauminnssum": "7", 64 | "param-raumins": "2", 65 | "param-rauminw": "2", 66 | "param-rauminwesum": "12", 67 | "param-regioncolorthresholdb": "0.785714", 68 | "param-regioncolorthresholdg": "0.565558", 69 | "param-regioncolorthresholdr": "0.985323", 70 | "param-regionshrinku": "3", 71 | "param-regionshrinkv": "0", 72 | "param-robbinsmonrodecrement": "25", 73 | "param-robbinsmonroincrement": "2", 74 | "param-rsmdiffthreshold": "1.65625", 75 | "param-rsmrauslodiffthreshold": "0.71875", 76 | "param-rsmremovethreshold": "0.809524", 77 | "param-scanlineedgetaub": "13", 78 | "param-scanlineedgetaug": "15", 79 | "param-scanlineedgetaur": "30", 80 | "param-scanlinep1": "155", 81 | "param-scanlinep1onediscon": "160", 82 | "param-scanlinep1twodiscon": "59", 83 | "param-scanlinep2": "190", 84 | "param-scanlinep2onediscon": "507", 85 | "param-scanlinep2twodiscon": "493", 86 | "param-secondpeakdelta": "647", 87 | "param-texturecountthresh": "0", 88 | "param-texturedifferencethresh": "1722", 89 | "param-usersm": "1", 90 | "param-zunits": "1000", 91 | "stream-depth-format": "Z16", 92 | "stream-fps": "30", 93 | "stream-height": "480", 94 | "stream-width": "640" 95 | } 96 | -------------------------------------------------------------------------------- /diffusion_policy/real_world/realsense_config/435_high_accuracy_mode.json: -------------------------------------------------------------------------------- 1 | { 2 | "aux-param-autoexposure-setpoint": "1536", 3 | "aux-param-colorcorrection1": "0.298828", 4 | "aux-param-colorcorrection10": "-0", 5 | "aux-param-colorcorrection11": "-0", 6 | "aux-param-colorcorrection12": "-0", 7 | "aux-param-colorcorrection2": "0.293945", 8 | "aux-param-colorcorrection3": "0.293945", 9 | "aux-param-colorcorrection4": "0.114258", 10 | 
"aux-param-colorcorrection5": "-0", 11 | "aux-param-colorcorrection6": "-0", 12 | "aux-param-colorcorrection7": "-0", 13 | "aux-param-colorcorrection8": "-0", 14 | "aux-param-colorcorrection9": "-0", 15 | "aux-param-depthclampmax": "65536", 16 | "aux-param-depthclampmin": "0", 17 | "aux-param-disparityshift": "0", 18 | "controls-autoexposure-auto": "True", 19 | "controls-autoexposure-manual": "8500", 20 | "controls-color-autoexposure-auto": "True", 21 | "controls-color-autoexposure-manual": "100", 22 | "controls-color-backlight-compensation": "0", 23 | "controls-color-brightness": "0", 24 | "controls-color-contrast": "50", 25 | "controls-color-gain": "100", 26 | "controls-color-gamma": "300", 27 | "controls-color-hue": "0", 28 | "controls-color-power-line-frequency": "3", 29 | "controls-color-saturation": "64", 30 | "controls-color-sharpness": "50", 31 | "controls-color-white-balance-auto": "True", 32 | "controls-color-white-balance-manual": "4600", 33 | "controls-depth-gain": "16", 34 | "controls-laserpower": "150", 35 | "controls-laserstate": "on", 36 | "ignoreSAD": "0", 37 | "param-amplitude-factor": "0", 38 | "param-autoexposure-setpoint": "1536", 39 | "param-censusenablereg-udiameter": "9", 40 | "param-censusenablereg-vdiameter": "9", 41 | "param-censususize": "9", 42 | "param-censusvsize": "9", 43 | "param-depthclampmax": "65536", 44 | "param-depthclampmin": "0", 45 | "param-depthunits": "1000", 46 | "param-disableraucolor": "0", 47 | "param-disablesadcolor": "0", 48 | "param-disablesadnormalize": "0", 49 | "param-disablesloleftcolor": "0", 50 | "param-disableslorightcolor": "1", 51 | "param-disparitymode": "0", 52 | "param-disparityshift": "0", 53 | "param-lambdaad": "751", 54 | "param-lambdacensus": "6", 55 | "param-leftrightthreshold": "10", 56 | "param-maxscorethreshb": "2893", 57 | "param-medianthreshold": "796", 58 | "param-minscorethresha": "4", 59 | "param-neighborthresh": "108", 60 | "param-raumine": "6", 61 | "param-rauminn": "3", 62 | "param-rauminnssum": "7", 63 | "param-raumins": "2", 64 | "param-rauminw": "2", 65 | "param-rauminwesum": "12", 66 | "param-regioncolorthresholdb": "0.785714", 67 | "param-regioncolorthresholdg": "0.565558", 68 | "param-regioncolorthresholdr": "0.985323", 69 | "param-regionshrinku": "3", 70 | "param-regionshrinkv": "0", 71 | "param-robbinsmonrodecrement": "25", 72 | "param-robbinsmonroincrement": "2", 73 | "param-rsmdiffthreshold": "1.65625", 74 | "param-rsmrauslodiffthreshold": "0.71875", 75 | "param-rsmremovethreshold": "0.809524", 76 | "param-scanlineedgetaub": "13", 77 | "param-scanlineedgetaug": "15", 78 | "param-scanlineedgetaur": "30", 79 | "param-scanlinep1": "155", 80 | "param-scanlinep1onediscon": "160", 81 | "param-scanlinep1twodiscon": "59", 82 | "param-scanlinep2": "190", 83 | "param-scanlinep2onediscon": "507", 84 | "param-scanlinep2twodiscon": "493", 85 | "param-secondpeakdelta": "647", 86 | "param-texturecountthresh": "0", 87 | "param-texturedifferencethresh": "1722", 88 | "param-usersm": "1", 89 | "param-zunits": "1000", 90 | "stream-depth-format": "Z16", 91 | "stream-fps": "30", 92 | "stream-height": "480", 93 | "stream-width": "848" 94 | } -------------------------------------------------------------------------------- /diffusion_policy/real_world/spacemouse.py: -------------------------------------------------------------------------------- 1 | from spnav import spnav_open, spnav_poll_event, spnav_close, SpnavMotionEvent, SpnavButtonEvent 2 | from threading import Thread, Event 3 | from collections import defaultdict 4 | 
import numpy as np 5 | import time 6 | 7 | 8 | class Spacemouse(Thread): 9 | def __init__(self, max_value=500, deadzone=(0,0,0,0,0,0), dtype=np.float32): 10 | """ 11 | Continuously listen to 3Dconnexion space navigator events 12 | and update the latest state. 13 | 14 | max_value: {300, 500} 300 for wired version and 500 for wireless 15 | deadzone: number or tuple in [0,1]; axes whose value lies within (-deadzone, deadzone) are set to 0 16 | 17 | front 18 | z 19 | ^ _ 20 | | (O) space mouse 21 | | 22 | *----->x right 23 | y 24 | """ 25 | if np.issubdtype(type(deadzone), np.number): 26 | deadzone = np.full(6, fill_value=deadzone, dtype=dtype) 27 | else: 28 | deadzone = np.array(deadzone, dtype=dtype) 29 | assert (deadzone >= 0).all() 30 | 31 | super().__init__() 32 | self.stop_event = Event() 33 | self.max_value = max_value 34 | self.dtype = dtype 35 | self.deadzone = deadzone 36 | self.motion_event = SpnavMotionEvent([0,0,0], [0,0,0], 0) 37 | self.button_state = defaultdict(lambda: False) 38 | self.tx_zup_spnav = np.array([ 39 | [0,0,-1], 40 | [1,0,0], 41 | [0,1,0] 42 | ], dtype=dtype) 43 | 44 | def get_motion_state(self): 45 | me = self.motion_event 46 | state = np.array(me.translation + me.rotation, 47 | dtype=self.dtype) / self.max_value 48 | is_dead = (-self.deadzone < state) & (state < self.deadzone) 49 | state[is_dead] = 0 50 | return state 51 | 52 | def get_motion_state_transformed(self): 53 | """ 54 | Return the motion state in right-handed coordinates 55 | z 56 | *------>y right 57 | | _ 58 | | (O) space mouse 59 | v 60 | x 61 | back 62 | 63 | """ 64 | state = self.get_motion_state() 65 | tf_state = np.zeros_like(state) 66 | tf_state[:3] = self.tx_zup_spnav @ state[:3] 67 | tf_state[3:] = self.tx_zup_spnav @ state[3:] 68 | return tf_state 69 | 70 | def is_button_pressed(self, button_id): 71 | return self.button_state[button_id] 72 | 73 | def stop(self): 74 | self.stop_event.set() 75 | self.join() 76 | 77 | def __enter__(self): 78 | self.start() 79 | return self 80 | 81 | def __exit__(self, exc_type, exc_val, exc_tb): 82 | self.stop() 83 | 84 | def run(self): 85 | spnav_open() 86 | try: 87 | while not self.stop_event.is_set(): 88 | event = spnav_poll_event() 89 | if isinstance(event, SpnavMotionEvent): 90 | self.motion_event = event 91 | elif isinstance(event, SpnavButtonEvent): 92 | self.button_state[event.bnum] = event.press 93 | else: 94 | time.sleep(1/200) 95 | finally: 96 | spnav_close() 97 | 98 | 99 | def test(): 100 | with Spacemouse(deadzone=0.3) as sm: 101 | for i in range(2000): 102 | # print(sm.get_motion_state()) 103 | print(sm.get_motion_state_transformed()) 104 | print(sm.is_button_pressed(0)) 105 | time.sleep(1/100) 106 | 107 | if __name__ == '__main__': 108 | test() 109 | -------------------------------------------------------------------------------- /diffusion_policy/shared_memory/shared_memory_util.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from dataclasses import dataclass 3 | import numpy as np 4 | from multiprocessing.managers import SharedMemoryManager 5 | from atomics import atomicview, MemoryOrder, UINT 6 | 7 | @dataclass 8 | class ArraySpec: 9 | name: str 10 | shape: Tuple[int] 11 | dtype: np.dtype 12 | 13 | 14 | class SharedAtomicCounter: 15 | def __init__(self, 16 | shm_manager: SharedMemoryManager, 17 | size :int=8 # 64bit int 18 | ): 19 | shm = shm_manager.SharedMemory(size=size) 20 | self.shm = shm 21 | self.size = size 22 | self.store(0) # initialize 23 | 24 | @property 25 | def buf(self): 26 | return
self.shm.buf[:self.size] 27 | 28 | def load(self) -> int: 29 | with atomicview(buffer=self.buf, atype=UINT) as a: 30 | value = a.load(order=MemoryOrder.ACQUIRE) 31 | return value 32 | 33 | def store(self, value: int): 34 | with atomicview(buffer=self.buf, atype=UINT) as a: 35 | a.store(value, order=MemoryOrder.RELEASE) 36 | 37 | def add(self, value: int): 38 | with atomicview(buffer=self.buf, atype=UINT) as a: 39 | a.add(value, order=MemoryOrder.ACQ_REL) 40 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | python eval.py --checkpoint /path/to/ckpt -o /path/to/output_dir 4 | """ 5 | 6 | import sys 7 | # use line-buffering for both stdout and stderr 8 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 9 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 10 | 11 | import os 12 | import pathlib 13 | import click 14 | import hydra 15 | import torch 16 | import dill 17 | import wandb 18 | import json 19 | from diffusion_policy.workspace.base_workspace import BaseWorkspace 20 | import copy 21 | from omegaconf.omegaconf import open_dict 22 | import yaml 23 | 24 | taskid2cfg = { 25 | 0 :"config/tasks/square_d0.yaml" , 26 | 1 :"config/tasks/stack_d0.yaml" , 27 | 2 :"config/tasks/coffee_d0.yaml" , 28 | 3 :"config/tasks/hammer_cleanup_d0.yaml" , 29 | 4 :"config/tasks/mug_cleanup_d0.yaml" , 30 | 5 :"config/tasks/nut_assembly_d0.yaml" , 31 | 6 :"config/tasks/stack_three_d0.yaml" , 32 | 7: "config/tasks/threading_d0.yaml" , 33 | } 34 | 35 | 36 | 37 | @click.command() 38 | @click.option('-c', '--checkpoint', default='epoch=0299-test_mean_score=6.070.ckpt') 39 | @click.option('-o', '--output_dir', default='test_eval') 40 | @click.option('-d', '--device', default='cuda:0') 41 | def main(checkpoint, output_dir, device): 42 | if os.path.exists(output_dir): 43 | click.confirm(f"Output path {output_dir} already exists! 
Overwrite?", abort=True) 44 | pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) 45 | 46 | # load checkpoint 47 | payload = torch.load(open(checkpoint, 'rb'), pickle_module=dill) 48 | cfg = payload['cfg'] 49 | for i in range(cfg['task_num']): 50 | curr_cfg=taskid2cfg[i] 51 | with open(curr_cfg, "r") as f: 52 | task_cfg = yaml.safe_load(f) 53 | cfg[f"task{i}"]=task_cfg 54 | cls = hydra.utils.get_class(cfg._target_) 55 | workspace = cls(cfg, output_dir=output_dir) 56 | workspace: BaseWorkspace 57 | workspace.load_payload(payload, exclude_keys=None, include_keys=None) 58 | 59 | # run eval 60 | # configure env 61 | env_runners = [] 62 | # env_runner3: BaseImageRunner 63 | for i in range(cfg.task_num): 64 | env_runners.append(hydra.utils.instantiate(cfg[f'task{i}'].env_runner, output_dir=output_dir)) 65 | 66 | 67 | # get policy from workspace 68 | datasets= [] 69 | for i in range(cfg.task_num): 70 | datasets.append(hydra.utils.instantiate(cfg[f'task{i}'].dataset)) 71 | normalizers=[] 72 | for dataset in datasets: 73 | normalizers.append(dataset.get_normalizer()) 74 | workspace.model.set_normalizer(normalizers) 75 | 76 | policy = workspace.model 77 | if cfg.training.use_ema: 78 | workspace.ema_model.set_normalizer(normalizers) 79 | policy = workspace.ema_model 80 | device = torch.device(device) 81 | policy.to(device) 82 | for normalizer in policy.normalizers: 83 | normalizer.to(device) 84 | policy.eval() 85 | 86 | 87 | runner_logs = [] 88 | for i, env_runner in enumerate(env_runners): 89 | runner_log = env_runner.run(policy,task_id=torch.tensor([i], dtype=torch.int64).to(device)) 90 | runner_log = {key + f'_{i}': value for key, value in runner_log.items()} 91 | runner_logs.append(runner_log) 92 | 93 | # dump log to json 94 | for i,runner_log in enumerate(runner_logs): 95 | json_log = dict() 96 | for key, value in runner_log.items(): 97 | if isinstance(value, wandb.sdk.data_types.video.Video): 98 | json_log[key] = value._path 99 | else: 100 | json_log[key] = value 101 | out_path = os.path.join(output_dir, f'eval_log_{i}.json') 102 | json.dump(json_log, open(out_path, 'w'), indent=2, sort_keys=True) 103 | 104 | if __name__ == '__main__': 105 | os.environ["CUDA_VISIBLE_DEVICES"]='1,' 106 | os.environ["MUJOCO_GL"]="osmesa" 107 | main() 108 | -------------------------------------------------------------------------------- /mixture_of_experts/mixture_of_experts/__init__.py: -------------------------------------------------------------------------------- 1 | from mixture_of_experts.mixture_of_experts import MoE, HeirarchicalMoE, Experts 2 | -------------------------------------------------------------------------------- /mixture_of_experts/moe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AnthonyHuo/SDP/9d70d48549f622c29f4f4935588467989835a46e/mixture_of_experts/moe.png -------------------------------------------------------------------------------- /mixture_of_experts/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name = 'mixture-of-experts', 5 | packages = find_packages(), 6 | version = '0.2.3', 7 | license='MIT', 8 | description = 'Sparsely-Gated Mixture of Experts for Pytorch', 9 | author = 'Phil Wang', 10 | author_email = 'lucidrains@gmail.com', 11 | url = 'https://github.com/lucidrains/mixture-of-experts', 12 | keywords = ['artificial intelligence', 'deep learning', 'transformers', 'mixture of experts'], 
13 | install_requires=[ 14 | 'torch' 15 | ], 16 | classifiers=[ 17 | 'Development Status :: 4 - Beta', 18 | 'Intended Audience :: Developers', 19 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 20 | 'License :: OSI Approved :: MIT License', 21 | 'Programming Language :: Python :: 3.6', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /parallel_linear/.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | build 35 | dist 36 | *.egg-info 37 | 38 | __pycache__ 39 | *.pyc 40 | 41 | *.json -------------------------------------------------------------------------------- /parallel_linear/README.md: -------------------------------------------------------------------------------- 1 | # Parallel Linears and Mixture of Experts 2 | 3 | ## update 2022.07.19 4 | The `MoE.forward()` is now a standard Mixture of Experts (FFD). The mixture of attention code is supposed to use the `MoE.map()` and `MoE.reduce()` functions. 5 | 6 | ## Mixture of Experts 7 | Mixture of Experts (MoE) is a map-reduce style function. 8 | The forward function maps different inputs to different experts. The reduce function sums these intermediate results together for each input. 9 | Parameters: 10 | 1. `input_size` - the size of input hidden states 11 | 2. `output_size` - the size of intermediate hidden states 12 | 3. `num_experts` - the number of total experts 13 | 4. `k` - the number of top-k selected experts for each input 14 | 5. `cvloss`, `switchloss`, `zloss` - different load balancing losses. 15 | 6. `activation` - the activation function for intermediate states of MoE (FFD). 16 | 17 | To install the package: 18 | ``` 19 | pip3 install . 20 | ``` 21 | or 22 | ``` 23 | python3 setup.py install 24 | ``` 25 | 26 | To use the class: 27 | ``` 28 | from parallel_experts import MoE 29 | 30 | moe = MoE() 31 | ``` 32 | The `MoE` class is a map-reduce style function. To use it, first map the input `x` with the map function: 33 | ``` 34 | mapped = moe.map(x) 35 | ``` 36 | Then you can pass the mapped and projected output through attention (for mixture of attention) or a non-linear activation (for mixture of FFD) to get the processed matrix `y`. 37 | Lastly, you feed `y` to the reduce function to get the output of the mixture of attention/FFD. 38 | ``` 39 | output = moe.reduce(y) 40 | ``` 41 | 42 | ## Parallel Linears 43 | Parallel linears are a part of MoE. 44 | Input to the function includes: 45 | 1. Input matrix $ X $, a $ B \times D_{in} $ matrix, where $B$ is the total number of input vectors. 46 | 2. Weight matrix $ W $, a $ N \times D_{out} \times D_{in} $ matrix, where $N$ is the number of linear kernels. 47 | 3. Routing vector $ R $, a $ B $ dimensional vector, where each element $ R_i $ ( $ 0 \leq R_i < N $ ) is the index of the weight matrix for the $ i $-th input vector. The input matrix and routing vector are sorted according to the weight index. For example, a valid routing vector is $ [0\ 0\ 0\ 1\ 1\ 2\ 3\ 3\ 3\ 3] $ 48 | 4.
Start indices vector $ S $, an $ N $ dimensional vector, where each element $ S_i $ is the starting index for inputs of the $ i $-th weight matrix. 49 | 5. End indices vector $ E $, an $ N $ dimensional vector, where each element $ E_i $ is the ending index for inputs of the $ i $-th weight matrix. 50 | 51 | The output of the function is $ \left[ \begin{matrix} W_{R_1} X_1, W_{R_2} X_2, ..., W_{R_B} X_B \end{matrix} \right] $. A pure-PyTorch sketch of this input convention is shown after parallel_linear.cc below. 52 | 53 | To run the test: 54 | ``` 55 | python test.py 56 | ``` -------------------------------------------------------------------------------- /parallel_linear/parallel_experts/__init__.py: -------------------------------------------------------------------------------- 1 | from .parallel_experts import ParallelExperts, ParallelLinear 2 | from parallel_experts.moe import MoE, RandomMoE, TaskMoE -------------------------------------------------------------------------------- /parallel_linear/parallel_linear.cc: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | torch::Tensor parallel_linear_fwd_interface(torch::Tensor, torch::Tensor, torch::Tensor); 4 | std::vector<torch::Tensor> parallel_linear_bwd_interface(torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor); 5 | 6 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") 7 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 8 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 9 | 10 | torch::Tensor parallel_linear_fwd(torch::Tensor input, torch::Tensor weight, torch::Tensor indices) { 11 | if(input.device().type() == torch::kCPU) { 12 | int bsz = input.size(0); 13 | torch::Tensor output = torch::zeros({bsz, weight.size(1)}); 14 | for (int i = 0; i < bsz; ++i) 15 | { 16 | output[i] = torch::mv(weight[indices[i]], input[i]); 17 | } 18 | return output; 19 | } 20 | else if (input.device().type() == torch::kCUDA){ 21 | CHECK_INPUT(input); 22 | CHECK_INPUT(weight); 23 | CHECK_INPUT(indices); 24 | TORCH_CHECK(indices.dtype() == torch::kInt64, 25 | "Indices Datatype not implemented"); 26 | 27 | return parallel_linear_fwd_interface(input, weight, indices); 28 | } 29 | AT_ERROR("No such device: ", input.device()); 30 | } 31 | 32 | std::vector<torch::Tensor> parallel_linear_bwd(torch::Tensor grad_out, torch::Tensor input, torch::Tensor weight, 33 | torch::Tensor indices, torch::Tensor start_indices, torch::Tensor end_indices) { 34 | if(input.device().type() == torch::kCPU) { 35 | int bsz = input.size(0); 36 | torch::Tensor d_input = torch::zeros_like(input); 37 | torch::Tensor d_weight = torch::zeros_like(weight); 38 | 39 | for (int i = 0; i < bsz; ++i) 40 | { 41 | d_input[i] = torch::mv(weight[indices[i]].transpose(0, 1), grad_out[i]); 42 | d_weight[indices[i]] += torch::outer(grad_out[i], input[i]); 43 | } 44 | return {d_input, d_weight}; 45 | } 46 | else if (input.device().type() == torch::kCUDA){ 47 | CHECK_INPUT(input); 48 | CHECK_INPUT(weight); 49 | CHECK_INPUT(indices); 50 | CHECK_INPUT(grad_out); 51 | TORCH_CHECK(indices.dtype() == torch::kInt64, 52 | "Indices Datatype not implemented"); 53 | 54 | return parallel_linear_bwd_interface(grad_out, input, weight, indices, start_indices, end_indices); 55 | } 56 | AT_ERROR("No such device: ", input.device()); 57 | } 58 | 59 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 60 | m.def("forward", &parallel_linear_fwd, "Parallel linear forward"); 61 | m.def("backward", &parallel_linear_bwd, "Parallel linear backward"); 62 | }
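Note on the input convention: the following is a minimal pure-PyTorch sketch of the I/O contract described in the README above and of the CPU fallback branch in parallel_linear.cc — inputs sorted by expert index, per-expert start/end indices, and one matrix multiply per routed row. The helper name and sizes are made up for illustration only; it mirrors the reference loop, not the fused CUDA kernel in parallel_linear_kernel.cu.

```
import torch
import torch.nn.functional as F

def parallel_linear_reference(x, weight, routing):
    # x:       (B, D_in) inputs, already sorted by expert index
    # weight:  (N, D_out, D_in), one weight matrix per expert
    # routing: (B,) sorted expert index R_i for each input row
    out = torch.zeros(x.size(0), weight.size(1), dtype=x.dtype)
    for i in range(x.size(0)):
        out[i] = weight[routing[i]] @ x[i]  # W_{R_i} X_i, as in the CPU branch of parallel_linear_fwd
    return out

# Illustrative sizes (hypothetical, chosen small for readability).
B, D_in, D_out, N = 10, 4, 3, 4
x = torch.randn(B, D_in)
weight = torch.randn(N, D_out, D_in)
routing, _ = torch.sort(torch.randint(N, (B,)))            # sorted routing vector R
expert_size = torch.bincount(routing, minlength=N)         # number of inputs per expert
end_indices = expert_size.cumsum(0)                        # E
start_indices = F.pad(end_indices[:-1], (1, 0), value=0)   # S

print(parallel_linear_reference(x, weight, routing).shape)  # torch.Size([10, 3])
```

Sorting the inputs by expert index is what lets the fused kernel treat each expert's rows as one contiguous slice delimited by the start/end indices.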
-------------------------------------------------------------------------------- /parallel_linear/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from torch.utils import cpp_extension 3 | 4 | setup(name='parallel_experts', 5 | packages=find_packages(), 6 | # ext_modules=[cpp_extension.CUDAExtension('parallel_linear', 7 | # ['parallel_linear.cc', 8 | # 'parallel_linear_kernel.cu' 9 | # ])], 10 | # cmdclass={'build_ext': cpp_extension.BuildExtension}, 11 | install_requires=[ 12 | 'torch' 13 | ]) -------------------------------------------------------------------------------- /parallel_linear/test.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | from parallel_experts import ParallelLinear 7 | 8 | assert torch.cuda.is_available() 9 | cuda_device = torch.device("cuda") 10 | 11 | NUM_EXPERTS=128 12 | INPUT_SIZE=512 13 | OUTPUT_SIZE=128 14 | BSZ=512 * 32 * 8 15 | 16 | 17 | def TorchParallelLinear(input, weight, bias, expert_size): 18 | output_list = [] 19 | expert_size_list = expert_size.tolist() 20 | input_list = input.split(expert_size_list, dim=0) 21 | for i in range(NUM_EXPERTS): 22 | output_list.append(torch.mm(input_list[i], weight[i]) + bias[i]) 23 | return torch.cat(output_list, dim=0) 24 | 25 | # output = torch.mm(input, weight[0]) 26 | # return output 27 | 28 | 29 | kernel_forward = 0 30 | kernel_backward = 0 31 | torch_forward = 0 32 | torch_backward = 0 33 | for t in range(200 + 1): 34 | weight = torch.rand((NUM_EXPERTS, INPUT_SIZE, OUTPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 35 | bias = torch.rand((NUM_EXPERTS, OUTPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 36 | input = torch.rand((BSZ, INPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 37 | experts = torch.randint(NUM_EXPERTS, (BSZ,), device=cuda_device, dtype=torch.long) 38 | output_vector = torch.rand((BSZ, OUTPUT_SIZE), requires_grad=True, device=cuda_device, dtype=torch.float16) 39 | 40 | experts, _ = torch.sort(experts, dim=0) 41 | zeros = torch.zeros((BSZ, NUM_EXPERTS), device=cuda_device, dtype=torch.long) 42 | gates = zeros.scatter(1, experts[:, None], 1) 43 | expert_size = gates.sum(0) 44 | end_indices = expert_size.cumsum(0) 45 | start_indices = F.pad(end_indices[:-1], (1,0), value=0) 46 | 47 | torch.cuda.synchronize(cuda_device) 48 | 49 | start = time.time() 50 | function_output = ParallelLinear.apply(input, expert_size, weight, bias) 51 | function_output_sum = torch.einsum('bi,bi->b', function_output, output_vector).sum(0) 52 | torch.cuda.synchronize(cuda_device) 53 | forward_i = time.time() - start 54 | 55 | start = time.time() 56 | function_output_sum.backward() 57 | torch.cuda.synchronize(cuda_device) 58 | backward_i = time.time() - start 59 | 60 | if t > 0: 61 | kernel_forward += forward_i 62 | kernel_backward += backward_i 63 | print('Step {:2d} | K_Fwd: {:.3f} us | K_Bwd {:.3f} us'.format(t, forward_i * 1e6/1e5, backward_i * 1e6/1e5), end=' ') 64 | 65 | input_grad = input.grad 66 | weight_grad = weight.grad 67 | bias_grad = bias.grad 68 | 69 | input.grad = None 70 | weight.grad = None 71 | bias.grad = None 72 | 73 | torch.cuda.synchronize(cuda_device) 74 | 75 | start = time.time() 76 | output = TorchParallelLinear(input, weight, bias, expert_size) 77 | output_sum = torch.einsum('bi,bi->b', output, output_vector).sum(0) 78 | 
torch.cuda.synchronize(cuda_device) 79 | forward_i = time.time() - start 80 | 81 | start = time.time() 82 | output_sum.backward() 83 | torch.cuda.synchronize(cuda_device) 84 | backward_i = time.time() - start 85 | 86 | if t > 0: 87 | torch_forward += forward_i 88 | torch_backward += backward_i 89 | print('| T_Fwd: {:.3f} us | T_Bwd {:.3f} us'.format(forward_i * 1e6/1e5, backward_i * 1e6/1e5), end=' ') 90 | 91 | output_diff = torch.abs(output - function_output).max() 92 | input_grad_diff = torch.abs(input.grad - input_grad).max() 93 | weight_grad_diff = torch.abs(weight.grad - weight_grad).max() 94 | bias_grad_diff = torch.abs(bias.grad - bias_grad).max() 95 | 96 | if t > 0: 97 | print('| O_Diff: {:.3f} | Ig_Diff {:.3f} | Wg_Diff {:.3f} | bg_Diff {:.3f}'.format( 98 | output_diff, input_grad_diff, weight_grad_diff, bias_grad_diff)) 99 | 100 | input.grad = None 101 | weight.grad = None 102 | 103 | print('Kernel Forward: {:.3f} us | Kernel Backward {:.3f} us'.format(kernel_forward * 1e6/1e5, kernel_backward * 1e6/1e5)) 104 | print('Torch Forward: {:.3f} us | Torch Backward {:.3f} us'.format(torch_forward * 1e6/1e5, torch_backward * 1e6/1e5)) 105 | -------------------------------------------------------------------------------- /patch_moe/gate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | 6 | class Gate(nn.Module): 7 | 8 | def __init__(self, k, gating_kernel_size, strides=1, padding=0, 9 | gating_activation=None, gating_kernel_initializer=None): 10 | super(Gate, self).__init__() 11 | 12 | self.k = k 13 | self.gating_kernel_size = gating_kernel_size 14 | self.strides = strides 15 | self.padding = padding 16 | self.gating_activation = gating_activation 17 | 18 | self.gating_kernel = nn.Parameter(torch.empty(1, 3, 4, 4)) 19 | 20 | # Initialize with normal distribution 21 | init.normal_(self.gating_kernel, mean=0.0, std=0.0001) 22 | 23 | 24 | 25 | def forward(self, inputs): 26 | 27 | # Convolution 28 | #(b,3,76,76) 29 | gating_outputs = F.conv2d(inputs, self.gating_kernel, stride=self.strides, padding=self.padding) 30 | #(b,1,19,19) 31 | # Apply activation function if specified 32 | if self.gating_activation is not None: 33 | gating_outputs = self.gating_activation(gating_outputs) 34 | 35 | # Flatten and apply top-k 36 | b, c, h, w = gating_outputs.shape 37 | gating_outputs = gating_outputs.view(b, c, -1) 38 | #(b,1,361) 39 | values, indices = torch.topk(gating_outputs, self.k, dim=2, sorted=False) 40 | #(b,1,2) 41 | # Scatter values to original positions 42 | out_shape = (b, c, h * w) 43 | ret_flat = torch.zeros(b * c * h * w, device=inputs.device) 44 | #[[20,40][34,56]] 45 | indices_flat = indices.view(b*c,-1) + torch.arange(b * c, device=inputs.device).unsqueeze(-1) * h * w 46 | indices_flat = indices_flat.view(-1) 47 | ret_flat.scatter_add_(0, indices_flat, values.view(-1)) 48 | #[b,1,361] 49 | # Reshape and reorder 50 | new_gating_outputs = ret_flat.view(b, c, h, w) 51 | #[b,1,19,19] 52 | # Repeat and reshape the gating outputs 53 | new_gating_outputs = new_gating_outputs.repeat_interleave(self.gating_kernel_size[0], dim=2) 54 | new_gating_outputs = new_gating_outputs.repeat_interleave(self.gating_kernel_size[1], dim=3) 55 | new_gating_outputs = new_gating_outputs.repeat_interleave(self.gating_kernel.size(1), dim=1) 56 | #[b,48,19,19] 57 | # new_gating_outputs = new_gating_outputs.view(b, h, self.gating_kernel_size[0], w, 
self.gating_kernel_size[1],-1) 58 | # new_gating_outputs = new_gating_outputs.view(b, h * self.gating_kernel_size[0], w * self.gating_kernel_size[1],-1) 59 | # # new_gating_outputs = new_gating_outputs.permute(0, 3, 1, 2).contiguous() 60 | # repeat_factor = self.gating_kernel[0] * self.gating_kernel[1] * 3 61 | # new_gating_outputs = new_gating_outputs.repeat(1, 1, 1, 48) 62 | 63 | # # Step 2: Reshape new_gating_outputs 64 | # new_shape = (new_gating_outputs.size(0), new_gating_outputs.size(1), new_gating_outputs.size(2), 65 | # self.gating_kernel[0], self.gating_kernel[1], 3) 66 | # new_gating_outputs = new_gating_outputs.view(new_shape) 67 | 68 | # # Step 3: Transpose new_gating_outputs 69 | # new_gating_outputs = new_gating_outputs.permute(0, 1, 3, 2, 4, 5) 70 | 71 | # # Step 4: Final reshape 72 | # final_shape = (new_gating_outputs.size(0), new_gating_outputs.size(1) * new_gating_outputs.size(2), 73 | # new_gating_outputs.size(3) * new_gating_outputs.size(4), new_gating_outputs.size(5)) 74 | # new_gating_outputs = new_gating_outputs.view(final_shape) 75 | # Element-wise multiplication 76 | outputs = inputs * new_gating_outputs 77 | 78 | return outputs 79 | def test_gate_layer(): 80 | # Parameters for the gate layer 81 | k = 2 82 | gating_kernel_size = (4, 4) # Example kernel size 83 | strides = 4 84 | padding = 0 85 | 86 | # Initialize the Gate layer 87 | gate_layer = Gate(k, gating_kernel_size, strides, padding, gating_activation=torch.relu) 88 | 89 | # Create a random input tensor 90 | batch_size = 2 91 | in_channels = 3 92 | height, width = 16, 16 # Example dimensions 93 | input_tensor = torch.randn(batch_size, in_channels, height, width) 94 | 95 | # Forward pass through the Gate layer 96 | output = gate_layer(input_tensor) 97 | 98 | print("Input shape:", input_tensor.shape) 99 | print("Output shape:", output.shape) 100 | 101 | if __name__ == "__main__": 102 | test_gate_layer() -------------------------------------------------------------------------------- /pyrightconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "exclude": [ 3 | "data/**", 4 | "data_local/**", 5 | "outputs/**" 6 | ] 7 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml-include==1.4.1 -------------------------------------------------------------------------------- /resnet_moe/moe_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import autograd, nn as nn 3 | 4 | 5 | class GetMask(autograd.Function): 6 | @staticmethod 7 | def forward(ctx, scores): # binarization 8 | 9 | expert_pred = torch.argmax(scores, dim=1) # [bs] 10 | expert_pred_one_hot = torch.zeros_like(scores).scatter_(1, expert_pred.unsqueeze(-1), 1) 11 | 12 | return expert_pred, expert_pred_one_hot 13 | 14 | @staticmethod 15 | def backward(ctx, g1, g2): 16 | return g2 17 | 18 | 19 | def get_device(x): 20 | gpu_idx = x.get_device() 21 | return f"cuda:{gpu_idx}" if gpu_idx >= 0 else "cpu" 22 | 23 | 24 | class MoEBase(nn.Module): 25 | def __init__(self): 26 | super(MoEBase, self).__init__() 27 | self.scores = None 28 | self.router = None 29 | 30 | def set_score(self, scores): 31 | self.scores = scores 32 | for module in self.modules(): 33 | if hasattr(module, 'scores'): 34 | module.scores = self.scores 35 | 36 | 37 | class MoEConv(nn.Conv2d, MoEBase): 38 | def __init__(self, in_channels, 
out_channels, kernel_size, stride=1, padding=0, groups=1, dilation=1, bias=False, 39 | n_expert=8): 40 | super(MoEConv, self).__init__(in_channels, out_channels * n_expert, kernel_size, stride, padding, dilation, 41 | groups, bias, ) 42 | self.in_channels = in_channels 43 | self.out_channels = out_channels * n_expert 44 | self.expert_width = out_channels 45 | 46 | self.n_expert = n_expert 47 | assert self.n_expert >= 1 48 | self.layer_selection = torch.zeros([n_expert, self.out_channels]) 49 | for cluster_id in range(n_expert): 50 | start = cluster_id * self.expert_width 51 | end = (cluster_id + 1) * self.expert_width 52 | idx = torch.arange(start, end) 53 | self.layer_selection[cluster_id][idx] = 1 54 | self.scores = None 55 | 56 | def forward(self, x): 57 | if self.n_expert > 1: 58 | if self.scores is None: 59 | self.scores = self.router(x) 60 | expert_selection, expert_selection_one_hot = GetMask.apply(self.scores) 61 | mask = torch.matmul(expert_selection_one_hot, self.layer_selection.to(x)) # [bs, self.out_channels] 62 | out = super(MoEConv, self).forward(x) 63 | out = out * mask.unsqueeze(-1).unsqueeze(-1) 64 | index = torch.where(mask.view(-1) > 0)[0] 65 | shape = out.shape 66 | out_selected = out.view(shape[0] * shape[1], shape[2], shape[3])[index].view(shape[0], -1, shape[2], 67 | shape[3]) 68 | else: 69 | out_selected = super(MoEConv, self).forward(x) 70 | self.scores = None 71 | return out_selected -------------------------------------------------------------------------------- /resnet_moe/resnet_moe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from models.layers.moe_layer import MoEConv, MoEBase 6 | 7 | 8 | class BasicBlock(nn.Module): 9 | expansion = 1 10 | 11 | def __init__(self, in_planes, planes, conv_layer, stride=1, **kwargs): 12 | super(BasicBlock, self).__init__() 13 | self.conv1 = conv_layer( 14 | in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False, **kwargs 15 | ) 16 | self.bn1 = nn.BatchNorm2d(planes) 17 | self.conv2 = conv_layer( 18 | planes, planes, kernel_size=3, stride=1, padding=1, bias=False, **kwargs 19 | ) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | 22 | self.shortcut = nn.Sequential() 23 | if stride != 1 or in_planes != self.expansion * planes: 24 | self.shortcut = nn.Sequential( 25 | nn.Conv2d( 26 | in_planes, 27 | self.expansion * planes, 28 | kernel_size=1, 29 | stride=stride, 30 | bias=False, 31 | ), 32 | nn.BatchNorm2d(self.expansion * planes), 33 | ) 34 | 35 | def forward(self, x): 36 | out = F.relu(self.bn1(self.conv1(x))) 37 | out = self.bn2(self.conv2(out)) 38 | out += self.shortcut(x) 39 | out = F.relu(out) 40 | return out 41 | 42 | 43 | class Bottleneck(nn.Module): 44 | expansion = 4 45 | 46 | def __init__(self, in_planes, planes, conv_layer, stride=1, **kwargs): 47 | super(Bottleneck, self).__init__() 48 | self.conv1 = conv_layer(in_planes, planes, kernel_size=1, bias=False, **kwargs 49 | ) 50 | self.bn1 = nn.BatchNorm2d(planes) 51 | self.conv2 = conv_layer( 52 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False, **kwargs 53 | ) 54 | self.bn2 = nn.BatchNorm2d(planes) 55 | self.conv3 = conv_layer( 56 | planes, self.expansion * planes, kernel_size=1, bias=False, **kwargs 57 | ) 58 | self.bn3 = nn.BatchNorm2d(self.expansion * planes) 59 | 60 | self.shortcut = nn.Sequential() 61 | if stride != 1 or in_planes != self.expansion * planes: 62 | self.shortcut = nn.Sequential( 63 | nn.Conv2d( 64 
| in_planes, 65 | self.expansion * planes, 66 | kernel_size=1, 67 | stride=stride, 68 | bias=False, 69 | ), 70 | nn.BatchNorm2d(self.expansion * planes), 71 | ) 72 | 73 | def forward(self, x): 74 | out = F.relu(self.bn1(self.conv1(x))) 75 | out = F.relu(self.bn2(self.conv2(out))) 76 | out = self.bn3(self.conv3(out)) 77 | out += self.shortcut(x) 78 | out = F.relu(out) 79 | return out 80 | 81 | 82 | def percentile(t, q): 83 | k = 1 + round(.01 * float(q) * (t.numel() - 1)) 84 | return t.view(-1).kthvalue(k).values.item() 85 | 86 | 87 | class ResNet(MoEBase): 88 | def __init__(self, block, num_blocks, n_expert=8, ratio=1.0): 89 | super(ResNet, self).__init__() 90 | self.ratio = ratio 91 | self.in_planes = 64 92 | self.conv_layer = MoEConv 93 | self.num_blocks = num_blocks 94 | self.normalize = None 95 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | 98 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, n_expert=n_expert) 99 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, n_expert=n_expert) 100 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, n_expert=n_expert) 101 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, n_expert=n_expert) 102 | 103 | 104 | def _make_layer(self, block, planes, num_blocks, stride, **kwargs): 105 | planes = int(self.ratio * planes) 106 | strides = [stride] + [1] * (num_blocks - 1) 107 | layers = [] 108 | for stride in strides: 109 | layers.append(block(self.in_planes, planes, self.conv_layer, stride, **kwargs)) 110 | self.in_planes = planes * block.expansion 111 | return nn.Sequential(*layers) 112 | 113 | def forward(self, x): 114 | if self.normalize is not None: 115 | x = self.normalize(x) 116 | if self.router is not None: 117 | self.set_score(self.router(x)) 118 | out = F.relu(self.bn1(self.conv1(x))) 119 | out = self.layer1(out) 120 | out = self.layer2(out) 121 | out = self.layer3(out) 122 | out = self.layer4(out) 123 | 124 | 125 | return out 126 | 127 | 128 | def resnet18_cifar_moe(**kwargs): 129 | return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 130 | 131 | 132 | def resnet34_cifar_moe(**kwargs): 133 | return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 134 | 135 | 136 | def resnet50_cifar_moe(**kwargs): 137 | return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 138 | 139 | 140 | def resnet101_cifar_moe(**kwargs): 141 | return ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 142 | 143 | 144 | def resnet152_cifar_moe(**kwargs): 145 | return ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) -------------------------------------------------------------------------------- /resnet_moe/router.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LambdaLayer(nn.Module): 6 | def __init__(self, lambd): 7 | super(LambdaLayer, self).__init__() 8 | self.lambd = lambd 9 | 10 | def forward(self, x): 11 | return self.lambd(x) 12 | 13 | 14 | class Block(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, conv_layer, stride=1): 18 | super(Block, self).__init__() 19 | self.conv1 = conv_layer(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn1 = nn.BatchNorm2d(planes) 21 | self.conv2 = conv_layer(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != 
planes: 26 | diff = planes - in_planes 27 | self.shortcut = LambdaLayer( 28 | lambda x: F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, int(diff * 0.5), int((diff + 1) * 0.5)), "constant", 0)) 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | out += self.shortcut(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | class Router(nn.Module): 38 | def __init__(self, block, num_blocks, num_experts=2): 39 | super(Router, self).__init__() 40 | self.in_planes = 16 41 | self.conv_layer = nn.Conv2d 42 | 43 | self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=3, stride=1, padding=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(self.in_planes) 45 | self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1) 46 | self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2) 47 | self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2) 48 | self.fc = nn.Linear(64, num_experts) 49 | 50 | def _make_layer(self, block, planes, num_blocks, stride): 51 | planes = planes 52 | strides = [stride] + [1] * (num_blocks - 1) 53 | layers = [] 54 | for stride in strides: 55 | layers.append(block(self.in_planes, planes, self.conv_layer, stride)) 56 | self.in_planes = planes * block.expansion 57 | 58 | return nn.Sequential(*layers) 59 | 60 | def forward(self, x): 61 | out = F.relu(self.bn1(self.conv1(x))) 62 | out = self.layer1(out) 63 | out = self.layer2(out) 64 | out = self.layer3(out) 65 | out = F.avg_pool2d(out, out.size()[3]) 66 | out = out.view(out.size(0), -1) 67 | out = self.fc(out) 68 | return out 69 | 70 | 71 | def build_router(**kwargs): 72 | return Router(Block, [3, 3, 3], **kwargs) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name = 'diffusion_policy', 5 | packages = find_packages(), 6 | ) 7 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Usage: 3 | Training: 4 | python train.py --config-name=train_diffusion_lowdim_workspace 5 | """ 6 | 7 | import sys 8 | # use line-buffering for both stdout and stderr 9 | sys.stdout = open(sys.stdout.fileno(), mode='w', buffering=1) 10 | sys.stderr = open(sys.stderr.fileno(), mode='w', buffering=1) 11 | # import os 12 | import hydra 13 | from omegaconf import OmegaConf 14 | import pathlib 15 | from diffusion_policy.workspace.base_workspace import BaseWorkspace 16 | import os 17 | 18 | # allows arbitrary python code execution in configs using the ${eval:''} resolver 19 | OmegaConf.register_new_resolver("eval", eval, replace=True) 20 | 21 | @hydra.main( 22 | version_base=None, 23 | config_path=str(pathlib.Path(__file__).parent.joinpath( 24 | 'config', 'tmp')), 25 | config_name="full.yaml", 26 | ) 27 | def main(cfg: OmegaConf): 28 | # resolve immediately so all the ${now:} resolvers 29 | # will use the same time. 
30 | OmegaConf.resolve(cfg) 31 | 32 | cls = hydra.utils.get_class(cfg._target_) 33 | workspace: BaseWorkspace = cls(cfg) 34 | workspace.run() 35 | 36 | if __name__ == "__main__": 37 | os.environ["CUDA_VISIBLE_DEVICES"]='0,' 38 | os.environ["MUJOCO_GL"]="osmesa" 39 | from utils.recursive_yaml import read_yaml, write_yaml 40 | data = read_yaml('config/base.yaml') 41 | write_yaml(data, 'config/tmp/full.yaml') 42 | main() 43 | -------------------------------------------------------------------------------- /utils/recursive_yaml.py: -------------------------------------------------------------------------------- 1 | import yaml, os 2 | from yamlinclude import YamlIncludeConstructor 3 | fpath = os.path.dirname(os.path.dirname(__file__)) 4 | Path = lambda p:os.path.join(fpath,p) 5 | YamlIncludeConstructor.add_to_loader_class(loader_class=yaml.FullLoader) 6 | def read_yaml(path): 7 | p = Path(path) 8 | with open(p) as f: 9 | data = yaml.load(f, Loader=yaml.FullLoader) 10 | return data 11 | def write_yaml(data, path): 12 | p = Path(path) 13 | with open(p, 'w') as f: 14 | yaml.dump(data, f) 15 | 16 | if __name__ == '__main__': 17 | path = 'config/base.yaml' 18 | t = read_yaml(path) 19 | # write to yaml 20 | with open('config/tmp/full.yaml', 'w') as f: 21 | yaml.dump(t, f) --------------------------------------------------------------------------------
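Usage note: recursive_yaml relies on pyyaml-include (pinned in requirements.txt) to expand `!include` directives at load time, so the YAML written back out is fully flattened. Below is a minimal sketch of the flow train.py runs before handing the result to Hydra; the base.yaml keys shown in the comments (task_num, task0, task1) follow the fields eval.py reads but are otherwise assumptions about the config contents.

```
# Hypothetical include layout (key names are illustrative only):
#
#   config/base.yaml
#     task_num: 2
#     task0: !include tasks/square_d0.yaml
#     task1: !include tasks/stack_d0.yaml
#
from utils.recursive_yaml import read_yaml, write_yaml

data = read_yaml('config/base.yaml')       # !include directives are expanded while parsing
write_yaml(data, 'config/tmp/full.yaml')   # flattened config that Hydra loads as full.yaml
```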