├── LICENSE ├── MANIFEST.in ├── README.md ├── images └── sirius.png ├── requirements-docs.txt ├── requirements.txt ├── robomimic ├── __init__.py ├── algo │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── algo.cpython-38.pyc │ │ ├── awac.cpython-38.pyc │ │ ├── bc.cpython-38.pyc │ │ ├── bcq.cpython-38.pyc │ │ ├── cql.cpython-38.pyc │ │ ├── gl.cpython-38.pyc │ │ ├── hbc.cpython-38.pyc │ │ ├── iql.cpython-38.pyc │ │ ├── iris.cpython-38.pyc │ │ └── td3_bc.cpython-38.pyc │ ├── algo.py │ ├── awac.py │ ├── bc.py │ ├── bcq.py │ ├── cql.py │ ├── gl.py │ ├── hbc.py │ ├── iql.py │ ├── iris.py │ └── td3_bc.py ├── config │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── awac_config.cpython-38.pyc │ │ ├── base_config.cpython-38.pyc │ │ ├── bc_config.cpython-38.pyc │ │ ├── bcq_config.cpython-38.pyc │ │ ├── config.cpython-38.pyc │ │ ├── cql_config.cpython-38.pyc │ │ ├── gl_config.cpython-38.pyc │ │ ├── hbc_config.cpython-38.pyc │ │ ├── iql_config.cpython-38.pyc │ │ ├── iris_config.cpython-38.pyc │ │ ├── td3_bc_config.cpython-38.pyc │ │ └── vae_config.cpython-38.pyc │ ├── awac_config.py │ ├── base_config.py │ ├── bc_config.py │ ├── bcq_config.py │ ├── config.py │ ├── cql_config.py │ ├── gl_config.py │ ├── hbc_config.py │ ├── iql_config.py │ ├── iris_config.py │ ├── td3_bc_config.py │ └── vae_config.py ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── env_base.cpython-38.pyc │ ├── env_base.py │ ├── env_gym.py │ ├── env_ig_momart.py │ └── env_robosuite.py ├── exps │ ├── sirius │ │ ├── bc.json │ │ ├── bc_iwr.json │ │ └── sirius.json │ ├── sirius_template │ │ ├── awac │ │ │ ├── awac_im.json │ │ │ └── awac_ld.json │ │ ├── bc │ │ │ ├── bc_im.json │ │ │ ├── bc_ld.json │ │ │ ├── bc_real.json │ │ │ ├── bc_sim_v0_im.json │ │ │ └── bc_sim_v0_ld.json │ │ └── iql │ │ │ └── iql_ld.json │ └── templates │ │ ├── bc.json │ │ ├── bcq.json │ │ ├── cql.json │ │ ├── gl.json │ │ ├── hbc.json │ │ ├── iris.json │ │ └── td3_bc.json ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── base_nets.cpython-38.pyc │ │ ├── distributions.cpython-38.pyc │ │ ├── obs_nets.cpython-38.pyc │ │ ├── policy_nets.cpython-38.pyc │ │ ├── vae_nets.cpython-38.pyc │ │ └── value_nets.cpython-38.pyc │ ├── base_nets.py │ ├── distributions.py │ ├── obs_nets.py │ ├── policy_nets.py │ ├── vae_nets.py │ └── value_nets.py ├── scripts │ ├── check_same_initial_configs.py │ ├── conversion │ │ ├── convert_d4rl.py │ │ ├── convert_robosuite.py │ │ └── convert_roboturk_pilot.py │ ├── dataset_states_to_obs.py │ ├── download_datasets.py │ ├── download_momart_datasets.py │ ├── extract_obs_from_raw_datasets.sh │ ├── generate_config_templates.py │ ├── generate_paper_configs.py │ ├── get_dataset_info.py │ ├── hitl │ │ ├── collect_hitl_demos.py │ │ └── collect_playback_utils.py │ ├── hyperparam_helper.py │ ├── playback_dataset.py │ ├── run_trained_agent.py │ ├── slurm │ │ ├── auto_append.txt │ │ ├── auto_overwrite.txt │ │ ├── base_args.py │ │ ├── base_template.sbatch │ │ ├── batchrl_args.py │ │ ├── run_hp_sweep.py │ │ ├── sbatch_args.py │ │ └── sbatch_utils.py │ ├── split_train_val.py │ ├── train.py │ └── vis │ │ ├── image_utils.py │ │ ├── vis_preintv.py │ │ └── vis_utils.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── env_utils.cpython-38.pyc │ ├── file_utils.cpython-38.pyc │ ├── log_utils.cpython-38.pyc │ ├── loss_utils.cpython-38.pyc │ ├── macros.cpython-38.pyc │ ├── 
obs_utils.cpython-38.pyc │ ├── python_utils.cpython-38.pyc │ ├── tensor_utils.cpython-38.pyc │ ├── torch_utils.cpython-38.pyc │ ├── train_utils.cpython-38.pyc │ └── vis_utils.cpython-38.pyc │ ├── dataset.py │ ├── env_utils.py │ ├── file_utils.py │ ├── hyperparam_utils.py │ ├── log_utils.py │ ├── loss_utils.py │ ├── macros.py │ ├── obs_utils.py │ ├── python_utils.py │ ├── tensor_utils.py │ ├── test_utils.py │ ├── torch_utils.py │ ├── train_utils.py │ └── vis_utils.py ├── setup.py └── sirius.yml /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 UT Robot Perception and Learning Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include robomimic/exps/templates/*.json 2 | include robomimic/scripts/*.py 3 | include robomimic/scripts/*.sh 4 | include robomimic/scripts/conversion/*.py 5 | include robomimic/scripts/conversion/*.sh 6 | recursive-include examples/ *.py 7 | recursive-include tests/ *.py 8 | recursive-include tests/ *.sh 9 | recursive-include tests/assets/ * -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sirius 🌟: Robot Learning on the Job 2 | 3 |
4 | 5 | This is the official codebase for the [**Sirius**](https://ut-austin-rpl.github.io/sirius/) paper: 6 | 7 | **Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment** 8 |
[Huihan Liu](https://huihanl.github.io/), [Soroush Nasiriany](http://snasiriany.me/), [Lance Zhang](https://github.com/Lantian-Lance-Zhang), [Zhiyao Bao](https://www.linkedin.com/in/zhiyao-bao/), [Yuke Zhu](https://www.cs.utexas.edu/~yukez/) 9 |
[UT Austin Robot Perception and Learning Lab](https://rpl.cs.utexas.edu/) 10 |
Robotics: Science and Systems (RSS), 2023 11 |
**[[Paper]](https://arxiv.org/abs/2211.08416)**  **[[Project Website]](https://ut-austin-rpl.github.io/sirius/)**  **[[Real Robot Control]](https://ut-austin-rpl.github.io/deoxys-docs/html/getting_started/overview.html)** 12 | 13 | 14 | 15 |
16 | 17 | ## Quickstart 18 | 19 | Sirius builds upon [robomimic](https://github.com/ARISE-Initiative/robomimic), a framework for robot learning from demonstration. Sirius also uses the robotics simulator [robosuite](https://github.com/ARISE-Initiative/robosuite) powered by the MuJoCo physics engine. 20 | 21 | ### Setup Sirius codebase 22 | 23 | #### Installing Sirius 24 | 25 | ``` 26 | git clone https://github.com/UT-Austin-RPL/sirius 27 | cd sirius 28 | conda env create -f sirius.yml 29 | conda activate sirius 30 | pip install -e . 31 | ``` 32 | 33 | #### Installing ```robosuite``` 34 | 35 | The additional reference for installing robosuite [here](https://robomimic.github.io/docs/introduction/installation.html) and [here](https://robosuite.ai/docs/installation.html#install-from-source) could be helpful. 36 | 37 | ``` 38 | $ git clone https://github.com/ARISE-Initiative/robosuite.git 39 | $ cd robosuite 40 | $ pip install -r requirements.txt 41 | $ pip install -e . 42 | ``` 43 | 44 | ## Usage 45 | 46 | ### Running Sirius 47 | 48 | 49 | Running Sirius intervention-guided policy learning: 50 | 51 | ``` 52 | python robomimic/scripts/train.py --config robomimic/exps/sirius/sirius.json 53 | ``` 54 | 55 | IWR baseline: 56 | 57 | ``` 58 | python robomimic/scripts/train.py --config robomimic/exps/sirius/bc_iwr.json 59 | ``` 60 | 61 | BC baseline: 62 | 63 | ``` 64 | python robomimic/scripts/train.py --config robomimic/exps/sirius/bc.json 65 | ``` 66 | 67 |
68 | 69 | ### Sirius Data Collection Pipeline 70 | 71 | We include the script for collecting demonstrations and performing human intervention during robot policy execution below. We use a spacemouse for providing both demonstration and intervention. More details for setting up Spacemouse can be found [here](https://ut-austin-rpl.github.io/deoxys-docs/html/tutorials/using_teleoperation_devices.html). 72 | 73 | #### Performing Human Demonstration 74 | 75 | Perform human demonstration with the flag ```--all-demos```: 76 | 77 | ``` 78 | python robomimic/scripts/hitl/collect_hitl_demos.py --all-demos --num-traj 50 79 | ``` 80 | 81 | #### Policy Execution with Intervention 82 | 83 | Perform human intervention with the policy checkpoint ```${checkpoint}```: 84 | 85 | ``` 86 | python robomimic/scripts/hitl/collect_hitl_demos.py --num-traj 50 --checkpoint ${checkpoint} 87 | ``` 88 | 89 |
90 | 91 | ### Processing data 92 | 93 | #### Adding modalities 94 | 95 | By default, the datasets are generated in the minimum format with only low-level state information to save space. To add image observation and other modalities for training, run the following post-processing script. It will process the original data ```${data.hdf5}``` into ```${data_processed.hdf5}```, with image size ```${image_size}```. By default, the two camera view uses are agentview and robot0_eye_in_hand, which you can modify in the script ```template_process_sim_dataset.sh```. 96 | 97 | ``` 98 | cd robomimic/scripts/hitl 99 | 100 | source template_process_sim_dataset.sh ${data.hdf5} ${data_processed.hdf5} ${image_size} 101 | ``` 102 | 103 |
104 | 105 | ## Acknowledgements 106 | 107 | This codebase is largely built on [robomimic](https://github.com/ARISE-Initiative/robomimic) and [robosuite](https://github.com/ARISE-Initiative/robosuite). We also thank [Ajay Mandlekar](https://ai.stanford.edu/~amandlek/) for sharing well-designed simulation task environments beyond the robomimic codebase like ```Coffee``` and ```Threading``` tasks during project development. 108 | 109 | For real-robot experiments, we used [Deoxys](https://ut-austin-rpl.github.io/deoxys-docs/html/getting_started/overview.html), a controller library for Franka Emika Panda developed by [Yifeng Zhu](https://zhuyifengzju.github.io/). 110 | 111 |
112 | 113 | ## Citation 114 | ```bibtex 115 | @inproceedings{liu2022robot, 116 | title = {Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment}, 117 | author = {Huihan Liu and Soroush Nasiriany and Lance Zhang and Zhiyao Bao and Yuke Zhu}, 118 | booktitle = {Robotics: Science and Systems (RSS)}, 119 | year = {2023} 120 | } 121 | ``` 122 | -------------------------------------------------------------------------------- /images/sirius.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/images/sirius.png -------------------------------------------------------------------------------- /requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # requirements for building sphinx docs 2 | pygments==2.4.1 3 | sphinx 4 | sphinx_rtd_theme 5 | sphinx_markdown_tables 6 | recommonmark 7 | nbsphinx 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.13.3 2 | h5py 3 | psutil 4 | tqdm 5 | termcolor 6 | tensorboard 7 | tensorboardX 8 | imageio 9 | imageio-ffmpeg 10 | egl_probe>=1.0.1 11 | torch 12 | torchvision 13 | -------------------------------------------------------------------------------- /robomimic/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | 3 | 4 | # stores released dataset links and rollout horizons in global dictionary. 5 | # Structure is given below for each type of dataset: 6 | 7 | # robosuite / real 8 | # { 9 | # task: 10 | # dataset_type: 11 | # hdf5_type: 12 | # url: link 13 | # horizon: value 14 | # ... 15 | # ... 16 | # ... 17 | # } 18 | DATASET_REGISTRY = {} 19 | 20 | # momart 21 | # { 22 | # task: 23 | # dataset_type: 24 | # url: link 25 | # size: value 26 | # ... 27 | # ... 28 | # } 29 | MOMART_DATASET_REGISTRY = {} 30 | 31 | 32 | def register_dataset_link(task, dataset_type, hdf5_type, link, horizon): 33 | """ 34 | Helper function to register dataset link in global dictionary. 35 | Also takes a @horizon parameter - this corresponds to the evaluation 36 | rollout horizon that should be used during training. 37 | 38 | Args: 39 | task (str): name of task for this dataset 40 | dataset_type (str): type of dataset (usually identifies the dataset source) 41 | hdf5_type (str): type of hdf5 - usually one of "raw", "low_dim", or "image", 42 | to identify the kind of observations in the dataset 43 | link (str): download link for the dataset 44 | horizon (int): evaluation rollout horizon that should be used with this dataset 45 | """ 46 | if task not in DATASET_REGISTRY: 47 | DATASET_REGISTRY[task] = {} 48 | if dataset_type not in DATASET_REGISTRY[task]: 49 | DATASET_REGISTRY[task][dataset_type] = {} 50 | DATASET_REGISTRY[task][dataset_type][hdf5_type] = dict(url=link, horizon=horizon) 51 | 52 | 53 | def register_all_links(): 54 | """ 55 | Record all dataset links in this function. 
56 | """ 57 | 58 | # all proficient human datasets 59 | ph_tasks = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"] 60 | ph_horizons = [400, 400, 400, 700, 700, 1000, 1000, 1000] 61 | for task, horizon in zip(ph_tasks, ph_horizons): 62 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="raw", horizon=horizon, 63 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/demo.hdf5".format(task)) 64 | # real world datasets only have demo.hdf5 files which already contain all observation modalities 65 | # while sim datasets store raw low-dim mujoco states in the demo.hdf5 66 | if "real" not in task: 67 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="low_dim", horizon=horizon, 68 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/low_dim.hdf5".format(task)) 69 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="image", horizon=horizon, 70 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/image.hdf5".format(task)) 71 | 72 | # all multi human datasets 73 | mh_tasks = ["lift", "can", "square", "transport"] 74 | mh_horizons = [500, 500, 500, 1100] 75 | for task, horizon in zip(mh_tasks, mh_horizons): 76 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="raw", horizon=horizon, 77 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/demo.hdf5".format(task)) 78 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="low_dim", horizon=horizon, 79 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/low_dim.hdf5".format(task)) 80 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="image", horizon=horizon, 81 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/image.hdf5".format(task)) 82 | 83 | # all machine generated datasets 84 | for task, horizon in zip(["lift", "can"], [400, 400]): 85 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="raw", horizon=horizon, 86 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/demo.hdf5".format(task)) 87 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_sparse", horizon=horizon, 88 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_sparse.hdf5".format(task)) 89 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_sparse", horizon=horizon, 90 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_sparse.hdf5".format(task)) 91 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_dense", horizon=horizon, 92 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_dense.hdf5".format(task)) 93 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_dense", horizon=horizon, 94 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_dense.hdf5".format(task)) 95 | 96 | # can-paired dataset 97 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="raw", horizon=400, 98 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo.hdf5") 99 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="low_dim", horizon=400, 100 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim.hdf5") 101 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="image", horizon=400, 102 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/image.hdf5") 103 | 104 | 105 | 
def register_momart_dataset_link(task, dataset_type, link, dataset_size): 106 | """ 107 | Helper function to register a momart dataset link in the global dictionary. 108 | Also takes a @dataset_size parameter - this corresponds to the size of 109 | the dataset file, in GB. 110 | 111 | Args: 112 | task (str): name of task for this dataset 113 | dataset_type (str): type of dataset (usually identifies the dataset source) 114 | link (str): download link for the dataset 115 | dataset_size (float): size of the dataset, in GB 116 | """ 117 | if task not in MOMART_DATASET_REGISTRY: 118 | MOMART_DATASET_REGISTRY[task] = {} 119 | if dataset_type not in MOMART_DATASET_REGISTRY[task]: 120 | MOMART_DATASET_REGISTRY[task][dataset_type] = {} 121 | MOMART_DATASET_REGISTRY[task][dataset_type] = dict(url=link, size=dataset_size) 122 | 123 | 124 | def register_all_momart_links(): 125 | """ 126 | Record all momart dataset links in this function. 127 | """ 128 | # all tasks, mapped to their [exp, sub, gen, sam] sizes 129 | momart_tasks = { 130 | "table_setup_from_dishwasher": [14, 14, 3.3, 0.6], 131 | "table_setup_from_dresser": [16, 17, 3.1, 0.7], 132 | "table_cleanup_to_dishwasher": [23, 36, 5.3, 1.1], 133 | "table_cleanup_to_sink": [17, 28, 2.9, 0.8], 134 | "unload_dishwasher": [21, 27, 5.4, 1.0], 135 | } 136 | 137 | momart_dataset_types = [ 138 | "expert", 139 | "suboptimal", 140 | "generalize", 141 | "sample", 142 | ] 143 | 144 | # Iterate over all combos and register the link 145 | for task, dataset_sizes in momart_tasks.items(): 146 | for dataset_type, dataset_size in zip(momart_dataset_types, dataset_sizes): 147 | register_momart_dataset_link( 148 | task=task, 149 | dataset_type=dataset_type, 150 | link=f"http://downloads.cs.stanford.edu/downloads/rt_mm/{dataset_type}/{task}_{dataset_type}.hdf5", 151 | dataset_size=dataset_size, 152 | ) 153 | 154 | 155 | register_all_links() 156 | register_all_momart_links() 157 | -------------------------------------------------------------------------------- /robomimic/algo/__init__.py: -------------------------------------------------------------------------------- 1 | from robomimic.algo.algo import register_algo_factory_func, res_mlp_args_from_config, algo_name_to_factory_func, algo_factory, Algo, PolicyAlgo, ValueAlgo, PlannerAlgo, HierarchicalAlgo, RolloutPolicy 2 | 3 | # note: these imports are needed to register these classes in the global algo registry 4 | from robomimic.algo.bc import BC, BC_Gaussian, BC_GMM, BC_VAE, BC_RNN, BC_RNN_GMM 5 | from robomimic.algo.bcq import BCQ, BCQ_GMM, BCQ_Distributional 6 | from robomimic.algo.cql import CQL 7 | from robomimic.algo.awac import AWAC 8 | from robomimic.algo.iql import IQL 9 | from robomimic.algo.gl import GL, GL_VAE, ValuePlanner 10 | from robomimic.algo.hbc import HBC 11 | from robomimic.algo.iris import IRIS 12 | from robomimic.algo.td3_bc import TD3_BC 13 | -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/algo.cpython-38.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/algo.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/awac.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/awac.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/bc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/bc.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/bcq.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/bcq.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/cql.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/cql.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/gl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/gl.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/hbc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/hbc.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/iql.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/iql.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/iris.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/iris.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/td3_bc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/td3_bc.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of IRIS 
(https://arxiv.org/abs/1911.05321). 3 | """ 4 | import numpy as np 5 | from collections import OrderedDict 6 | from copy import deepcopy 7 | 8 | import torch 9 | 10 | import robomimic.utils.tensor_utils as TensorUtils 11 | import robomimic.utils.obs_utils as ObsUtils 12 | from robomimic.config.config import Config 13 | from robomimic.algo import register_algo_factory_func, algo_name_to_factory_func, HBC, ValuePlanner, ValueAlgo, GL_VAE 14 | 15 | 16 | @register_algo_factory_func("iris") 17 | def algo_config_to_class(algo_config): 18 | """ 19 | Maps algo config to the IRIS algo class to instantiate, along with additional algo kwargs. 20 | 21 | Args: 22 | algo_config (Config instance): algo config 23 | 24 | Returns: 25 | algo_class: subclass of Algo 26 | algo_kwargs (dict): dictionary of additional kwargs to pass to algorithm 27 | """ 28 | pol_cls, _ = algo_name_to_factory_func("bc")(algo_config.actor) 29 | plan_cls, _ = algo_name_to_factory_func("gl")(algo_config.value_planner.planner) 30 | value_cls, _ = algo_name_to_factory_func("bcq")(algo_config.value_planner.value) 31 | return IRIS, dict(policy_algo_class=pol_cls, planner_algo_class=plan_cls, value_algo_class=value_cls) 32 | 33 | 34 | class IRIS(HBC, ValueAlgo): 35 | """ 36 | Implementation of IRIS (https://arxiv.org/abs/1911.05321). 37 | """ 38 | def __init__( 39 | self, 40 | planner_algo_class, 41 | value_algo_class, 42 | policy_algo_class, 43 | algo_config, 44 | obs_config, 45 | global_config, 46 | obs_key_shapes, 47 | ac_dim, 48 | device, 49 | ): 50 | """ 51 | Args: 52 | planner_algo_class (Algo class): algo class for the planner 53 | 54 | policy_algo_class (Algo class): algo class for the policy 55 | 56 | algo_config (Config object): instance of Config corresponding to the algo section 57 | of the config 58 | 59 | obs_config (Config object): instance of Config corresponding to the observation 60 | section of the config 61 | 62 | global_config (Config object): global training config 63 | 64 | obs_key_shapes (OrderedDict): dictionary that maps input/output observation keys to shapes 65 | 66 | ac_dim (int): action dimension 67 | 68 | device: torch device 69 | """ 70 | self.algo_config = algo_config 71 | self.obs_config = obs_config 72 | self.global_config = global_config 73 | 74 | self.ac_dim = ac_dim 75 | self.device = device 76 | 77 | self._subgoal_step_count = 0 # current step count for deciding when to update subgoal 78 | self._current_subgoal = None # latest subgoal 79 | self._subgoal_update_interval = self.algo_config.subgoal_update_interval # subgoal update frequency 80 | self._subgoal_horizon = self.algo_config.value_planner.planner.subgoal_horizon 81 | self._actor_horizon = self.algo_config.actor.rnn.horizon 82 | 83 | self._algo_mode = self.algo_config.mode 84 | assert self._algo_mode in ["separate", "cascade"] 85 | 86 | self.planner = ValuePlanner( 87 | planner_algo_class=planner_algo_class, 88 | value_algo_class=value_algo_class, 89 | algo_config=algo_config.value_planner, 90 | obs_config=obs_config.value_planner, 91 | global_config=global_config, 92 | obs_key_shapes=obs_key_shapes, 93 | ac_dim=ac_dim, 94 | device=device 95 | ) 96 | 97 | self.actor_goal_shapes = self.planner.subgoal_shapes 98 | assert not algo_config.latent_subgoal.enabled, "IRIS does not support latent subgoals" 99 | 100 | # only for the actor: override goal modalities and shapes to match the subgoal set by the planner 101 | actor_obs_key_shapes = deepcopy(obs_key_shapes) 102 | # make sure we are not modifying existing observation key shapes 103 | for k in 
self.actor_goal_shapes: 104 | if k in actor_obs_key_shapes: 105 | assert actor_obs_key_shapes[k] == self.actor_goal_shapes[k] 106 | actor_obs_key_shapes.update(self.actor_goal_shapes) 107 | 108 | goal_modalities = {obs_modality: [] for obs_modality in ObsUtils.OBS_MODALITY_CLASSES.keys()} 109 | for k in self.actor_goal_shapes.keys(): 110 | goal_modalities[ObsUtils.OBS_KEYS_TO_MODALITIES[k]].append(k) 111 | 112 | actor_obs_config = deepcopy(obs_config.actor) 113 | with actor_obs_config.unlocked(): 114 | actor_obs_config["goal"] = Config(**goal_modalities) 115 | 116 | self.actor = policy_algo_class( 117 | algo_config=algo_config.actor, 118 | obs_config=actor_obs_config, 119 | global_config=global_config, 120 | obs_key_shapes=actor_obs_key_shapes, 121 | ac_dim=ac_dim, 122 | device=device 123 | ) 124 | 125 | def process_batch_for_training(self, batch): 126 | """ 127 | Processes input batch from a data loader to filter out 128 | relevant information and prepare the batch for training. 129 | 130 | Args: 131 | batch (dict): dictionary with torch.Tensors sampled 132 | from a data loader 133 | 134 | Returns: 135 | input_batch (dict): processed and filtered batch that 136 | will be used for training 137 | """ 138 | input_batch = dict() 139 | 140 | input_batch["planner"] = self.planner.process_batch_for_training(batch) 141 | input_batch["actor"] = self.actor.process_batch_for_training(batch) 142 | 143 | if self.algo_config.actor_use_random_subgoals: 144 | # optionally use randomly sampled step between [1, seq_length] as policy goal 145 | policy_subgoal_indices = torch.randint( 146 | low=0, high=self.global_config.train.seq_length, size=(batch["actions"].shape[0],)) 147 | goal_obs = TensorUtils.gather_sequence(batch["next_obs"], policy_subgoal_indices) 148 | goal_obs = TensorUtils.to_device(TensorUtils.to_float(goal_obs), self.device) 149 | input_batch["actor"]["goal_obs"] = goal_obs 150 | else: 151 | # otherwise, use planner subgoal target as goal for the policy 152 | input_batch["actor"]["goal_obs"] = input_batch["planner"]["planner"]["target_subgoals"] 153 | 154 | return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) 155 | 156 | def get_state_value(self, obs_dict, goal_dict=None): 157 | """ 158 | Get state value outputs. 159 | 160 | Args: 161 | obs_dict (dict): current observation 162 | goal_dict (dict): (optional) goal 163 | 164 | Returns: 165 | value (torch.Tensor): value tensor 166 | """ 167 | return self.planner.get_state_value(obs_dict=obs_dict, goal_dict=goal_dict) 168 | 169 | def get_state_action_value(self, obs_dict, actions, goal_dict=None): 170 | """ 171 | Get state-action value outputs. 
172 | 173 | Args: 174 | obs_dict (dict): current observation 175 | actions (torch.Tensor): action 176 | goal_dict (dict): (optional) goal 177 | 178 | Returns: 179 | value (torch.Tensor): value tensor 180 | """ 181 | return self.planner.get_state_action_value(obs_dict=obs_dict, actions=actions, goal_dict=goal_dict) 182 | -------------------------------------------------------------------------------- /robomimic/config/__init__.py: -------------------------------------------------------------------------------- 1 | from robomimic.config.config import Config 2 | from robomimic.config.base_config import config_factory, get_all_registered_configs 3 | 4 | # note: these imports are needed to register these classes in the global config registry 5 | from robomimic.config.bc_config import BCConfig 6 | from robomimic.config.bcq_config import BCQConfig 7 | from robomimic.config.cql_config import CQLConfig 8 | from robomimic.config.awac_config import AWACConfig 9 | from robomimic.config.iql_config import IQLConfig 10 | from robomimic.config.gl_config import GLConfig 11 | from robomimic.config.hbc_config import HBCConfig 12 | from robomimic.config.iris_config import IRISConfig 13 | from robomimic.config.td3_bc_config import TD3_BCConfig 14 | from robomimic.config.vae_config import VAEConfig -------------------------------------------------------------------------------- /robomimic/config/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/awac_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/awac_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/base_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/base_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/bc_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/bc_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/bcq_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/bcq_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/config.cpython-38.pyc 
-------------------------------------------------------------------------------- /robomimic/config/__pycache__/cql_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/cql_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/gl_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/gl_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/hbc_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/hbc_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/iql_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/iql_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/iris_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/iris_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/td3_bc_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/td3_bc_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/vae_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/vae_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/awac_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for AWAC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class AWACConfig(BaseConfig): 9 | ALGO_NAME = "awac" 10 | 11 | def train_config(self): 12 | """ 13 | Update from superclass to change default batch size. 14 | """ 15 | super(AWACConfig, self).train_config() 16 | 17 | # increase batch size to 1024 (found to work better for most manipulation experiments) 18 | self.train.batch_size = 1024 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor.
Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 26 | """ 27 | super(AWACConfig, self).algo_config() 28 | 29 | # optimization parameters 30 | self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate 31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 34 | 35 | self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate 36 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 37 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 38 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 39 | 40 | # target network related parameters 41 | self.algo.discount = 0.99 # discount factor to use 42 | self.algo.target_tau = 0.01 # update rate for target networks 43 | self.algo.ignore_dones = False 44 | self.algo.use_negative_rewards = False 45 | self.algo.use_hardcoded_weights = False 46 | self.algo.hc_weights_key = "final_success" 47 | self.algo.relabel_dones_mode = None 48 | self.algo.relabel_rewards_mode = None 49 | 50 | # Actor network settings 51 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet) 52 | 53 | # Actor network settings - shared 54 | self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net 55 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage 56 | self.algo.actor.net.common.use_tanh = False 57 | 58 | # Actor network settings - gaussian 59 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value 60 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net 61 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not 62 | 63 | self.algo.actor.net.gmm.num_modes = 5 64 | self.algo.actor.net.gmm.min_std = 0.0001 65 | 66 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions 67 | 68 | self.algo.actor.max_gradient_norm = None 69 | 70 | # actor residual MLP settings 71 | self.algo.actor.res_mlp.enabled = False 72 | self.algo.actor.res_mlp.num_blocks = 4 73 | self.algo.actor.res_mlp.hidden_dim = 1024 74 | self.algo.actor.res_mlp.use_layer_norm = True 75 | 76 | # ================== Critic Network Config =================== 77 | # critic ensemble parameters (TD3 trick) 78 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 79 | self.algo.critic.ensemble_method = "min" 80 | self.algo.critic.target_ensemble_method = "mean" 81 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions 82 | self.algo.critic.use_huber = False 83 | 84 | # critic residual MLP settings 85 | self.algo.critic.res_mlp.enabled = False 86 | self.algo.critic.res_mlp.num_blocks = 4 87 | self.algo.critic.res_mlp.hidden_dim = 1024 88 | self.algo.critic.res_mlp.use_layer_norm = True 89 | 90 | # distributional critic 91 | self.algo.critic.distributional.enabled = False # train distributional critic 92 | self.algo.critic.distributional.num_atoms = 
51 # number of values in categorical distribution 93 | self.algo.critic.value_bounds = None 94 | 95 | self.algo.adv.use_mle_for_vf = False 96 | self.algo.adv.vf_K = 4 97 | self.algo.adv.value_method = "mean" 98 | self.algo.adv.filter_type = "softmax" 99 | self.algo.adv.use_final_clip = False 100 | self.algo.adv.clip_adv_value = None 101 | self.algo.adv.beta = 1.0 102 | self.algo.adv.multi_weight = None 103 | 104 | self.algo.critic.max_gradient_norm = None 105 | 106 | self.algo.hc_weights.use_adv_score = False 107 | 108 | # RNN policy settings 109 | self.algo.actor.rnn.enabled = False # whether to train RNN policy 110 | self.algo.actor.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length 111 | self.algo.actor.rnn.hidden_dim = 400 # hidden dimension size 112 | self.algo.actor.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" 113 | self.algo.actor.rnn.num_layers = 2 # number of RNN layers that are stacked 114 | self.algo.actor.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) 115 | self.algo.actor.rnn.kwargs.bidirectional = False # rnn kwargs 116 | self.algo.actor.rnn.use_res_mlp = False 117 | self.algo.actor.rnn.res_mlp_kwargs = None 118 | self.algo.actor.rnn.kwargs.do_not_lock_keys() 119 | 120 | self.algo.hc_weights.use_hardcode_weight = False -------------------------------------------------------------------------------- /robomimic/config/bc_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for BC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class BCConfig(BaseConfig): 9 | ALGO_NAME = "bc" 10 | 11 | def algo_config(self): 12 | """ 13 | This function populates the `config.algo` attribute of the config, and is given to the 14 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 15 | argument to the constructor. Any parameter that an algorithm needs to determine its 16 | training and test-time behavior should be populated here. 
17 | """ 18 | super(BCConfig, self).algo_config() 19 | 20 | # optimization parameters 21 | self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate 22 | self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 23 | self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 24 | self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength 25 | 26 | # loss weights 27 | self.algo.loss.l2_weight = 1.0 # L2 loss weight 28 | self.algo.loss.l1_weight = 0.0 # L1 loss weight 29 | self.algo.loss.cos_weight = 0.0 # cosine loss weight 30 | 31 | # MLP network architecture (layers after observation encoder and RNN, if present) 32 | self.algo.actor_layer_dims = (1024, 1024) 33 | self.algo.max_gradient_norm = None 34 | 35 | # residual MLP settings 36 | self.algo.res_mlp.enabled = False 37 | self.algo.res_mlp.num_blocks = 4 38 | self.algo.res_mlp.hidden_dim = 1024 39 | self.algo.res_mlp.use_layer_norm = True 40 | 41 | # stochastic Gaussian policy settings 42 | self.algo.gaussian.enabled = False # whether to train a Gaussian policy 43 | self.algo.gaussian.fixed_std = False # whether to train std output or keep it constant 44 | self.algo.gaussian.init_std = 0.1 # initial standard deviation (or constant) 45 | self.algo.gaussian.min_std = 0.01 # minimum std output from network 46 | self.algo.gaussian.std_activation = "softplus" # activation to use for std output from policy net 47 | self.algo.gaussian.low_noise_eval = True # low-std at test-time 48 | 49 | # stochastic GMM policy settings 50 | self.algo.gmm.enabled = False # whether to train a GMM policy 51 | self.algo.gmm.num_modes = 5 # number of GMM modes 52 | self.algo.gmm.min_std = 0.0001 # minimum std output from network 53 | self.algo.gmm.std_activation = "softplus" # activation to use for std output from policy net 54 | self.algo.gmm.low_noise_eval = True # low-std at test-time 55 | 56 | # stochastic VAE policy settings 57 | self.algo.vae.enabled = False # whether to train a VAE policy 58 | self.algo.vae.latent_dim = 14 # VAE latent dimnsion - set to twice the dimensionality of action space 59 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable) 60 | self.algo.vae.kl_weight = 1. 
# beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO 61 | 62 | # VAE decoder settings 63 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation 64 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss 65 | 66 | # VAE prior settings 67 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1) 68 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations 69 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior 70 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes 71 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights 72 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior 73 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension 74 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass 75 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp 76 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate 77 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp 78 | 79 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions 80 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions 81 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) 82 | 83 | # RNN policy settings 84 | self.algo.rnn.enabled = False # whether to train RNN policy 85 | self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length 86 | self.algo.rnn.hidden_dim = 400 # hidden dimension size 87 | self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" 88 | self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked 89 | self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) 90 | self.algo.rnn.kwargs.bidirectional = False # rnn kwargs 91 | self.algo.rnn.kwargs.do_not_lock_keys() 92 | 93 | self.algo.hc_weights.traj_label_type = "last" 94 | 95 | self.algo.hc_weights.batch_normalize = True -------------------------------------------------------------------------------- /robomimic/config/bcq_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for BCQ algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | from robomimic.config.bc_config import BCConfig 7 | 8 | 9 | class BCQConfig(BaseConfig): 10 | ALGO_NAME = "bcq" 11 | 12 | def algo_config(self): 13 | """ 14 | This function populates the `config.algo` attribute of the config, and is given to the 15 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 16 | argument to the constructor. Any parameter that an algorithm needs to determine its 17 | training and test-time behavior should be populated here. 
18 | """ 19 | super(BCQConfig, self).algo_config() 20 | 21 | # optimization parameters 22 | self.algo.optim_params.critic.learning_rate.initial = 1e-3 # critic learning rate 23 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 24 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 25 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 26 | self.algo.optim_params.critic.start_epoch = -1 # number of epochs before starting critic training (-1 means start right away) 27 | self.algo.optim_params.critic.end_epoch = -1 # number of epochs before ending critic training (-1 means start right away) 28 | 29 | self.algo.optim_params.action_sampler.learning_rate.initial = 1e-3 # action sampler learning rate 30 | self.algo.optim_params.action_sampler.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 31 | self.algo.optim_params.action_sampler.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 32 | self.algo.optim_params.action_sampler.regularization.L2 = 0.00 # L2 regularization strength 33 | self.algo.optim_params.action_sampler.start_epoch = -1 # number of epochs before starting action sampler training (-1 means start right away) 34 | self.algo.optim_params.action_sampler.end_epoch = -1 # number of epochs before ending action sampler training (-1 means start right away) 35 | 36 | self.algo.optim_params.actor.learning_rate.initial = 1e-3 # actor learning rate 37 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 38 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 39 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 40 | self.algo.optim_params.actor.start_epoch = -1 # number of epochs before starting actor training (-1 means start right away) 41 | self.algo.optim_params.actor.end_epoch = -1 # number of epochs before ending actor training (-1 means start right away) 42 | 43 | # target network related parameters 44 | self.algo.discount = 0.99 # discount factor to use 45 | self.algo.n_step = 1 # for using n-step returns in TD-updates 46 | self.algo.target_tau = 0.005 # update rate for target networks 47 | self.algo.infinite_horizon = False # if True, scale terminal rewards by 1 / (1 - discount) to treat as infinite horizon 48 | 49 | # ================== Critic Network Config =================== 50 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic 51 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping) 52 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates 53 | self.algo.critic.num_action_samples = 10 # number of actions to sample per training batch to get target critic value 54 | self.algo.critic.num_action_samples_rollout = 100 # number of actions to sample per environment step 55 | 56 | # critic ensemble parameters (TD3 trick) 57 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 58 | self.algo.critic.ensemble.weight = 0.75 # weighting for mixing min and max for target Q value 59 | 60 | # distributional critic 61 | self.algo.critic.distributional.enabled = False # train distributional critic (C51) 62 | self.algo.critic.distributional.num_atoms = 51 # number of values in categorical distribution 63 | 64 | 
self.algo.critic.layer_dims = (300, 400) # size of critic MLP 65 | 66 | # ================== Action Sampler Config =================== 67 | self.algo.action_sampler = BCConfig().algo 68 | # use VAE by default 69 | self.algo.action_sampler.vae.enabled = True 70 | # remove unused parts of BCConfig algo config 71 | del self.algo.action_sampler.optim_params # since action sampler optim params specified at top-level 72 | del self.algo.action_sampler.loss 73 | del self.algo.action_sampler.gaussian 74 | del self.algo.action_sampler.rnn 75 | 76 | # Number of epochs before freezing encoder (-1 for no freezing). Only applies to cVAE-based action samplers. 77 | with self.algo.action_sampler.unlocked(): 78 | self.algo.action_sampler.freeze_encoder_epoch = -1 79 | 80 | # ================== Actor Network Config =================== 81 | self.algo.actor.enabled = False # whether to use the actor perturbation network 82 | self.algo.actor.perturbation_scale = 0.05 # size of learned action perturbations 83 | self.algo.actor.layer_dims = (300, 400) # size of actor MLP 84 | -------------------------------------------------------------------------------- /robomimic/config/cql_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for CQL algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class CQLConfig(BaseConfig): 9 | ALGO_NAME = "cql" 10 | 11 | def train_config(self): 12 | """ 13 | Update from superclass to change default batch size. 14 | """ 15 | super(CQLConfig, self).train_config() 16 | 17 | # increase batch size to 1024 (found to work better for most manipulation experiments) 18 | self.train.batch_size = 1024 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor. Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 26 | """ 27 | super(CQLConfig, self).algo_config() 28 | 29 | # optimization parameters 30 | self.algo.optim_params.critic.learning_rate.initial = 1e-3 # critic learning rate 31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 34 | 35 | self.algo.optim_params.actor.learning_rate.initial = 3e-4 # actor learning rate 36 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 37 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 38 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 39 | 40 | # target network related parameters 41 | self.algo.discount = 0.99 # discount factor to use 42 | self.algo.n_step = 1 # for using n-step returns in TD-updates 43 | self.algo.target_tau = 0.005 # update rate for target networks 44 | 45 | # ================== Actor Network Config =================== 46 | self.algo.actor.bc_start_steps = 0 # uses BC policy loss for first n-training steps 47 | self.algo.actor.target_entropy = "default" # None is fixed entropy, otherwise is automatically tuned to match target. 
Can specify "default" as well for default tuning target 48 | self.algo.actor.max_gradient_norm = None # L2 gradient clipping for actor 49 | 50 | # Actor network settings 51 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet) 52 | 53 | # Actor network settings - shared 54 | self.algo.actor.net.common.std_activation = "exp" # Activation to use for std output from policy net 55 | self.algo.actor.net.common.use_tanh = True # Whether to use tanh at output of actor network 56 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage 57 | 58 | # Actor network settings - gaussian 59 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value 60 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net 61 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not 62 | 63 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions 64 | 65 | # ================== Critic Network Config =================== 66 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic 67 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping) 68 | 69 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates 70 | 71 | self.algo.critic.num_action_samples = 1 # number of actions to sample per training batch to get target critic value; use maximum Q value from n random sampled actions when doing TD error backup 72 | 73 | # cql settings for critic 74 | self.algo.critic.cql_weight = 1.0 # weighting for cql component of critic loss (only used if target_q_gap is < 0 or None) 75 | self.algo.critic.deterministic_backup = True # if not set, subtract weighted logprob of action when doing backup 76 | self.algo.critic.min_q_weight = 1.0 # min q weight (scaling factor) to apply 77 | self.algo.critic.target_q_gap = 5.0 # if set, sets the diff threshold at which Q-values will be penalized more (note: this overrides cql weight above!) Use None or a negative value if not set 78 | self.algo.critic.num_random_actions = 10 # Number of random actions to sample when calculating CQL loss 79 | 80 | # critic ensemble parameters (TD3 trick) 81 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 82 | 83 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions 84 | -------------------------------------------------------------------------------- /robomimic/config/gl_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for Goal Learning (sub-algorithm used by hierarchical models like HBC and IRIS). 3 | This class of model predicts (or samples) subgoal observations given a current observation. 4 | """ 5 | 6 | from robomimic.config.base_config import BaseConfig 7 | 8 | 9 | class GLConfig(BaseConfig): 10 | ALGO_NAME = "gl" 11 | 12 | def algo_config(self): 13 | """ 14 | This function populates the `config.algo` attribute of the config, and is given to the 15 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 16 | argument to the constructor. Any parameter that an algorithm needs to determine its 17 | training and test-time behavior should be populated here. 
18 | """ 19 | super(GLConfig, self).algo_config() 20 | 21 | # optimization parameters 22 | self.algo.optim_params.goal_network.learning_rate.initial = 1e-4 # goal network learning rate 23 | self.algo.optim_params.goal_network.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 24 | self.algo.optim_params.goal_network.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 25 | self.algo.optim_params.goal_network.regularization.L2 = 0.00 26 | 27 | # subgoal definition: observation that is @subgoal_horizon number of timesteps in future from current observation 28 | self.algo.subgoal_horizon = 10 29 | 30 | # MLP size for deterministic goal network (unused if VAE is enabled) 31 | self.algo.ae.planner_layer_dims = (300, 400) 32 | 33 | # ================== VAE config ================== 34 | self.algo.vae.enabled = True # set to true to use VAE network 35 | self.algo.vae.latent_dim = 16 # VAE latent dimension 36 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable) 37 | self.algo.vae.kl_weight = 1. # beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO 38 | 39 | # VAE decoder settings 40 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation 41 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss 42 | 43 | # VAE prior settings 44 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1) 45 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations 46 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior 47 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes 48 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights 49 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior 50 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension 51 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass 52 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp 53 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate 54 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp 55 | 56 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions 57 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions 58 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) 59 | 60 | def observation_config(self): 61 | """ 62 | Update from superclass to specify subgoal modalities. 63 | """ 64 | super(GLConfig, self).observation_config() 65 | self.observation.modalities.subgoal.low_dim = [ # specify low-dim subgoal observations for agent to predict 66 | "robot0_eef_pos", 67 | "robot0_eef_quat", 68 | "robot0_gripper_qpos", 69 | "object", 70 | ] 71 | self.observation.modalities.subgoal.rgb = [] # specify rgb image subgoal observations for agent to predict 72 | self.observation.modalities.subgoal.depth = [] 73 | self.observation.modalities.subgoal.scan = [] 74 | self.observation.modalities.subgoal.do_not_lock_keys() 75 | 76 | @property 77 | def all_obs_keys(self): 78 | """ 79 | Update from superclass to include subgoals. 
80 | """ 81 | # pool all modalities 82 | return sorted(tuple(set([ 83 | obs_key for group in [ 84 | self.observation.modalities.obs.values(), 85 | self.observation.modalities.goal.values(), 86 | self.observation.modalities.subgoal.values(), 87 | ] 88 | for modality in group 89 | for obs_key in modality 90 | ]))) 91 | -------------------------------------------------------------------------------- /robomimic/config/hbc_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for HBC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | from robomimic.config.gl_config import GLConfig 7 | from robomimic.config.bc_config import BCConfig 8 | 9 | 10 | class HBCConfig(BaseConfig): 11 | ALGO_NAME = "hbc" 12 | 13 | def train_config(self): 14 | """ 15 | Update from superclass to change default sequence length to load from dataset. 16 | """ 17 | super(HBCConfig, self).train_config() 18 | self.train.seq_length = 10 # length of experience sequence to fetch from the buffer 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor. Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 26 | """ 27 | super(HBCConfig, self).algo_config() 28 | 29 | # One of ["separate", "cascade"]. In "separate" mode (default), 30 | # the planner and actor are trained independently and then the planner subgoal predictions are 31 | # used to condition the actor at test-time. In "cascade" mode, the actor is trained directly 32 | # on planner subgoal predictions. In "actor_only" mode, only the actor is trained, and in 33 | # "planner_only" mode, only the planner is trained. 34 | self.algo.mode = "separate" 35 | self.algo.actor_use_random_subgoals = False # whether to sample subgoal index from [1, subgoal_horizon] 36 | self.algo.subgoal_update_interval = 10 # how frequently the subgoal should be updated at test-time 37 | 38 | 39 | # ================== Latent Subgoal Config ================== 40 | self.algo.latent_subgoal.enabled = False # if True, use VAE latent space as subgoals for actor, instead of reconstructions 41 | 42 | # prior correction trick for actor and value training: instead of using encoder for 43 | # transforming subgoals to latent subgoals, generate prior samples and choose 44 | # the closest one to the encoder output 45 | self.algo.latent_subgoal.prior_correction.enabled = False 46 | self.algo.latent_subgoal.prior_correction.num_samples = 100 47 | 48 | # ================== Planner Config ================== 49 | self.algo.planner = GLConfig().algo # config for goal learning 50 | # set subgoal horizon explicitly 51 | self.algo.planner.subgoal_horizon = 10 52 | # ensure VAE is used 53 | self.algo.planner.vae.enabled = True 54 | 55 | # ================== Actor Config =================== 56 | self.algo.actor = BCConfig().algo 57 | # use RNN 58 | self.algo.actor.rnn.enabled = True 59 | self.algo.actor.rnn.horizon = 10 60 | # remove unused parts of BCConfig algo config 61 | del self.algo.actor.gaussian 62 | del self.algo.actor.gmm 63 | del self.algo.actor.vae 64 | 65 | def observation_config(self): 66 | """ 67 | Update from superclass so that planner and actor each get their own observation config. 
68 | """ 69 | self.observation.planner = GLConfig().observation 70 | self.observation.actor = BCConfig().observation 71 | 72 | @property 73 | def use_goals(self): 74 | """ 75 | Update from superclass - planner goal modalities determine goal-conditioning 76 | """ 77 | return len( 78 | self.observation.planner.modalities.goal.low_dim + 79 | self.observation.planner.modalities.goal.rgb) > 0 80 | 81 | @property 82 | def all_obs_keys(self): 83 | """ 84 | Update from superclass to include modalities from planner and actor. 85 | """ 86 | # pool all modalities 87 | return sorted(tuple(set([ 88 | obs_key for group in [ 89 | self.observation.planner.modalities.obs.values(), 90 | self.observation.planner.modalities.goal.values(), 91 | self.observation.planner.modalities.subgoal.values(), 92 | self.observation.actor.modalities.obs.values(), 93 | self.observation.actor.modalities.goal.values(), 94 | ] 95 | for modality in group 96 | for obs_key in modality 97 | ]))) 98 | -------------------------------------------------------------------------------- /robomimic/config/iql_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for CQL algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class IQLConfig(BaseConfig): 9 | ALGO_NAME = "iql" 10 | 11 | def train_config(self): 12 | """ 13 | Update from superclass to change default batch size. 14 | """ 15 | super(IQLConfig, self).train_config() 16 | 17 | # increase batch size to 1024 (found to work better for most manipulation experiments) 18 | self.train.batch_size = 1024 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor. Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 
26 | """ 27 | super(IQLConfig, self).algo_config() 28 | 29 | # optimization parameters 30 | self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate 31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 34 | 35 | self.algo.optim_params.vf.learning_rate.initial = 1e-4 # actor learning rate 36 | self.algo.optim_params.vf.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 37 | self.algo.optim_params.vf.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 38 | self.algo.optim_params.vf.regularization.L2 = 0.00 # L2 regularization strength 39 | 40 | self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate 41 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 42 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 43 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 44 | 45 | # target network related parameters 46 | self.algo.discount = 0.99 # discount factor to use 47 | self.algo.target_tau = 0.01 # update rate for target networks 48 | self.algo.ignore_dones = False 49 | self.algo.use_negative_rewards = False 50 | self.algo.use_shaped_rewards = False 51 | self.algo.relabel_dones_mode = None 52 | self.algo.relabel_rewards_mode = None 53 | 54 | # Actor network settings 55 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet) 56 | 57 | # Actor network settings - shared 58 | self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net 59 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage 60 | self.algo.actor.net.common.use_tanh = False 61 | 62 | # Actor network settings - gaussian 63 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value 64 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net 65 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not 66 | 67 | self.algo.actor.net.gmm.num_modes = 5 68 | self.algo.actor.net.gmm.min_std = 0.0001 69 | 70 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions 71 | 72 | self.algo.actor.max_gradient_norm = None 73 | 74 | # actor residual MLP settings 75 | self.algo.actor.res_mlp.enabled = False 76 | self.algo.actor.res_mlp.num_blocks = 4 77 | self.algo.actor.res_mlp.hidden_dim = 1024 78 | self.algo.actor.res_mlp.use_layer_norm = True 79 | 80 | # ================== Critic Network Config =================== 81 | # critic ensemble parameters (TD3 trick) 82 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 83 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions 84 | self.algo.critic.use_huber = False 85 | 86 | # critic residual MLP settings 87 | self.algo.critic.res_mlp.enabled = False 88 | self.algo.critic.res_mlp.num_blocks = 4 89 | self.algo.critic.res_mlp.hidden_dim = 1024 90 | self.algo.critic.res_mlp.use_layer_norm = True 91 | 92 | self.algo.adv.filter_type = "softmax" 93 
| self.algo.adv.use_final_clip = True 94 | self.algo.adv.clip_adv_value = None 95 | self.algo.adv.beta = 1.0 96 | 97 | self.algo.vf_quantile = 0.9 98 | 99 | self.algo.critic.max_gradient_norm = None 100 | -------------------------------------------------------------------------------- /robomimic/config/iris_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for IRIS algorithm. 3 | """ 4 | 5 | from robomimic.config.bcq_config import BCQConfig 6 | from robomimic.config.gl_config import GLConfig 7 | from robomimic.config.bc_config import BCConfig 8 | from robomimic.config.hbc_config import HBCConfig 9 | 10 | 11 | class IRISConfig(HBCConfig): 12 | ALGO_NAME = "iris" 13 | 14 | def algo_config(self): 15 | """ 16 | This function populates the `config.algo` attribute of the config, and is given to the 17 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 18 | argument to the constructor. Any parameter that an algorithm needs to determine its 19 | training and test-time behavior should be populated here. 20 | """ 21 | super(IRISConfig, self).algo_config() 22 | 23 | # One of ["separate", "cascade", "actor_only", "planner_only"]. In "separate" mode (default), 24 | # the planner and actor are trained independently and then the planner subgoal predictions are 25 | # used to condition the actor at test-time. In "cascade" mode, the actor is trained directly 26 | # on planner subgoal predictions. In "actor_only" mode, only the actor is trained, and in 27 | # "planner_only" mode, only the planner is trained. 28 | self.algo.mode = "separate" 29 | 30 | self.algo.actor_use_random_subgoals = False # whether to sample subgoal index from [1, subgoal_horizon] 31 | self.algo.subgoal_update_interval = 10 # how frequently the subgoal should be updated at test-time (usually matches train.seq_length) 32 | 33 | # ================== Latent Subgoal Config ================== 34 | 35 | # NOTE: latent subgoals are not supported by IRIS, but superclass expects this config 36 | self.algo.latent_subgoal.enabled = False 37 | self.algo.latent_subgoal.prior_correction.enabled = False 38 | self.algo.latent_subgoal.prior_correction.num_samples = 100 39 | 40 | # ================== Planner Config ================== 41 | 42 | # The ValuePlanner planner component is a Goal Learning VAE model 43 | self.algo.value_planner.planner = GLConfig().algo # config for goal learning 44 | # set subgoal horizon explicitly 45 | self.algo.value_planner.planner.subgoal_horizon = 10 46 | # ensure VAE is used 47 | self.algo.value_planner.planner.vae.enabled = True 48 | 49 | # The ValuePlanner value component is a BCQ model 50 | self.algo.value_planner.value = BCQConfig().algo 51 | self.algo.value_planner.value.actor.enabled = False # ensure no BCQ actor 52 | # number of subgoal samples to use for value planner 53 | self.algo.value_planner.num_samples = 100 54 | 55 | # ================== Actor Config =================== 56 | self.algo.actor = BCConfig().algo 57 | # use RNN 58 | self.algo.actor.rnn.enabled = True 59 | self.algo.actor.rnn.horizon = 10 60 | # remove unused parts of BCConfig algo config 61 | del self.algo.actor.gaussian 62 | del self.algo.actor.gmm 63 | del self.algo.actor.vae 64 | 65 | def observation_config(self): 66 | """ 67 | Update from superclass so that value planner and actor each get their own obs config.
68 | """ 69 | self.observation.value_planner.planner = GLConfig().observation 70 | self.observation.value_planner.value = BCQConfig().observation 71 | self.observation.actor = BCConfig().observation 72 | 73 | @property 74 | def use_goals(self): 75 | """ 76 | Update from superclass - value planner goal modalities determine goal-conditioning. 77 | """ 78 | return len( 79 | self.observation.value_planner.planner.modalities.goal.low_dim + 80 | self.observation.value_planner.planner.modalities.goal.rgb) > 0 81 | 82 | @property 83 | def all_obs_keys(self): 84 | """ 85 | Update from superclass to include modalities from value planner and actor. 86 | """ 87 | # pool all modalities 88 | return sorted(tuple(set([ 89 | obs_key for group in [ 90 | self.observation.value_planner.planner.modalities.obs.values(), 91 | self.observation.value_planner.planner.modalities.goal.values(), 92 | self.observation.value_planner.planner.modalities.subgoal.values(), 93 | self.observation.value_planner.value.modalities.obs.values(), 94 | self.observation.value_planner.value.modalities.goal.values(), 95 | self.observation.actor.modalities.obs.values(), 96 | self.observation.actor.modalities.goal.values(), 97 | ] 98 | for modality in group 99 | for obs_key in modality 100 | ]))) 101 | -------------------------------------------------------------------------------- /robomimic/config/td3_bc_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for TD3_BC. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class TD3_BCConfig(BaseConfig): 9 | ALGO_NAME = "td3_bc" 10 | 11 | def experiment_config(self): 12 | """ 13 | Update from subclass to set paper defaults for gym envs. 14 | """ 15 | super(TD3_BCConfig, self).experiment_config() 16 | 17 | # no validation and no video rendering 18 | self.experiment.validate = False 19 | self.experiment.render_video = False 20 | 21 | # save 10 checkpoints throughout training 22 | self.experiment.save.every_n_epochs = 20 23 | 24 | # save models that achieve best rollout return instead of best success rate 25 | self.experiment.save.on_best_rollout_return = True 26 | self.experiment.save.on_best_rollout_success_rate = False 27 | 28 | # epoch definition - 5000 gradient steps per epoch, with 200 epochs = 1M gradient steps, and eval every 1 epochs 29 | self.experiment.epoch_every_n_steps = 5000 30 | 31 | # evaluate with normal environment rollouts 32 | self.experiment.rollout.enabled = True 33 | self.experiment.rollout.n = 50 # paper uses 10, but we can afford to do 50 34 | self.experiment.rollout.horizon = 1000 35 | self.experiment.rollout.rate = 1 # rollout every epoch to match paper 36 | 37 | def train_config(self): 38 | """ 39 | Update from subclass to set paper defaults for gym envs. 40 | """ 41 | super(TD3_BCConfig, self).train_config() 42 | 43 | # update to normalize observations 44 | self.train.hdf5_normalize_obs = True 45 | 46 | # increase batch size to 256 47 | self.train.batch_size = 256 48 | 49 | # 200 epochs, with each epoch lasting 5000 gradient steps, for 1M total steps 50 | self.train.num_epochs = 200 51 | 52 | def algo_config(self): 53 | """ 54 | This function populates the `config.algo` attribute of the config, and is given to the 55 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 56 | argument to the constructor. Any parameter that an algorithm needs to determine its 57 | training and test-time behavior should be populated here. 
58 | """ 59 | super(TD3_BCConfig, self).algo_config() 60 | 61 | # optimization parameters 62 | self.algo.optim_params.critic.learning_rate.initial = 3e-4 # critic learning rate 63 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 64 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 65 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 66 | self.algo.optim_params.critic.start_epoch = -1 # number of epochs before starting critic training (-1 means start right away) 67 | self.algo.optim_params.critic.end_epoch = -1 # number of epochs before ending critic training (-1 means start right away) 68 | 69 | self.algo.optim_params.actor.learning_rate.initial = 3e-4 # actor learning rate 70 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 71 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 72 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 73 | self.algo.optim_params.actor.start_epoch = -1 # number of epochs before starting actor training (-1 means start right away) 74 | self.algo.optim_params.actor.end_epoch = -1 # number of epochs before ending actor training (-1 means start right away) 75 | 76 | # alpha value - for weighting critic loss vs. BC loss 77 | self.algo.alpha = 2.5 78 | 79 | # target network related parameters 80 | self.algo.discount = 0.99 # discount factor to use 81 | self.algo.n_step = 1 # for using n-step returns in TD-updates 82 | self.algo.target_tau = 0.005 # update rate for target networks 83 | self.algo.infinite_horizon = False # if True, scale terminal rewards by 1 / (1 - discount) to treat as infinite horizon 84 | 85 | # ================== Critic Network Config =================== 86 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic 87 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping) 88 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates 89 | 90 | # critic ensemble parameters (TD3 trick) 91 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 92 | self.algo.critic.ensemble.weight = 1.0 # weighting for mixing min and max for target Q value 93 | 94 | self.algo.critic.layer_dims = (256, 256) # size of critic MLP 95 | 96 | # ================== Actor Network Config =================== 97 | 98 | # update actor and target networks every n gradients steps for each critic gradient step 99 | self.algo.actor.update_freq = 2 100 | 101 | # exploration noise used to form target action for Q-update - clipped Gaussian noise 102 | self.algo.actor.noise_std = 0.2 # zero-mean gaussian noise with this std is applied to actions 103 | self.algo.actor.noise_clip = 0.5 # noise is clipped in each dimension to (-noise_clip, noise_clip) 104 | 105 | self.algo.actor.layer_dims = (256, 256) # size of actor MLP 106 | 107 | def observation_config(self): 108 | """ 109 | Update from superclass to use flat observations from gym envs. 
110 | """ 111 | super(TD3_BCConfig, self).observation_config() 112 | self.observation.modalities.obs.low_dim = ["flat"] 113 | -------------------------------------------------------------------------------- /robomimic/config/vae_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for BC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class VAEConfig(BaseConfig): 9 | ALGO_NAME = "vae" 10 | 11 | def algo_config(self): 12 | """ 13 | This function populates the `config.algo` attribute of the config, and is given to the 14 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 15 | argument to the constructor. Any parameter that an algorithm needs to determine its 16 | training and test-time behavior should be populated here. 17 | """ 18 | super(VAEConfig, self).algo_config() 19 | 20 | # optimization parameters 21 | self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate 22 | self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 23 | self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 24 | self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength 25 | 26 | # loss weights 27 | self.algo.loss.l2_weight = 1.0 # L2 loss weight 28 | self.algo.loss.l1_weight = 0.0 # L1 loss weight 29 | self.algo.loss.cos_weight = 0.0 # cosine loss weight 30 | 31 | # MLP network architecture (layers after observation encoder and RNN, if present) 32 | self.algo.actor_layer_dims = (1024, 1024) 33 | self.algo.max_gradient_norm = None 34 | 35 | # residual MLP settings 36 | self.algo.res_mlp.enabled = False 37 | self.algo.res_mlp.num_blocks = 4 38 | self.algo.res_mlp.hidden_dim = 1024 39 | self.algo.res_mlp.use_layer_norm = True 40 | 41 | # stochastic Gaussian policy settings 42 | self.algo.gaussian.enabled = False # whether to train a Gaussian policy 43 | self.algo.gaussian.fixed_std = False # whether to train std output or keep it constant 44 | self.algo.gaussian.init_std = 0.1 # initial standard deviation (or constant) 45 | self.algo.gaussian.min_std = 0.01 # minimum std output from network 46 | self.algo.gaussian.std_activation = "softplus" # activation to use for std output from policy net 47 | self.algo.gaussian.low_noise_eval = True # low-std at test-time 48 | 49 | # stochastic GMM policy settings 50 | self.algo.gmm.enabled = False # whether to train a GMM policy 51 | self.algo.gmm.num_modes = 5 # number of GMM modes 52 | self.algo.gmm.min_std = 0.0001 # minimum std output from network 53 | self.algo.gmm.std_activation = "softplus" # activation to use for std output from policy net 54 | self.algo.gmm.low_noise_eval = True # low-std at test-time 55 | 56 | # stochastic VAE policy settings 57 | self.algo.vae.enabled = False # whether to train a VAE policy (unused) 58 | self.algo.vae.method = "" # to be specified in json file 59 | self.algo.vae.latent_dim = 14 # VAE latent dimnsion - set to twice the dimensionality of action space 60 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable) 61 | self.algo.vae.kl_weight = 1. 
# beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO 62 | self.algo.vae.conditioned_on_obs = True 63 | 64 | # VAE decoder settings 65 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation 66 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss 67 | 68 | # VAE prior settings 69 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1) 70 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations 71 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior 72 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes 73 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights 74 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior 75 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension 76 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass 77 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp 78 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate 79 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp 80 | 81 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions 82 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions 83 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) 84 | 85 | # RNN policy settings 86 | self.algo.rnn.enabled = False # whether to train RNN policy 87 | self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length 88 | self.algo.rnn.hidden_dim = 400 # hidden dimension size 89 | self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" 90 | self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked 91 | self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) 92 | self.algo.rnn.kwargs.bidirectional = False # rnn kwargs 93 | self.algo.rnn.kwargs.do_not_lock_keys() 94 | 95 | # Hardcoded Weights 96 | self.algo.hc_weights.use_hardcode_weight = False 97 | self.algo.hc_weights.weight_key = "" 98 | self.algo.hc_weights.mixed_weights = False 99 | self.algo.hc_weights.use_adv_score = False 100 | 101 | self.algo.hc_weights.demos = 1 102 | self.algo.hc_weights.rollouts = 1 103 | self.algo.hc_weights.intvs = 1 104 | self.algo.hc_weights.pre_intvs = 0.1 105 | 106 | self.algo.hc_weights.traj_label_type = "last" 107 | 108 | self.algo.hc_weights.batch_normalize = True 109 | -------------------------------------------------------------------------------- /robomimic/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__init__.py -------------------------------------------------------------------------------- /robomimic/envs/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- 
/robomimic/envs/__pycache__/env_base.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__pycache__/env_base.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/envs/env_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the base class for environment wrappers that are used 3 | to provide a standardized environment API for training policies and interacting 4 | with metadata present in datasets. 5 | """ 6 | import abc 7 | 8 | 9 | class EnvType: 10 | """ 11 | Holds environment types - one per environment class. 12 | These act as identifiers for different environments. 13 | """ 14 | ROBOSUITE_TYPE = 1 15 | GYM_TYPE = 2 16 | IG_MOMART_TYPE = 3 17 | 18 | 19 | class EnvBase(abc.ABC): 20 | """A base class for environments used by this repo.""" 21 | @abc.abstractmethod 22 | def __init__( 23 | self, 24 | env_name, 25 | render=False, 26 | render_offscreen=False, 27 | use_image_obs=False, 28 | postprocess_visual_obs=True, 29 | **kwargs, 30 | ): 31 | """ 32 | Args: 33 | env_name (str): name of environment. Only needs to be provided if making a different 34 | environment from the one in @env_meta. 35 | 36 | render (bool): if True, environment supports on-screen rendering 37 | 38 | render_offscreen (bool): if True, environment supports off-screen rendering. This 39 | is forced to be True if @env_meta["use_images"] is True. 40 | 41 | use_image_obs (bool): if True, environment is expected to render rgb image observations 42 | on every env.step call. Set this to False for efficiency reasons, if image 43 | observations are not required. 44 | 45 | postprocess_visual_obs (bool): if True, postprocess image observations 46 | to prepare for learning. This should only be False when extracting observations 47 | for saving to a dataset (to save space on RGB images for example). 48 | """ 49 | return 50 | 51 | @abc.abstractmethod 52 | def step(self, action): 53 | """ 54 | Step in the environment with an action. 55 | 56 | Args: 57 | action (np.array): action to take 58 | 59 | Returns: 60 | observation (dict): new observation dictionary 61 | reward (float): reward for this step 62 | done (bool): whether the task is done 63 | info (dict): extra information 64 | """ 65 | return 66 | 67 | @abc.abstractmethod 68 | def reset(self): 69 | """ 70 | Reset environment. 71 | 72 | Returns: 73 | observation (dict): initial observation dictionary. 74 | """ 75 | return 76 | 77 | @abc.abstractmethod 78 | def reset_to(self, state): 79 | """ 80 | Reset to a specific simulator state. 81 | 82 | Args: 83 | state (dict): current simulator state 84 | 85 | Returns: 86 | observation (dict): observation dictionary after setting the simulator state 87 | """ 88 | return 89 | 90 | @abc.abstractmethod 91 | def render(self, mode="human", height=None, width=None, camera_name=None): 92 | """Render""" 93 | return 94 | 95 | @abc.abstractmethod 96 | def get_observation(self): 97 | """Get environment observation""" 98 | return 99 | 100 | @abc.abstractmethod 101 | def get_state(self): 102 | """Get environment simulator state, compatible with @reset_to""" 103 | return 104 | 105 | @abc.abstractmethod 106 | def get_reward(self): 107 | """ 108 | Get current reward.
109 | """ 110 | return 111 | 112 | @abc.abstractmethod 113 | def get_goal(self): 114 | """ 115 | Get goal observation. Not all environments support this. 116 | """ 117 | return 118 | 119 | @abc.abstractmethod 120 | def set_goal(self, **kwargs): 121 | """ 122 | Set goal observation with external specification. Not all environments support this. 123 | """ 124 | return 125 | 126 | @abc.abstractmethod 127 | def is_done(self): 128 | """ 129 | Check if the task is done (not necessarily successful). 130 | """ 131 | return 132 | 133 | @abc.abstractmethod 134 | def is_success(self): 135 | """ 136 | Check if the task condition(s) is reached. Should return a dictionary 137 | { str: bool } with at least a "task" key for the overall task success, 138 | and additional optional keys corresponding to other task criteria. 139 | """ 140 | return 141 | 142 | @property 143 | @abc.abstractmethod 144 | def action_dimension(self): 145 | """ 146 | Returns dimension of actions (int). 147 | """ 148 | return 149 | 150 | @property 151 | @abc.abstractmethod 152 | def name(self): 153 | """ 154 | Returns name of environment name (str). 155 | """ 156 | return 157 | 158 | @property 159 | @abc.abstractmethod 160 | def type(self): 161 | """ 162 | Returns environment type (int) for this kind of environment. 163 | This helps identify this env class. 164 | """ 165 | return 166 | 167 | @abc.abstractmethod 168 | def serialize(self): 169 | """ 170 | Save all information needed to re-instantiate this environment in a dictionary. 171 | This is the same as @env_meta - environment metadata stored in hdf5 datasets, 172 | and used in utils/env_utils.py. 173 | """ 174 | return 175 | 176 | @classmethod 177 | @abc.abstractmethod 178 | def create_for_data_processing(cls, camera_names, camera_height, camera_width, reward_shaping, **kwargs): 179 | """ 180 | Create environment for processing datasets, which includes extracting 181 | observations, labeling dense / sparse rewards, and annotating dones in 182 | transitions. 183 | 184 | Args: 185 | camera_names ([str]): list of camera names that correspond to image observations 186 | camera_height (int): camera height for all cameras 187 | camera_width (int): camera width for all cameras 188 | reward_shaping (bool): if True, use shaped environment rewards, else use sparse task completion rewards 189 | 190 | Returns: 191 | env (EnvBase instance) 192 | """ 193 | return 194 | 195 | @property 196 | @abc.abstractmethod 197 | def rollout_exceptions(self): 198 | """ 199 | Return tuple of exceptions to except when doing rollouts. This is useful to ensure 200 | that the entire training run doesn't crash because of a bad policy that causes unstable 201 | simulation computations. 
202 | """ 203 | return 204 | 205 | -------------------------------------------------------------------------------- /robomimic/exps/templates/td3_bc.json: -------------------------------------------------------------------------------- 1 | { 2 | "algo_name": "td3_bc", 3 | "experiment": { 4 | "name": "test", 5 | "validate": false, 6 | "logging": { 7 | "terminal_output_to_txt": true, 8 | "log_tb": true 9 | }, 10 | "save": { 11 | "enabled": true, 12 | "every_n_seconds": null, 13 | "every_n_epochs": 20, 14 | "epochs": [], 15 | "on_best_validation": false, 16 | "on_best_rollout_return": true, 17 | "on_best_rollout_success_rate": false 18 | }, 19 | "epoch_every_n_steps": 5000, 20 | "validation_epoch_every_n_steps": 10, 21 | "env": null, 22 | "additional_envs": null, 23 | "render": false, 24 | "render_video": false, 25 | "keep_all_videos": false, 26 | "video_skip": 5, 27 | "rollout": { 28 | "enabled": true, 29 | "n": 50, 30 | "horizon": 1000, 31 | "rate": 1, 32 | "warmstart": 0, 33 | "terminate_on_success": true 34 | } 35 | }, 36 | "train": { 37 | "data": null, 38 | "output_dir": "../td3_bc_trained_models", 39 | "num_data_workers": 0, 40 | "hdf5_cache_mode": "all", 41 | "hdf5_use_swmr": true, 42 | "hdf5_normalize_obs": true, 43 | "hdf5_filter_key": null, 44 | "seq_length": 1, 45 | "dataset_keys": [ 46 | "actions", 47 | "rewards", 48 | "dones" 49 | ], 50 | "goal_mode": null, 51 | "cuda": true, 52 | "batch_size": 256, 53 | "num_epochs": 200, 54 | "seed": 1 55 | }, 56 | "algo": { 57 | "optim_params": { 58 | "critic": { 59 | "learning_rate": { 60 | "initial": 0.0003, 61 | "decay_factor": 0.1, 62 | "epoch_schedule": [] 63 | }, 64 | "regularization": { 65 | "L2": 0.0 66 | }, 67 | "start_epoch": -1, 68 | "end_epoch": -1 69 | }, 70 | "actor": { 71 | "learning_rate": { 72 | "initial": 0.0003, 73 | "decay_factor": 0.1, 74 | "epoch_schedule": [] 75 | }, 76 | "regularization": { 77 | "L2": 0.0 78 | }, 79 | "start_epoch": -1, 80 | "end_epoch": -1 81 | } 82 | }, 83 | "alpha": 2.5, 84 | "discount": 0.99, 85 | "n_step": 1, 86 | "target_tau": 0.005, 87 | "infinite_horizon": false, 88 | "critic": { 89 | "use_huber": false, 90 | "max_gradient_norm": null, 91 | "value_bounds": null, 92 | "ensemble": { 93 | "n": 2, 94 | "weight": 1.0 95 | }, 96 | "layer_dims": [ 97 | 256, 98 | 256 99 | ] 100 | }, 101 | "actor": { 102 | "update_freq": 2, 103 | "noise_std": 0.2, 104 | "noise_clip": 0.5, 105 | "layer_dims": [ 106 | 256, 107 | 256 108 | ] 109 | } 110 | }, 111 | "observation": { 112 | "modalities": { 113 | "obs": { 114 | "low_dim": [ 115 | "flat" 116 | ], 117 | "rgb": [], 118 | "depth": [], 119 | "scan": [] 120 | }, 121 | "goal": { 122 | "low_dim": [], 123 | "rgb": [], 124 | "depth": [], 125 | "scan": [] 126 | } 127 | }, 128 | "encoder": { 129 | "low_dim": { 130 | "feature_dimension": null, 131 | "core_class": null, 132 | "core_kwargs": {}, 133 | "obs_randomizer_class": null, 134 | "obs_randomizer_kwargs": {} 135 | }, 136 | "rgb": { 137 | "feature_dimension": 64, 138 | "core_class": "VisualCore", 139 | "core_kwargs": { 140 | "backbone_class": "ResNet18Conv", 141 | "backbone_kwargs": { 142 | "pretrained": false, 143 | "input_coord_conv": false 144 | } 145 | }, 146 | "obs_randomizer_class": null, 147 | "obs_randomizer_kwargs": { 148 | "crop_height": 76, 149 | "crop_width": 76, 150 | "num_crops": 1, 151 | "pos_enc": false 152 | }, 153 | "pool_class": "SpatialSoftmax", 154 | "pool_kwargs": { 155 | "num_kp": 32, 156 | "learnable_temperature": false, 157 | "temperature": 1.0, 158 | "noise_std": 0.0 159 | } 160 | }, 161 | 
"depth": { 162 | "feature_dimension": 64, 163 | "core_class": "VisualCore", 164 | "core_kwargs": { 165 | "backbone_class": "ResNet18Conv", 166 | "backbone_kwargs": { 167 | "pretrained": false, 168 | "input_coord_conv": false 169 | } 170 | }, 171 | "obs_randomizer_class": null, 172 | "obs_randomizer_kwargs": { 173 | "crop_height": 76, 174 | "crop_width": 76, 175 | "num_crops": 1, 176 | "pos_enc": false 177 | }, 178 | "pool_class": "SpatialSoftmax", 179 | "pool_kwargs": { 180 | "num_kp": 32, 181 | "learnable_temperature": false, 182 | "temperature": 1.0, 183 | "noise_std": 0.0 184 | } 185 | }, 186 | "scan": { 187 | "feature_dimension": 64, 188 | "core_class": "ScanCore", 189 | "core_kwargs": { 190 | "backbone_class": "ResNet18Conv", 191 | "backbone_kwargs": { 192 | "pretrained": false, 193 | "input_coord_conv": false 194 | }, 195 | "conv_kwargs": { 196 | "out_channels": [ 197 | 32, 198 | 64, 199 | 64 200 | ], 201 | "kernel_size": [ 202 | 8, 203 | 4, 204 | 2 205 | ], 206 | "stride": [ 207 | 4, 208 | 2, 209 | 1 210 | ] 211 | } 212 | }, 213 | "obs_randomizer_class": null, 214 | "obs_randomizer_kwargs": { 215 | "crop_height": 76, 216 | "crop_width": 76, 217 | "num_crops": 1, 218 | "pos_enc": false 219 | }, 220 | "pool_class": "SpatialSoftmax", 221 | "pool_kwargs": { 222 | "num_kp": 32, 223 | "learnable_temperature": false, 224 | "temperature": 1.0, 225 | "noise_std": 0.0 226 | } 227 | } 228 | } 229 | } 230 | } -------------------------------------------------------------------------------- /robomimic/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_nets import EncoderCore, Randomizer 2 | -------------------------------------------------------------------------------- /robomimic/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/base_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/base_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/distributions.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/distributions.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/obs_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/obs_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/policy_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/policy_nets.cpython-38.pyc 
-------------------------------------------------------------------------------- /robomimic/models/__pycache__/vae_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/vae_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/value_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/value_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/scripts/check_same_initial_configs.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import sys 3 | import numpy as np 4 | 5 | dataset_path = sys.argv[1] # path to first hdf5 dataset 6 | 7 | f = h5py.File(dataset_path, "r") 8 | data = f["data"] 9 | demos = list(data.keys()) 10 | 11 | initial_state_lst = [] 12 | 13 | def same_state(s1, s2): 14 | return np.all(np.equal(s1, s2)) # states are identical iff all elements match 15 | 16 | for d in demos: 17 | this_s = data[d]["states"][()][0] # initial simulator state of this demo 18 | for s in initial_state_lst: 19 | if same_state(s, this_s): 20 | print("same state") 21 | initial_state_lst.append(this_s) 22 | 23 | ######################################### 24 | 25 | sec_dataset_path = sys.argv[2] # path to second hdf5 dataset 26 | f = h5py.File(sec_dataset_path, "r") 27 | sec_data = f["data"] 28 | sec_demos = list(sec_data.keys()) 29 | 30 | for d in sec_demos: 31 | this_s = sec_data[d]["states"][()][0] # initial simulator state of this demo 32 | for s in initial_state_lst: 33 | if same_state(s, this_s): 34 | print("same state") 35 | initial_state_lst.append(this_s) 36 | 37 | 38 | -------------------------------------------------------------------------------- /robomimic/scripts/conversion/convert_d4rl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to convert D4RL data into an hdf5 compatible with this repository. 3 | Takes a folder path and a D4RL env name. This script downloads the corresponding 4 | raw D4RL dataset into a "d4rl" subfolder, and then makes a converted dataset 5 | in the "d4rl/converted" subfolder. 6 | 7 | This script has been tested on the following commits: 8 | 9 | https://github.com/rail-berkeley/d4rl/tree/9b68f31bab6a8546edfb28ff0bd9d5916c62fd1f 10 | https://github.com/rail-berkeley/d4rl/tree/26adf732efafdad864b3df2287e7b778ee4f7f63 11 | 12 | Args: 13 | env (str): d4rl env name, which specifies the dataset to download and convert 14 | folder (str): specify folder to download raw d4rl datasets and converted d4rl datasets to. 15 | A `d4rl` subfolder will be created in this folder with the raw d4rl dataset, and 16 | a `d4rl/converted` subfolder will be created in this folder with the converted 17 | datasets (if they do not already exist). Defaults to the datasets folder at 18 | the top-level of the repository.
19 | 20 | Example usage: 21 | 22 | # downloads to default path at robomimic/datasets/d4rl 23 | python convert_d4rl.py --env walker2d-medium-expert-v0 24 | 25 | # download to custom path 26 | python convert_d4rl.py --env walker2d-medium-expert-v0 --folder /path/to/folder 27 | """ 28 | 29 | import os 30 | import h5py 31 | import json 32 | import argparse 33 | import numpy as np 34 | 35 | import gym 36 | import d4rl 37 | import robomimic 38 | from robomimic.envs.env_gym import EnvGym 39 | from robomimic.utils.log_utils import custom_tqdm 40 | 41 | if __name__ == "__main__": 42 | parser = argparse.ArgumentParser() 43 | parser.add_argument( 44 | "--env", 45 | type=str, 46 | help="d4rl env name, which specifies the dataset to download and convert", 47 | ) 48 | parser.add_argument( 49 | "--folder", 50 | type=str, 51 | default=None, 52 | help="specify folder to download raw d4rl datasets and converted d4rl datasets to.\ 53 | A `d4rl` subfolder will be created in this folder with the raw d4rl dataset, and\ 54 | a `d4rl/converted` subfolder will be created in this folder with the converted\ 55 | datasets (if they do not already exist). Defaults to the datasets folder at\ 56 | the top-level of the repository.", 57 | ) 58 | args = parser.parse_args() 59 | 60 | base_folder = args.folder 61 | if base_folder is None: 62 | base_folder = os.path.join(robomimic.__path__[0], "../datasets") 63 | base_folder = os.path.join(base_folder, "d4rl") 64 | 65 | # get dataset 66 | d4rl.set_dataset_path(base_folder) 67 | env = gym.make(args.env) 68 | ds = env.env.get_dataset() 69 | env.close() 70 | 71 | # env 72 | env = EnvGym(args.env) 73 | 74 | # output file 75 | write_folder = os.path.join(base_folder, "converted") 76 | if not os.path.exists(write_folder): 77 | os.makedirs(write_folder) 78 | output_path = os.path.join(base_folder, "converted", "{}.hdf5".format(args.env.replace("-", "_"))) 79 | f_sars = h5py.File(output_path, "w") 80 | f_sars_grp = f_sars.create_group("data") 81 | 82 | # code to split D4RL data into trajectories 83 | # (modified from https://github.com/aviralkumar2907/d4rl_evaluations/blob/bear_intergrate/bear/examples/bear_hdf5_d4rl.py#L18) 84 | all_obs = ds['observations'] 85 | all_act = ds['actions'] 86 | N = all_obs.shape[0] 87 | 88 | obs = all_obs[:N-1] 89 | actions = all_act[:N-1] 90 | next_obs = all_obs[1:] 91 | rewards = np.squeeze(ds['rewards'][:N-1]) 92 | dones = np.squeeze(ds['terminals'][:N-1]).astype(np.int32) 93 | 94 | assert 'timeouts' in ds 95 | timeouts = ds['timeouts'][:] 96 | 97 | ctr = 0 98 | total_samples = 0 99 | num_traj = 0 100 | traj = dict(obs=[], next_obs=[], actions=[], rewards=[], dones=[]) 101 | 102 | print("\nConverting hdf5...") 103 | for idx in custom_tqdm(range(obs.shape[0])): 104 | 105 | # add transition 106 | traj["obs"].append(obs[idx]) 107 | traj["actions"].append(actions[idx]) 108 | traj["rewards"].append(rewards[idx]) 109 | traj["next_obs"].append(next_obs[idx]) 110 | traj["dones"].append(dones[idx]) 111 | ctr += 1 112 | 113 | # if hit timeout or done is True, end the current trajectory and start a new trajectory 114 | if timeouts[idx] or dones[idx]: 115 | 116 | # replace next obs with copy of current obs for final timestep, and make sure done is true 117 | traj["next_obs"][-1] = np.array(obs[idx]) 118 | traj["dones"][-1] = 1 119 | 120 | # store trajectory 121 | ep_data_grp = f_sars_grp.create_group("demo_{}".format(num_traj)) 122 | ep_data_grp.create_dataset("obs/flat", data=np.array(traj["obs"])) 123 | ep_data_grp.create_dataset("next_obs/flat", 
data=np.array(traj["next_obs"])) 124 | ep_data_grp.create_dataset("actions", data=np.array(traj["actions"])) 125 | ep_data_grp.create_dataset("rewards", data=np.array(traj["rewards"])) 126 | ep_data_grp.create_dataset("dones", data=np.array(traj["dones"])) 127 | ep_data_grp.attrs["num_samples"] = len(traj["actions"]) 128 | total_samples += len(traj["actions"]) 129 | num_traj += 1 130 | 131 | # reset 132 | ctr = 0 133 | traj = dict(obs=[], next_obs=[], actions=[], rewards=[], dones=[]) 134 | 135 | print("\nExcluding {} samples at end of file due to no trajectory truncation.".format(len(traj["actions"]))) 136 | print("Wrote {} trajectories to new converted hdf5 at {}\n".format(num_traj, output_path)) 137 | 138 | # metadata 139 | f_sars_grp.attrs["total"] = total_samples 140 | f_sars_grp.attrs["env_args"] = json.dumps(env.serialize(), indent=4) 141 | 142 | f_sars.close() 143 | 144 | -------------------------------------------------------------------------------- /robomimic/scripts/conversion/convert_robosuite.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to convert a dataset collected using robosuite into an hdf5 compatible with 3 | this repository. Takes a dataset path corresponding to the demo.hdf5 file containing the 4 | demonstrations. It modifies the dataset in-place. By default, the script also creates a 5 | 90-10 train-validation split. 6 | 7 | For more information on collecting datasets with robosuite, see the code link and documentation 8 | link below. 9 | 10 | Code: https://github.com/ARISE-Initiative/robosuite/blob/offline_study/robosuite/scripts/collect_human_demonstrations.py 11 | 12 | Documentation: https://robosuite.ai/docs/algorithms/demonstrations.html 13 | 14 | Example usage: 15 | 16 | python convert_robosuite.py --dataset /path/to/your/demo.hdf5 17 | """ 18 | 19 | import h5py 20 | import json 21 | import argparse 22 | import os 23 | 24 | import robomimic.envs.env_base as EB 25 | from robomimic.scripts.split_train_val import split_train_val_from_hdf5 26 | 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument( 31 | "--dataset", 32 | type=str, 33 | help="path to input hdf5 dataset", 34 | ) 35 | args = parser.parse_args() 36 | 37 | f = h5py.File(os.path.expanduser(args.dataset), "a") # edit mode 38 | 39 | # store env meta 40 | env_name = f["data"].attrs["env"] 41 | env_info = json.loads(f["data"].attrs["env_info"]) 42 | env_meta = dict( 43 | type=EB.EnvType.ROBOSUITE_TYPE, 44 | env_name=env_name, 45 | env_kwargs=env_info, 46 | ) 47 | if "env_args" in f["data"].attrs: 48 | del f["data"].attrs["env_args"] 49 | f["data"].attrs["env_args"] = json.dumps(env_meta, indent=4) 50 | 51 | print("====== Stored env meta ======") 52 | print(f["data"].attrs["env_args"]) 53 | 54 | # store metadata about number of samples 55 | total_samples = 0 56 | for ep in f["data"]: 57 | # ensure model-xml is in per-episode metadata 58 | assert "model_file" in f["data/{}".format(ep)].attrs 59 | 60 | # add "num_samples" into per-episode metadata 61 | if "num_samples" in f["data/{}".format(ep)].attrs: 62 | del f["data/{}".format(ep)].attrs["num_samples"] 63 | n_sample = f["data/{}/actions".format(ep)].shape[0] 64 | f["data/{}".format(ep)].attrs["num_samples"] = n_sample 65 | total_samples += n_sample 66 | 67 | # add total samples to global metadata 68 | if "total" in f["data"].attrs: 69 | del f["data"].attrs["total"] 70 | f["data"].attrs["total"] = total_samples 71 | 72 | f.close() 73 | 74 | # 
create 90-10 train-validation split in the dataset 75 | split_train_val_from_hdf5(hdf5_path=args.dataset, val_ratio=0.1) 76 | -------------------------------------------------------------------------------- /robomimic/scripts/conversion/convert_roboturk_pilot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to convert the RoboTurk Pilot datasets (https://roboturk.stanford.edu/dataset_sim.html) 3 | into a format compatible with this repository. It will also create some useful filter keys 4 | in the file (e.g. training, validation, and fastest n trajectories). Prior work 5 | (https://arxiv.org/abs/1911.05321) has found this useful (for example, training on the 6 | fastest 225 demonstrations for bins-Can). 7 | 8 | Direct download link for dataset: http://cvgl.stanford.edu/projects/roboturk/RoboTurkPilot.zip 9 | 10 | Args: 11 | folder (str): path to a folder containing a demo.hdf5 and a models directory containing 12 | mujoco xml files. For example, RoboTurkPilot/bins-Can. 13 | 14 | n (int): creates a filter key corresponding to the n fastest trajectories. Defaults to 225. 15 | 16 | Example usage: 17 | 18 | python convert_roboturk_pilot.py --folder /path/to/RoboTurkPilot/bins-Can --n 225 19 | """ 20 | 21 | import os 22 | import h5py 23 | import json 24 | import argparse 25 | import numpy as np 26 | from tqdm import tqdm 27 | 28 | import robomimic 29 | import robomimic.envs.env_base as EB 30 | from robomimic.utils.file_utils import create_hdf5_filter_key 31 | from robomimic.scripts.split_train_val import split_train_val_from_hdf5 32 | 33 | 34 | def convert_rt_pilot_hdf5(ref_folder): 35 | """ 36 | Uses the reference demo hdf5 to write a new converted hdf5 compatible with 37 | the repository. 38 | 39 | Args: 40 | ref_folder (str): path to a folder containing a demo.hdf5 and a models directory containing 41 | mujoco xml files. 42 | """ 43 | hdf5_path = os.path.join(ref_folder, "demo.hdf5") 44 | new_path = os.path.join(ref_folder, "demo_new.hdf5") 45 | 46 | f = h5py.File(hdf5_path, "r") 47 | f_new = h5py.File(new_path, "w") 48 | f_new_grp = f_new.create_group("data") 49 | 50 | # sorted list of demonstrations by demo number 51 | demos = list(f["data"].keys()) 52 | inds = np.argsort([int(elem[5:]) for elem in demos]) 53 | demos = [demos[i] for i in inds] 54 | 55 | # write each demo 56 | num_samples_arr = [] 57 | for demo_id in tqdm(range(len(demos))): 58 | ep = demos[demo_id] 59 | 60 | # create group for this demonstration 61 | ep_data_grp = f_new_grp.create_group(ep) 62 | 63 | # copy states over 64 | states = f["data/{}/states".format(ep)][()] 65 | ep_data_grp.create_dataset("states", data=np.array(states)) 66 | 67 | # concat jvels and gripper actions to form full actions 68 | jvels = f["data/{}/joint_velocities".format(ep)][()] 69 | gripper_acts = f["data/{}/gripper_actuations".format(ep)][()] 70 | actions = np.concatenate([jvels, gripper_acts], axis=1) 71 | 72 | # IMPORTANT: clip actions to -1, 1, since this is expected by the codebase 73 | actions = np.clip(actions, -1., 1.) 
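        # (illustrative note: np.clip maps each out-of-range element to the
        # nearest bound, e.g. a raw joint-velocity command of 1.7 becomes 1.0,
        # preserving the [-1, 1] action range assumed by downstream training)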
74 | ep_data_grp.create_dataset("actions", data=actions) 75 | 76 | # store model xml directly in the new hdf5 file 77 | model_path = os.path.join(ref_folder, "models", f["data/{}".format(ep)].attrs["model_file"]) 78 | f_model = open(model_path, "r") 79 | model_xml = f_model.read() 80 | f_model.close() 81 | ep_data_grp.attrs["model_file"] = model_xml 82 | 83 | # store num samples for this ep 84 | num_samples = actions.shape[0] 85 | ep_data_grp.attrs["num_samples"] = num_samples # number of transitions in this episode 86 | num_samples_arr.append(num_samples) 87 | 88 | # write dataset attributes (metadata) 89 | f_new_grp.attrs["total"] = np.sum(num_samples_arr) 90 | 91 | # construct and save env metadata 92 | env_meta = dict() 93 | env_meta["type"] = EB.EnvType.ROBOSUITE_TYPE 94 | env_meta["env_name"] = (f["data"].attrs["env"] + "Teleop") 95 | # hardcode robosuite v0.3 args 96 | robosuite_args = { 97 | "has_renderer": False, 98 | "has_offscreen_renderer": False, 99 | "ignore_done": True, 100 | "use_object_obs": True, 101 | "use_camera_obs": False, 102 | "camera_depth": False, 103 | "camera_height": 84, 104 | "camera_width": 84, 105 | "camera_name": "agentview", 106 | "gripper_visualization": False, 107 | "reward_shaping": False, 108 | "control_freq": 100, 109 | } 110 | env_meta["env_kwargs"] = robosuite_args 111 | f_new_grp.attrs["env_args"] = json.dumps(env_meta, indent=4) # environment info 112 | 113 | print("\n====== Added env meta ======") 114 | print(f_new_grp.attrs["env_args"]) 115 | 116 | f.close() 117 | f_new.close() 118 | 119 | # back up the old dataset, and replace with new dataset 120 | os.rename(hdf5_path, os.path.join(ref_folder, "demo_bak.hdf5")) 121 | os.rename(new_path, hdf5_path) 122 | 123 | 124 | def split_fastest_from_hdf5(hdf5_path, n): 125 | """ 126 | Creates filter key for fastest N trajectories, named 127 | "fastest_{}".format(n). 128 | 129 | Args: 130 | hdf5_path (str): path to the hdf5 file 131 | 132 | n (int): fastest n demos to create filter key for 133 | """ 134 | 135 | # retrieve fastest n demos 136 | f = h5py.File(hdf5_path, "r") 137 | demos = sorted(list(f["data"].keys())) 138 | traj_lengths = [] 139 | for ep in demos: 140 | traj_lengths.append(f["data/{}/actions".format(ep)].shape[0]) 141 | inds = np.argsort(traj_lengths)[:n] 142 | filtered_demos = [demos[i] for i in inds] 143 | f.close() 144 | 145 | # create filter key 146 | name = "fastest_{}".format(n) 147 | lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filtered_demos, key_name=name) 148 | 149 | print("Total number of samples in fastest {} demos: {}".format(n, np.sum(lengths))) 150 | print("Average number of samples in fastest {} demos: {}".format(n, np.mean(lengths))) 151 | 152 | 153 | if __name__ == "__main__": 154 | parser = argparse.ArgumentParser() 155 | parser.add_argument( 156 | "--folder", 157 | type=str, 158 | help="path to a folder containing a demo.hdf5 and a models directory containing \ 159 | mujoco xml files. For example, RoboTurkPilot/bins-Can.", 160 | ) 161 | parser.add_argument( 162 | "--n", 163 | type=int, 164 | default=225, 165 | help="creates a filter key corresponding to the n fastest trajectories. 
Defaults to 225.", 166 | ) 167 | args = parser.parse_args() 168 | 169 | # convert hdf5 170 | convert_rt_pilot_hdf5(ref_folder=args.folder) 171 | 172 | # create 90-10 train-validation split in the dataset 173 | print("\nCreating 90-10 train-validation split...\n") 174 | hdf5_path = os.path.join(args.folder, "demo.hdf5") 175 | split_train_val_from_hdf5(hdf5_path=hdf5_path, val_ratio=0.1) 176 | 177 | print("\nCreating filter key for fastest {} trajectories...".format(args.n)) 178 | split_fastest_from_hdf5(hdf5_path=hdf5_path, n=args.n) 179 | 180 | print("\nCreating 90-10 train-validation split for fastest {} trajectories...".format(args.n)) 181 | split_train_val_from_hdf5(hdf5_path=hdf5_path, val_ratio=0.1, filter_key="fastest_{}".format(args.n)) 182 | 183 | print( 184 | "\nWARNING: new dataset has replaced old one in demo.hdf5 file. " 185 | "The old dataset file has been moved to demo_bak.hdf5" 186 | ) 187 | 188 | print( 189 | "\nNOTE: the new dataset also contains a fastest_{} filter key, for an easy way " 190 | "to train on the fastest trajectories. Just set config.train.hdf5_filter_key to train on this " 191 | "subset. A common choice is 225 when training on the bins-Can dataset.\n".format(args.n) 192 | ) 193 | -------------------------------------------------------------------------------- /robomimic/scripts/download_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to download datasets packaged with the repository. By default, all 3 | datasets will be stored at robomimic/datasets, unless the @download_dir 4 | argument is supplied. We recommend using the default, as most examples that 5 | use these datasets assume that they can be found there. 6 | 7 | The @tasks, @dataset_types, and @hdf5_types arguments can all be supplied 8 | to choose which datasets to download. 9 | 10 | Args: 11 | download_dir (str): Base download directory. Created if it doesn't exist. 12 | Defaults to datasets folder in repository - only pass in if you would 13 | like to override the location. 14 | 15 | tasks (list): Tasks to download datasets for. Defaults to lift task. Pass 'all' to 16 | download all tasks (sim + real), 'sim' to download all sim tasks, 'real' to 17 | download all real tasks, or directly specify the list of tasks. 18 | 19 | dataset_types (list): Dataset types to download datasets for (e.g. ph, mh, mg). 20 | Defaults to ph. Pass 'all' to download datasets for all available dataset 21 | types per task, or directly specify the list of dataset types. 22 | 23 | hdf5_types (list): hdf5 types to download datasets for (e.g. raw, low_dim, image). 24 | Defaults to low_dim. Pass 'all' to download datasets for all available hdf5 25 | types per task and dataset, or directly specify the list of hdf5 types.
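dry_run (bool): (documented here for completeness; the flag exists in the parser below) if provided, do a dry run that only prints which datasets would be downloaded, without downloading anything.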
26 | 27 | Example usage: 28 | 29 | # default behavior - just download lift proficient-human low-dim dataset 30 | python download_datasets.py 31 | 32 | # download low-dim proficient-human datasets for all simulation tasks 33 | # (do a dry run first to see which datasets would be downloaded) 34 | python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim --dry_run 35 | python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim 36 | 37 | # download all low-dim and image multi-human datasets for the can and square tasks 38 | python download_datasets.py --tasks can square --dataset_types mh --hdf5_types low_dim image 39 | 40 | # download the sparse reward machine-generated low-dim datasets 41 | python download_datasets.py --tasks all --dataset_types mg --hdf5_types low_dim_sparse 42 | 43 | # download all real robot datasets 44 | python download_datasets.py --tasks real 45 | """ 46 | import os 47 | import argparse 48 | 49 | import robomimic 50 | import robomimic.utils.file_utils as FileUtils 51 | from robomimic import DATASET_REGISTRY 52 | 53 | ALL_TASKS = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"] 54 | ALL_DATASET_TYPES = ["ph", "mh", "mg", "paired"] 55 | ALL_HDF5_TYPES = ["raw", "low_dim", "image", "low_dim_sparse", "low_dim_dense", "image_sparse", "image_dense"] 56 | 57 | 58 | if __name__ == "__main__": 59 | parser = argparse.ArgumentParser() 60 | 61 | # directory to download datasets to 62 | parser.add_argument( 63 | "--download_dir", 64 | type=str, 65 | default=None, 66 | help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.", 67 | ) 68 | 69 | # tasks to download datasets for 70 | parser.add_argument( 71 | "--tasks", 72 | type=str, 73 | nargs='+', 74 | default=["lift"], 75 | help="Tasks to download datasets for. Defaults to lift task. Pass 'all' to download all tasks (sim + real)\ 76 | 'sim' to download all sim tasks, 'real' to download all real tasks, or directly specify the list of\ 77 | tasks.", 78 | ) 79 | 80 | # dataset types to download datasets for 81 | parser.add_argument( 82 | "--dataset_types", 83 | type=str, 84 | nargs='+', 85 | default=["ph"], 86 | help="Dataset types to download datasets for (e.g. ph, mh, mg). Defaults to ph. Pass 'all' to download \ 87 | datasets for all available dataset types per task, or directly specify the list of dataset types.", 88 | ) 89 | 90 | # hdf5 types to download datasets for 91 | parser.add_argument( 92 | "--hdf5_types", 93 | type=str, 94 | nargs='+', 95 | default=["low_dim"], 96 | help="hdf5 types to download datasets for (e.g. raw, low_dim, image). Defaults to low_dim. 
Pass 'all' \ 97 | to download datasets for all available hdf5 types per task and dataset, or directly specify the list\ 98 | of hdf5 types.", 99 | ) 100 | 101 | # dry run - don't actually download datasets, but print which datasets would be downloaded 102 | parser.add_argument( 103 | "--dry_run", 104 | action='store_true', 105 | help="set this flag to do a dry run to only print which datasets would be downloaded" 106 | ) 107 | 108 | args = parser.parse_args() 109 | 110 | # set default base directory for downloads 111 | default_base_dir = args.download_dir 112 | if default_base_dir is None: 113 | default_base_dir = os.path.join(robomimic.__path__[0], "../datasets") 114 | 115 | # load args 116 | download_tasks = args.tasks 117 | if "all" in download_tasks: 118 | assert len(download_tasks) == 1, "all should be only tasks argument but got: {}".format(args.tasks) 119 | download_tasks = ALL_TASKS 120 | elif "sim" in download_tasks: 121 | assert len(download_tasks) == 1, "sim should be only tasks argument but got: {}".format(args.tasks) 122 | download_tasks = [task for task in ALL_TASKS if "real" not in task] 123 | elif "real" in download_tasks: 124 | assert len(download_tasks) == 1, "real should be only tasks argument but got: {}".format(args.tasks) 125 | download_tasks = [task for task in ALL_TASKS if "real" in task] 126 | 127 | download_dataset_types = args.dataset_types 128 | if "all" in download_dataset_types: 129 | assert len(download_dataset_types) == 1, "all should be only dataset_types argument but got: {}".format(args.dataset_types) 130 | download_dataset_types = ALL_DATASET_TYPES 131 | 132 | download_hdf5_types = args.hdf5_types 133 | if "all" in download_hdf5_types: 134 | assert len(download_hdf5_types) == 1, "all should be only hdf5_types argument but got: {}".format(args.hdf5_types) 135 | download_hdf5_types = ALL_HDF5_TYPES 136 | 137 | # download requested datasets 138 | for task in DATASET_REGISTRY: 139 | if task in download_tasks: 140 | for dataset_type in DATASET_REGISTRY[task]: 141 | if dataset_type in download_dataset_types: 142 | for hdf5_type in DATASET_REGISTRY[task][dataset_type]: 143 | if hdf5_type in download_hdf5_types: 144 | download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type)) 145 | print("\nDownloading dataset:\n task: {}\n dataset type: {}\n hdf5 type: {}\n download path: {}" 146 | .format(task, dataset_type, hdf5_type, download_dir)) 147 | if args.dry_run: 148 | print("\ndry run: skip download") 149 | else: 150 | # Make sure path exists and create if it doesn't 151 | os.makedirs(download_dir, exist_ok=True) 152 | FileUtils.download_url( 153 | url=DATASET_REGISTRY[task][dataset_type][hdf5_type]["url"], 154 | download_dir=download_dir, 155 | ) 156 | print("") 157 | -------------------------------------------------------------------------------- /robomimic/scripts/download_momart_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to download datasets used in MoMaRT paper (https://arxiv.org/abs/2112.05251). By default, all 3 | datasets will be stored at robomimic/datasets, unless the @download_dir 4 | argument is supplied. We recommend using the default, as most examples that 5 | use these datasets assume that they can be found there. 6 | 7 | The @tasks and @dataset_types arguments can all be supplied 8 | to choose which datasets to download. 9 | 10 | Args: 11 | download_dir (str): Base download directory. Created if it doesn't exist. 
12 | Defaults to datasets folder in repository - only pass in if you would 13 | like to override the location. 14 | 15 | tasks (list): Tasks to download datasets for. Defaults to table_setup_from_dishwasher task. Pass 'all' to 16 | download all tasks - 5 total: 17 | - table_setup_from_dishwasher 18 | - table_setup_from_dresser 19 | - table_cleanup_to_dishwasher 20 | - table_cleanup_to_sink 21 | - unload_dishwasher 22 | 23 | dataset_types (list): Dataset types to download datasets for (expert, suboptimal, generalize, sample). 24 | Defaults to expert. Pass 'all' to download datasets for all available dataset 25 | types per task, or directly specify the list of dataset types. 26 | NOTE: Because these datasets are huge, we will always print out a warning 27 | that a user must confirm with 'yes' to acknowledge the data size (can be up to >100GB for all tasks of a single type) 28 | 29 | Example usage: 30 | 31 | # default behavior - just download expert table_setup_from_dishwasher dataset 32 | python download_momart_datasets.py 33 | 34 | # download expert datasets for all tasks 35 | # (do a dry run first to see which datasets would be downloaded) 36 | python download_momart_datasets.py --tasks all --dataset_types expert --dry_run 37 | python download_momart_datasets.py --tasks all --dataset_types expert 38 | 39 | # download all expert and suboptimal datasets for the table_setup_from_dishwasher and table_cleanup_to_dishwasher tasks 40 | python download_momart_datasets.py --tasks table_setup_from_dishwasher table_cleanup_to_dishwasher --dataset_types expert suboptimal 41 | 42 | # download the sample datasets 43 | python download_momart_datasets.py --tasks all --dataset_types sample 44 | 45 | # download all datasets 46 | python download_momart_datasets.py --tasks all --dataset_types all 47 | """ 48 | import os 49 | import argparse 50 | 51 | import robomimic 52 | import robomimic.utils.file_utils as FileUtils 53 | from robomimic import MOMART_DATASET_REGISTRY 54 | 55 | ALL_TASKS = [ 56 | "table_setup_from_dishwasher", 57 | "table_setup_from_dresser", 58 | "table_cleanup_to_dishwasher", 59 | "table_cleanup_to_sink", 60 | "unload_dishwasher", 61 | ] 62 | ALL_DATASET_TYPES = [ 63 | "expert", 64 | "suboptimal", 65 | "generalize", 66 | "sample", 67 | ] 68 | 69 | 70 | if __name__ == "__main__": 71 | parser = argparse.ArgumentParser() 72 | 73 | # directory to download datasets to 74 | parser.add_argument( 75 | "--download_dir", 76 | type=str, 77 | default=None, 78 | help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.", 79 | ) 80 | 81 | # tasks to download datasets for 82 | parser.add_argument( 83 | "--tasks", 84 | type=str, 85 | nargs='+', 86 | default=["table_setup_from_dishwasher"], 87 | help="Tasks to download datasets for. Defaults to table_setup_from_dishwasher task. Pass 'all' to download all " 88 | f"5 tasks, or directly specify the list of tasks. Options are any of: {ALL_TASKS}", 89 | ) 90 | 91 | # dataset types to download datasets for 92 | parser.add_argument( 93 | "--dataset_types", 94 | type=str, 95 | nargs='+', 96 | default=["expert"], 97 | help="Dataset types to download datasets for (e.g. expert, suboptimal). Defaults to expert. Pass 'all' to " 98 | "download datasets for all available dataset types per task, or directly specify the list of dataset " 99 | f"types.
Options are any of: {ALL_DATASET_TYPES}", 100 | ) 101 | 102 | # dry run - don't actually download datasets, but print which datasets would be downloaded 103 | parser.add_argument( 104 | "--dry_run", 105 | action='store_true', 106 | help="set this flag to do a dry run to only print which datasets would be downloaded" 107 | ) 108 | 109 | args = parser.parse_args() 110 | 111 | # set default base directory for downloads 112 | default_base_dir = args.download_dir 113 | if default_base_dir is None: 114 | default_base_dir = os.path.join(robomimic.__path__[0], "../datasets") 115 | 116 | # load args 117 | download_tasks = args.tasks 118 | if "all" in download_tasks: 119 | assert len(download_tasks) == 1, "all should be only tasks argument but got: {}".format(args.tasks) 120 | download_tasks = ALL_TASKS 121 | 122 | download_dataset_types = args.dataset_types 123 | if "all" in download_dataset_types: 124 | assert len(download_dataset_types) == 1, "all should be only dataset_types argument but got: {}".format(args.dataset_types) 125 | download_dataset_types = ALL_DATASET_TYPES 126 | 127 | # Run sanity check first to warn user if they're about to download a huge amount of data 128 | total_size = 0 129 | for task in MOMART_DATASET_REGISTRY: 130 | if task in download_tasks: 131 | for dataset_type in MOMART_DATASET_REGISTRY[task]: 132 | if dataset_type in download_dataset_types: 133 | total_size += MOMART_DATASET_REGISTRY[task][dataset_type]["size"] 134 | 135 | # Verify user acknowledgement if we're not doing a dry run 136 | if not args.dry_run: 137 | user_response = input(f"Warning: requested datasets will take a total of {total_size}GB. Proceed? y/n\n") 138 | assert user_response.lower() in {"yes", "y"}, f"Did not receive confirmation. Aborting download." 139 | 140 | # download requested datasets 141 | for task in MOMART_DATASET_REGISTRY: 142 | if task in download_tasks: 143 | for dataset_type in MOMART_DATASET_REGISTRY[task]: 144 | if dataset_type in download_dataset_types: 145 | dataset_info = MOMART_DATASET_REGISTRY[task][dataset_type] 146 | download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type)) 147 | print(f"\nDownloading dataset:\n" 148 | f" task: {task}\n" 149 | f" dataset type: {dataset_type}\n" 150 | f" dataset size: {dataset_info['size']}GB\n" 151 | f" download path: {download_dir}") 152 | if args.dry_run: 153 | print("\ndry run: skip download") 154 | else: 155 | # Make sure path exists and create if it doesn't 156 | os.makedirs(download_dir, exist_ok=True) 157 | FileUtils.download_url( 158 | url=dataset_info["url"], 159 | download_dir=download_dir, 160 | ) 161 | print("") 162 | -------------------------------------------------------------------------------- /robomimic/scripts/extract_obs_from_raw_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script holds the commands that were used to go from raw robosuite demo.hdf5 files 4 | # to our processed low-dim and image hdf5 files. 
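# --- illustrative aside (added for exposition; not part of the original script) ---
# Every command below follows the same pattern (flag values here are examples):
#
#   python dataset_states_to_obs.py \
#       --done_mode 2 \                  # 0: done on task success (MG); 2: done on success + end of trajectory (PH/MH)
#       --dataset /path/to/demo.hdf5 \   # raw states file to read
#       --output_name low_dim.hdf5       # processed hdf5 written alongside the input
#
# Image variants additionally pass --camera_names / --camera_height / --camera_width,
# and the machine-generated (MG) datasets add --shaped for dense-reward versions.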
5 | 6 | BASE_DATASET_DIR="../../datasets" 7 | echo "Using base dataset directory: $BASE_DATASET_DIR" 8 | 9 | 10 | ### NOTE: we use done-mode 0 for MG (dones on task success) ### 11 | 12 | 13 | ### mg ### 14 | 15 | 16 | # lift - mg, sparse 17 | python dataset_states_to_obs.py --done_mode 0 \ 18 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 19 | --output_name low_dim_sparse.hdf5 20 | python dataset_states_to_obs.py --done_mode 0 \ 21 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 22 | --output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 23 | 24 | # lift - mg, dense 25 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 26 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 27 | --output_name low_dim_dense.hdf5 28 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 29 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 30 | --output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 31 | 32 | # can - mg, sparse 33 | python dataset_states_to_obs.py --done_mode 0 \ 34 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 35 | --output_name low_dim_sparse.hdf5 36 | python dataset_states_to_obs.py --done_mode 0 \ 37 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 38 | --output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 39 | 40 | # can - mg, dense 41 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 42 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 43 | --output_name low_dim_dense.hdf5 44 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 45 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 46 | --output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 47 | 48 | 49 | ### NOTE: we use done-mode 2 for PH / MH (dones on task success and end of trajectory) ### 50 | 51 | 52 | ### ph ### 53 | 54 | 55 | # lift - ph 56 | python dataset_states_to_obs.py --done_mode 2 \ 57 | --dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \ 58 | --output_name low_dim.hdf5 59 | python dataset_states_to_obs.py --done_mode 2 \ 60 | --dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \ 61 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 62 | 63 | # can - ph 64 | python dataset_states_to_obs.py --done_mode 2 \ 65 | --dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \ 66 | --output_name low_dim.hdf5 67 | python dataset_states_to_obs.py --done_mode 2 \ 68 | --dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \ 69 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 70 | 71 | # square - ph 72 | python dataset_states_to_obs.py --done_mode 2 \ 73 | --dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \ 74 | --output_name low_dim.hdf5 75 | python dataset_states_to_obs.py --done_mode 2 \ 76 | --dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \ 77 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 78 | 79 | # transport - ph 80 | python dataset_states_to_obs.py --done_mode 2 \ 81 | --dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \ 82 | --output_name low_dim.hdf5 83 | python dataset_states_to_obs.py --done_mode 2 \ 84 | --dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \ 85 | --output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 86 | 87 | # tool hang 
- ph 88 | python dataset_states_to_obs.py --done_mode 2 \ 89 | --dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \ 90 | --output_name low_dim.hdf5 91 | python dataset_states_to_obs.py --done_mode 2 \ 92 | --dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \ 93 | --output_name image.hdf5 --camera_names sideview robot0_eye_in_hand --camera_height 240 --camera_width 240 94 | 95 | 96 | ### mh ### 97 | 98 | 99 | # lift - mh 100 | python dataset_states_to_obs.py --done_mode 2 \ 101 | --dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \ 102 | --output_name low_dim.hdf5 103 | python dataset_states_to_obs.py --done_mode 2 \ 104 | --dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \ 105 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 106 | 107 | # can - mh 108 | python dataset_states_to_obs.py --done_mode 2 \ 109 | --dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \ 110 | --output_name low_dim.hdf5 111 | python dataset_states_to_obs.py --done_mode 2 \ 112 | --dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \ 113 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 114 | 115 | # square - mh 116 | python dataset_states_to_obs.py --done_mode 2 \ 117 | --dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \ 118 | --output_name low_dim.hdf5 119 | python dataset_states_to_obs.py --done_mode 2 \ 120 | --dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \ 121 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 122 | 123 | # transport - mh 124 | python dataset_states_to_obs.py --done_mode 2 \ 125 | --dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \ 126 | --output_name low_dim.hdf5 127 | python dataset_states_to_obs.py --done_mode 2 \ 128 | --dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \ 129 | --output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 130 | 131 | 132 | ### can-paired ### 133 | 134 | 135 | python dataset_states_to_obs.py --done_mode 2 \ 136 | --dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \ 137 | --output_name low_dim.hdf5 138 | python dataset_states_to_obs.py --done_mode 2 \ 139 | --dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \ 140 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 141 | -------------------------------------------------------------------------------- /robomimic/scripts/generate_config_templates.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpful script to generate example config files for each algorithm. These should be re-generated 3 | when new config options are added, or when default settings in the config classes are modified. 
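Example usage (the script takes no arguments):

    python generate_config_templates.py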
4 | """ 5 | import os 6 | import json 7 | 8 | import robomimic 9 | from robomimic.config import get_all_registered_configs 10 | 11 | 12 | def main(): 13 | # store template config jsons in this directory 14 | target_dir = os.path.join(robomimic.__path__[0], "exps/templates/") 15 | 16 | # iterate through registered algorithm config classes 17 | all_configs = get_all_registered_configs() 18 | for algo_name in all_configs: 19 | # make config class for this algorithm 20 | c = all_configs[algo_name]() 21 | assert algo_name == c.algo_name 22 | # dump to json 23 | json_path = os.path.join(target_dir, "{}.json".format(algo_name)) 24 | c.dump(filename=json_path) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /robomimic/scripts/get_dataset_info.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to report dataset information. By default, will print trajectory length statistics, 3 | the maximum and minimum action element in the dataset, filter keys present, environment 4 | metadata, and the structure of the first demonstration. If --verbose is passed, it will 5 | report the exact demo keys under each filter key, and the structure of all demonstrations 6 | (not just the first one). 7 | 8 | Args: 9 | dataset (str): path to hdf5 dataset 10 | 11 | filter_key (str): if provided, report statistics on the subset of trajectories 12 | in the file that correspond to this filter key 13 | 14 | verbose (bool): if flag is provided, print more details, like the structure of all 15 | demonstrations (not just the first one) 16 | 17 | Example usage: 18 | 19 | # run script on example hdf5 packaged with repository 20 | python get_dataset_info.py --dataset ../../tests/assets/test.hdf5 21 | 22 | # run script only on validation data 23 | python get_dataset_info.py --dataset ../../tests/assets/test.hdf5 --filter_key valid 24 | """ 25 | import h5py 26 | import json 27 | import argparse 28 | import numpy as np 29 | 30 | if __name__ == "__main__": 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument( 33 | "--dataset", 34 | type=str, 35 | help="path to hdf5 dataset", 36 | ) 37 | parser.add_argument( 38 | "--filter_key", 39 | type=str, 40 | default=None, 41 | help="(optional) if provided, report statistics on the subset of trajectories \ 42 | in the file that correspond to this filter key", 43 | ) 44 | parser.add_argument( 45 | "--verbose", 46 | action='store_true', 47 | help="verbose output", 48 | ) 49 | args = parser.parse_args() 50 | 51 | # extract demonstration list from file 52 | filter_key = args.filter_key 53 | all_filter_keys = None 54 | f = h5py.File(args.dataset, "r") 55 | if filter_key is not None: 56 | # use the demonstrations from the filter key instead 57 | print("NOTE: using filter key {}".format(filter_key)) 58 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])]) 59 | else: 60 | # use all demonstrations 61 | demos = sorted(list(f["data"].keys())) 62 | 63 | # extract filter key information 64 | if "mask" in f: 65 | all_filter_keys = {} 66 | for fk in f["mask"]: 67 | fk_demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(fk)])]) 68 | all_filter_keys[fk] = fk_demos 69 | 70 | # put demonstration list in increasing episode order 71 | inds = np.argsort([int(elem[5:]) for elem in demos]) 72 | demos = [demos[i] for i in inds] 73 | 74 | # extract length of each trajectory in the file 75 | 
traj_lengths = [] 76 | action_min = np.inf 77 | action_max = -np.inf 78 | for ep in demos: 79 | traj_lengths.append(f["data/{}/actions".format(ep)].shape[0]) 80 | action_min = min(action_min, np.min(f["data/{}/actions".format(ep)][()])) 81 | action_max = max(action_max, np.max(f["data/{}/actions".format(ep)][()])) 82 | traj_lengths = np.array(traj_lengths) 83 | 84 | # report statistics on the data 85 | print("") 86 | print("total transitions: {}".format(np.sum(traj_lengths))) 87 | print("total trajectories: {}".format(traj_lengths.shape[0])) 88 | print("traj length mean: {}".format(np.mean(traj_lengths))) 89 | print("traj length std: {}".format(np.std(traj_lengths))) 90 | print("traj length min: {}".format(np.min(traj_lengths))) 91 | print("traj length max: {}".format(np.max(traj_lengths))) 92 | print("action min: {}".format(action_min)) 93 | print("action max: {}".format(action_max)) 94 | print("") 95 | print("==== Filter Keys ====") 96 | if all_filter_keys is not None: 97 | for fk in all_filter_keys: 98 | print("filter key {} with {} demos".format(fk, len(all_filter_keys[fk]))) 99 | else: 100 | print("no filter keys") 101 | print("") 102 | if args.verbose: 103 | if all_filter_keys is not None: 104 | print("==== Filter Key Contents ====") 105 | for fk in all_filter_keys: 106 | print("filter_key {} with {} demos: {}".format(fk, len(all_filter_keys[fk]), all_filter_keys[fk])) 107 | print("") 108 | env_meta = json.loads(f["data"].attrs["env_args"]) 109 | print("==== Env Meta ====") 110 | print(json.dumps(env_meta, indent=4)) 111 | print("") 112 | 113 | print("==== Dataset Structure ====") 114 | for ep in demos: 115 | print("episode {} with {} transitions".format(ep, f["data/{}".format(ep)].attrs["num_samples"])) 116 | for k in f["data/{}".format(ep)]: 117 | if k in ["obs", "next_obs"]: 118 | print(" key: {}".format(k)) 119 | for obs_k in f["data/{}/{}".format(ep, k)]: 120 | shape = f["data/{}/{}/{}".format(ep, k, obs_k)].shape 121 | print(" observation key {} with shape {}".format(obs_k, shape)) 122 | elif isinstance(f["data/{}/{}".format(ep, k)], h5py.Dataset): 123 | key_shape = f["data/{}/{}".format(ep, k)].shape 124 | print(" key: {} with shape {}".format(k, key_shape)) 125 | 126 | if not args.verbose: 127 | break 128 | 129 | f.close() 130 | 131 | # maybe display error message 132 | print("") 133 | if (action_min < -1.) or (action_max > 1.): 134 | raise Exception("Dataset should have actions in [-1., 1.] but got bounds [{}, {}]".format(action_min, action_max)) 135 | -------------------------------------------------------------------------------- /robomimic/scripts/hitl/collect_playback_utils.py: -------------------------------------------------------------------------------- 1 | """Teleoperate robot with keyboard or SpaceMouse. 
""" 2 | 3 | import argparse 4 | import numpy as np 5 | import os 6 | import robosuite as suite 7 | from robosuite import load_controller_config 8 | from robosuite.utils.input_utils import input2action 9 | from robosuite.wrappers import DataCollectionWrapper 10 | import time 11 | import numpy as np 12 | import json 13 | from robosuite.scripts.collect_human_demonstrations import gather_demonstrations_as_hdf5 14 | import robomimic 15 | import cv2 16 | import robomimic.utils.obs_utils as ObsUtils 17 | import copy 18 | import h5py 19 | 20 | import robosuite 21 | is_v1 = (robosuite.__version__.split(".")[0] == "1") 22 | 23 | # Change later 24 | GOOD_EPISODE_LENGTH = None 25 | MAX_EPISODE_LENGTH = None 26 | SUCCESS_HOLD = None 27 | 28 | class RandomPolicy: 29 | def __init__(self, env): 30 | self.env = env 31 | self.low, self.high = env.action_spec 32 | 33 | def get_action(self, obs): 34 | return np.random.uniform(self.low, self.high) / 2 35 | 36 | class TrainedPolicy: 37 | def __init__(self, checkpoint): 38 | from robomimic.utils.file_utils import policy_from_checkpoint 39 | self.policy = policy_from_checkpoint(ckpt_path=checkpoint)[0] 40 | 41 | def get_action(self, obs): 42 | obs = copy.deepcopy(obs) 43 | di = obs 44 | postprocess_visual_obs = True 45 | 46 | ret = {} 47 | for k in di: 48 | pass 49 | """ 50 | if ObsUtils.key_is_image(k): 51 | ret[k] = di[k][::-1] 52 | if postprocess_visual_obs: 53 | ret[k] = ObsUtils.process_image(ret[k]) 54 | """ 55 | obs.update(ret) 56 | 57 | return self.policy(obs) 58 | 59 | def is_empty_input_spacemouse(action): 60 | # empty_input1 = np.array([0.000, 0.000, 0.000, 0.000, 0.000, 0.000, -1.000]) 61 | empty_input = np.array([0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 1.000]) 62 | if np.array_equal(np.abs(action), empty_input): 63 | return True 64 | return False 65 | 66 | def terminate_condition_met(time_success, timestep_count, term_cond): 67 | assert term_cond in ["fixed_length", "success_count", "stop"] 68 | if term_cond == "fixed_length": 69 | return timestep_count >= GOOD_EPISODE_LENGTH and time_success > 0 70 | elif term_cond == "success_count": 71 | return time_success == SUCCESS_HOLD 72 | elif term_cond == "stop": 73 | return timestep_count >= MAX_EPISODE_LENGTH 74 | 75 | def post_process_spacemouse_action(action, grasp, last_grasp): 76 | """ Fixing Spacemouse Action """ 77 | # If the current grasp is active (1) and last grasp is not (-1) (i.e.: grasping input just pressed), 78 | # toggle arm control and / or camera viewing angle if requested 79 | if last_grasp < 0 < grasp: 80 | if args.switch_on_grasp: 81 | args.arm = "left" if args.arm == "right" else "right" 82 | if args.toggle_camera_on_grasp: 83 | cam_id = (cam_id + 1) % num_cam 84 | env.viewer.set_camera(camera_id=cam_id) 85 | # Update last grasp 86 | last_grasp = grasp 87 | 88 | if is_v1: 89 | env_action_dim = env.action_dim 90 | else: 91 | env_action_dim = 7 92 | 93 | # Fill out the rest of the action space if necessary 94 | rem_action_dim = env_action_dim - action.size 95 | if rem_action_dim > 0: 96 | # Initialize remaining action space 97 | rem_action = np.zeros(rem_action_dim) 98 | # This is a multi-arm setting, choose which arm to control and fill the rest with zeros 99 | if args.arm == "right": 100 | action = np.concatenate([action, rem_action]) 101 | elif args.arm == "left": 102 | action = np.concatenate([rem_action, action]) 103 | else: 104 | # Only right and left arms supported 105 | print("Error: Unsupported arm specified -- " 106 | "must be either 'right' or 'left'! 
Got: {}".format(args.arm)) 107 | elif rem_action_dim < 0: 108 | # We're in an environment with no gripper action space, so trim the action space to be the action dim 109 | action = action[:env_action_dim] 110 | 111 | """ End Fixing Spacemouse Action """ 112 | return action, last_grasp 113 | 114 | def reset_to(env, state): 115 | """ 116 | Reset to a specific simulator state. 117 | 118 | Args: 119 | state (dict): current simulator state that contains one or more of: 120 | - states (np.ndarray): initial state of the mujoco environment 121 | - model (str): mujoco scene xml 122 | 123 | Returns: 124 | observation (dict): observation dictionary after setting the simulator state (only 125 | if "states" is in @state) 126 | """ 127 | should_ret = False 128 | if "model" in state: 129 | env.reset() 130 | xml = env.postprocess_model_xml(state["model"]) 131 | env.reset_from_xml_string(xml) 132 | env.sim.reset() 133 | if not is_v1: 134 | # hide teleop visualization after restoring from model 135 | env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.]) 136 | env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.]) 137 | if "states" in state: 138 | env.sim.set_state_from_flattened(state["states"]) 139 | env.sim.forward() 140 | should_ret = True 141 | 142 | if "goal" in state: 143 | env.set_goal(**state["goal"]) 144 | 145 | return env._get_observations(force_update=True) 146 | -------------------------------------------------------------------------------- /robomimic/scripts/hyperparam_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | A useful script for generating json files and shell scripts for conducting parameter scans. 3 | The script takes a path to a base json file as an argument and a shell file name. 4 | It generates a set of new json files in the same folder as the base json file, and 5 | a shell file script that contains commands to run for each experiment. 6 | 7 | Instructions: 8 | 9 | (1) Start with a base json that specifies a complete set of parameters for a single 10 | run. This only needs to include parameters you want to sweep over, and parameters 11 | that are different from the defaults. You can set this file path by either 12 | passing it as an argument (e.g. --config /path/to/base.json) or by directly 13 | setting the config file in @make_generator. The new experiment jsons will be put 14 | into the same directory as the base json. 15 | 16 | (2) Decide on what json parameters you would like to sweep over, and fill those in as 17 | keys in @make_generator below, taking note of the hierarchical key 18 | formatting using "/" or ".". Fill in corresponding values for each - these will 19 | be used in creating the experiment names, and for determining the range 20 | of values to sweep. Parameters that should be sweeped together should 21 | be assigned the same group number. 22 | 23 | (3) Set the output script name by either passing it as an argument (e.g. --script /path/to/script.sh) 24 | or by directly setting the script file in @make_generator. The script to run all experiments 25 | will be created at the specified path. 26 | 27 | Args: 28 | config (str): path to a base config json file that will be modified to generate config jsons. 29 | The jsons will be generated in the same folder as this file. 
30 | 31 | script (str): path to output script that contains commands to run the generated training runs 32 | 33 | Example usage: 34 | 35 | # assumes that /tmp/gen_configs/base.json has already been created (see quickstart section of docs for an example) 36 | python hyperparam_helper.py --config /tmp/gen_configs/base.json --script /tmp/gen_configs/out.sh 37 | """ 38 | import argparse 39 | 40 | import robomimic 41 | import robomimic.utils.hyperparam_utils as HyperparamUtils 42 | 43 | 44 | def make_generator(config_file, script_file): 45 | """ 46 | Implement this function to setup your own hyperparameter scan! 47 | """ 48 | generator = HyperparamUtils.ConfigGenerator( 49 | base_config_file=config_file, script_file=script_file 50 | ) 51 | 52 | # use RNN with horizon 10 53 | generator.add_param( 54 | key="algo.rnn.enabled", 55 | name="", 56 | group=0, 57 | values=[True], 58 | ) 59 | generator.add_param( 60 | key="train.seq_length", 61 | name="", 62 | group=0, 63 | values=[10], 64 | ) 65 | generator.add_param( 66 | key="algo.rnn.horizon", 67 | name="", 68 | group=0, 69 | values=[10], 70 | ) 71 | 72 | # LR - 1e-3, 1e-4 73 | generator.add_param( 74 | key="algo.optim_params.policy.learning_rate.initial", 75 | name="plr", 76 | group=1, 77 | values=[1e-3, 1e-4], 78 | ) 79 | 80 | # GMM y / n 81 | generator.add_param( 82 | key="algo.gmm.enabled", 83 | name="gmm", 84 | group=2, 85 | values=[True, False], 86 | value_names=["t", "f"], 87 | ) 88 | 89 | # RNN dim 400 + MLP dims (1024, 1024) vs. RNN dim 1000 + empty MLP dims () 90 | generator.add_param( 91 | key="algo.rnn.hidden_dim", 92 | name="rnnd", 93 | group=3, 94 | values=[ 95 | 400, 96 | 1000, 97 | ], 98 | ) 99 | generator.add_param( 100 | key="algo.actor_layer_dims", 101 | name="mlp", 102 | group=3, 103 | values=[ 104 | [1024, 1024], 105 | [], 106 | ], 107 | value_names=["1024", "0"], 108 | ) 109 | 110 | return generator 111 | 112 | 113 | def main(args): 114 | 115 | # make config generator 116 | generator = make_generator(config_file=args.config, script_file=args.script) 117 | 118 | # generate jsons and script 119 | generator.generate() 120 | 121 | 122 | if __name__ == "__main__": 123 | parser = argparse.ArgumentParser() 124 | 125 | # Path to base json config - will override any defaults. 126 | parser.add_argument( 127 | "--config", 128 | type=str, 129 | help="path to base config json that will be modified to generate jsons. 
The jsons will\ 130 | be generated in the same folder as this file.", 131 | ) 132 | 133 | # Script name to generate - will override any defaults 134 | parser.add_argument( 135 | "--script", 136 | type=str, 137 | help="path to output script that contains commands to run the generated training runs", 138 | ) 139 | 140 | args = parser.parse_args() 141 | main(args) 142 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/auto_append.txt: -------------------------------------------------------------------------------- 1 | no 2 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/auto_overwrite.txt: -------------------------------------------------------------------------------- 1 | yes 2 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/base_args.py: -------------------------------------------------------------------------------- 1 | """ 2 | File holding all command line arguments to use 3 | """ 4 | 5 | from argparse import ArgumentParser, Namespace, Action, ArgumentError, SUPPRESS, _UNRECOGNIZED_ARGS_ATTR 6 | import sys as _sys 7 | 8 | BOOL_CHOICES = ['True', 'False', 'true', 'false'] 9 | BOOL_MAPPING = { 10 | "false": False, 11 | "true": True 12 | } 13 | BOOL_STR = BOOL_MAPPING.keys() 14 | 15 | 16 | def maybe_array_to_element(inp): 17 | """ 18 | Maybe converts an array to a single (numerical) element. If len(inp) == 1, returns the input's first 19 | element. Otherwise, returns the input 20 | """ 21 | return inp[0] if type(inp) is list and len(inp) == 1 else inp 22 | 23 | 24 | # Define custom parsing class for nested default parses 25 | class NestedParser(ArgumentParser): 26 | def parse_known_args(self, args=None, namespace=None): 27 | if args is None: 28 | # args default to the system args 29 | args = _sys.argv[1:] 30 | else: 31 | # make sure that args are mutable 32 | args = list(args) 33 | 34 | # default Namespace built from parser defaults 35 | if namespace is None: 36 | namespace = Namespace() 37 | 38 | # add any action defaults that aren't present 39 | for action in self._actions: 40 | if action.dest is not SUPPRESS: 41 | if not hasattr(namespace, action.dest): 42 | if action.default is not SUPPRESS: 43 | # Send attribute to groupspace, not namespace! 
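# (illustrative aside, added for exposition) e.g. a default registered with
# const="sbatch" and dest="num_cpu" lands on namespace.sbatch.num_cpu rather
# than namespace.num_cpu, so callers can read grouped args via args.sbatch.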
44 | groupspace = getattr(namespace, action.const, None) if action.const else namespace 45 | if groupspace is None: 46 | # Create new attribute in main namespace and reference this with groupspace 47 | setattr(namespace, action.const, Namespace()) 48 | groupspace = getattr(namespace, action.const) 49 | default = BOOL_MAPPING[action.default.lower()] \ 50 | if type(action.default) is str and action.default.lower() in BOOL_STR \ 51 | else action.default 52 | setattr(groupspace, action.dest, default) 53 | 54 | # add any parser defaults that aren't present 55 | for dest in self._defaults: 56 | if not hasattr(namespace, dest): 57 | #groupspace = getattr(namespace, dest.const, Namespace()) if dest.const else namespace 58 | setattr(namespace, dest, self._defaults[dest]) 59 | 60 | # parse the arguments and exit if there are any errors 61 | try: 62 | namespace, args = self._parse_known_args(args, namespace) 63 | if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): 64 | args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) 65 | delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) 66 | return namespace, args 67 | except ArgumentError: 68 | err = _sys.exc_info()[1] 69 | self.error(str(err)) 70 | 71 | 72 | # Define class for creating custom nested namespaces 73 | class GroupedAction(Action): 74 | 75 | def __init__(self, 76 | option_strings, 77 | dest, 78 | nargs=None, 79 | const=None, 80 | default=None, 81 | type=None, 82 | choices=None, 83 | required=False, 84 | help=None, 85 | metavar=None, 86 | maybe_array=False, 87 | ): 88 | # Add custom attributes 89 | self.maybe_array = maybe_array 90 | 91 | # Run super init 92 | super().__init__( 93 | option_strings=option_strings, 94 | dest=dest, 95 | nargs=nargs, 96 | const=const, 97 | default=default, 98 | type=type, 99 | choices=choices, 100 | required=required, 101 | help=help, 102 | metavar=metavar, 103 | ) 104 | 105 | def __call__(self, parser, namespace, values, option_string=None): 106 | groupspace = getattr(namespace, self.const, Namespace()) 107 | if type(values) is str and values.lower() in BOOL_STR: 108 | values = BOOL_MAPPING[values.lower()] 109 | # Possibly convert array if requested 110 | if self.maybe_array: 111 | values = maybe_array_to_element(values) 112 | setattr(groupspace, self.dest, values) 113 | setattr(namespace, self.const, groupspace) 114 | 115 | 116 | # Define global parser 117 | parser = NestedParser(description='Top level arguments') 118 | 119 | # Add seed arg always 120 | parser.add_argument( 121 | '--seed', type=int, default=1, help='random seed (default: 1)') 122 | 123 | 124 | # def parse_arguments(): 125 | # """ 126 | # Parses all arguments and splits them into their appropriate namespaces, returning separately the robosuite args, 127 | # rllib args, and agent args 128 | # """ 129 | # args = parser.parse_args() 130 | # robosuite_args = getattr(args, "robosuite", None) 131 | # rllib_args = getattr(args, "rllib", None) 132 | # model_args = getattr(args, "model", None) 133 | # agent_args = getattr(args, "agent", None) 134 | # 135 | # # Print all args 136 | # print() 137 | # for t, arg in zip(("robosuite", "rllib", "model", "agent"), (robosuite_args, rllib_args, model_args, agent_args)): 138 | # print(' {} Params: '.format(t)) 139 | # if arg is not None: 140 | # for key, value in arg.__dict__.items(): 141 | # if key.startswith('__') or key.startswith('_'): 142 | # continue 143 | # print(' {}: {}'.format(key, value)) 144 | # print() 145 | # 146 | # # Return args 147 | # return robosuite_args, rllib_args, model_args, agent_args 148 | 149 | 150 | 
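# --- illustrative usage sketch (added for exposition; hypothetical flag, not part of the original file) ---
# How NestedParser and GroupedAction compose, mirroring sbatch_args.py:
#
#   parser.add_argument('--num_cpu', type=int, default=4,
#                       const='sbatch', action=GroupedAction)
#   args = parser.parse_args(['--num_cpu', '8', '--seed', '3'])
#   args.seed            # -> 3 (plain args stay on the top-level namespace)
#   args.sbatch.num_cpu  # -> 8 (grouped args land on a nested Namespace)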
if __name__ == '__main__': 151 | # Add arguments 152 | # add_robosuite_arguments() 153 | # add_rllib_arguments() 154 | # add_ppo_arguments() 155 | # 156 | # # Test parsing functionality 157 | # a, b, c = parse_arguments() 158 | # print(a) 159 | # print(b) 160 | # print(c) 161 | pass 162 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/base_template.sbatch: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #all commands that start with SBATCH contain commands that are just used by SLURM for scheduling 4 | ################# 5 | #partition name 6 | #SBATCH --partition={{PARTITION}} 7 | #specific machines to avoid 8 | #SBATCH --exclude={{EXCLUDE}} 9 | ################# 10 | #number of GPUs 11 | #SBATCH --gres=gpu:{{NUM_GPU}} 12 | ##SBATCH --nodes=1 13 | #SBATCH --cpus-per-task=4 14 | #SBATCH --ntasks={{NUM_CPU}} 15 | ################# 16 | #set a job name 17 | #SBATCH --job-name="{{JOB_NAME}}" 18 | ################# 19 | #a file for job output, you can check job progress, append the job ID with %j to make it unique 20 | #SBATCH --output={{EXECUTABLE_LOG_DIR}}/%j.out 21 | ################# 22 | # a file for errors from the job 23 | #SBATCH --error={{EXECUTABLE_LOG_DIR}}/%j.err 24 | ################# 25 | #time you think you need; default is 2 hours 26 | #format could be dd-hh:mm:ss, hh:mm:ss, mm:ss, or mm 27 | #SBATCH --time={{HOURS}}:00:00 28 | ################# 29 | # Quality of Service (QOS); think of it as sending your job into a special queue; --qos=long for with a max job length of 7 days. 30 | # uncomment ##SBATCH --qos=long if you want your job to run longer than 48 hours, which is the default for normal partition, 31 | # NOTE- in the hns partition the default max run time is 7 days , so you wont need to include qos, also change to normal partition 32 | # since dev max run time is 2 hours. 33 | #{{QOS_LONG}} 34 | # We are submitting to the dev partition, there are several on sherlock: normal, gpu, bigmem (jobs requiring >64Gigs RAM) 35 | ##SBATCH -p dev 36 | ################# 37 | # --mem is memory per node; default is 4000 MB per CPU, remember to ask for enough mem to match your CPU request, since 38 | # sherlock automatically allocates 4 Gigs of RAM/CPU, if you ask for 8 CPUs you will get 32 Gigs of RAM, so either 39 | # leave --mem commented out or request >= to the RAM needed for your CPU request. It will also accept mem. in units, ie "--mem=4G" 40 | #SBATCH --mem={{MEM}}G 41 | # to request multiple threads/CPUs use the -c option, on Sherlock we use 1 thread/CPU, 16 CPUs on each normal compute node 4Gigs RAM per CPU. Here we will request just 1. 42 | #SBATCH -c 1 43 | ################# 44 | # Have SLURM send you an email when the job ends or fails, careful, the email could end up in your clutter folder 45 | # Also, if you submit hundreds of jobs at once you will get hundreds of emails. 
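# (illustrative aside, added for exposition) sbatch_utils.py fills in every
# {{PLACEHOLDER}} in this template via string substitution; with hypothetical
# argument values, the rendered directives above would read e.g.:
#   #SBATCH --partition=titans
#   #SBATCH --gres=gpu:1
#   #SBATCH --time=20:00:00
#   #SBATCH --mem=16G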
46 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 47 | # Remember to change this to your email 48 | #SBATCH --mail-user={{NOTIFICATION_EMAIL}} 49 | # list out some useful information 50 | echo "SLURM_JOBID="$SLURM_JOBID 51 | echo "SLURM_JOB_NAME="$SLURM_JOB_NAME 52 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 53 | echo "SLURM_NNODES"=$SLURM_NNODES 54 | echo "SLURMTMPDIR="$SLURMTMPDIR 55 | echo "working directory = "$SLURM_SUBMIT_DIR 56 | #now run normal batch commands 57 | {{SHELL_SOURCE_SCRIPT}} 58 | conda activate {{PYTHON_INTERPRETER}} 59 | export PYTHONPATH=$PYTHONPATH:{{EXTRA_PYTHONPATH}} 60 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{{MUJOCO_DIR}} 61 | 62 | {{COPY_FILE}} 63 | {{CMD}} 64 | {{EXTRA_CMDS}} 65 | 66 | # done 67 | echo "Done" 68 | exit 0 69 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/run_hp_sweep.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for executing all configs generated from hyperparameter_helper.py (in batchRL) 3 | 4 | Note that this assumes that hyperparameter_helper.py has already been run, and that all the resulting 5 | configurations exist in a single folder. 6 | """ 7 | 8 | # from slurm.util.arguments import * 9 | from robomimic.scripts.slurm.batchrl_args import * 10 | from robomimic.scripts.slurm.sbatch_args import * 11 | 12 | # from slurm.util.sbatch_utils import create_and_execute_sbatch_script 13 | from robomimic.scripts.slurm.sbatch_utils import create_and_execute_sbatch_script 14 | 15 | import copy 16 | 17 | # Add relevant input arguments 18 | add_sbatch_args() 19 | add_batchrl_hp_args() 20 | 21 | 22 | def parse_configs_from_hp_script(hp_script): 23 | """ 24 | Helper script to parse the executable hyperparameter script generated from hyperparameter_helper.py (in batchRL) 25 | to infer the filepaths to the generated configs.
26 | 27 | Args: 28 | hp_script (str): Absolute fpath to the generated hyperparameter script 29 | 30 | Returns: 31 | list: Absolute paths to the configs to be deployed in the hp sweep 32 | """ 33 | # Create list to fill as we parse the script 34 | configs = [] 35 | # Open and parse file line by line 36 | with open(hp_script) as f: 37 | for line in f: 38 | # Make sure we only parse the lines where we have a valid python command 39 | if line.startswith("python"): 40 | # Extract only the config path 41 | configs.append(line.split(" ")[-1].split("\n")[0]) 42 | # Return configs 43 | return configs 44 | 45 | 46 | def generate_debug_script(hp_script): 47 | """ 48 | Helper script to generate an .sh executable debug hyperparameter script using the hp sweep script generated from 49 | hyperparameter_helper.py (in batchRL) 50 | 51 | Args: 52 | hp_script (str): Absolute fpath to the generated hyperparameter script 53 | """ 54 | # Modify the path so that we add "_debug" to the end -- hacky way since we know ".sh" extension is 3 chars long 55 | debug_script = hp_script[:-3] + "_debug.sh" 56 | # Open and parse file line by line 57 | with open(hp_script) as f: 58 | # Open a new file to write the debug script to 59 | with open(debug_script, 'w+') as new_file: 60 | # Loop through hp script and write to this new file 61 | for line in f: 62 | # Make sure we only parse the lines where we have a valid python command 63 | if line.startswith("python"): 64 | # We write the line plus the extra --debug flag 65 | new_file.write(line.split("\n")[0] + " --debug\n") 66 | else: 67 | # Just write line normally 68 | new_file.write(line) 69 | 70 | 71 | if __name__ == '__main__': 72 | # First, parse args 73 | args = parser.parse_args() 74 | 75 | # Extract configs from hp sweep script 76 | configs = parse_configs_from_hp_script(hp_script=args.batchrl_hp.hp_sweep_script) 77 | 78 | # If user requested a debug script to be generated, do that now 79 | if args.batchrl_hp.generate_debug_script: 80 | generate_debug_script(hp_script=args.batchrl_hp.hp_sweep_script) 81 | 82 | n = args.batchrl_hp.n_exps_per_instance 83 | 84 | # Loop through each config to create an sbatch script from 85 | for i in range(0, len(configs), n): 86 | script_args = [] 87 | configs_for_batch = configs[i:i+n] 88 | for config in configs_for_batch: 89 | # Extract name for this sbatch script 90 | name = config.split("/")[-1].split(".json")[0] 91 | 92 | # Compose script arguments to pass to sbatch script 93 | script_args.append({ 94 | "config": config, 95 | }) 96 | 97 | # Generate the sbatch file 98 | print(f"Creating {name}...") 99 | 100 | # Multiple resources by number of jobs in batch 101 | sbatch_args = copy.deepcopy(args.sbatch) 102 | sbatch_args.num_cpu *= len(configs_for_batch) 103 | sbatch_args.mem_gb *= len(configs_for_batch) 104 | 105 | create_and_execute_sbatch_script( 106 | filename=name, 107 | job_name=name, 108 | sbatch_args=sbatch_args, 109 | script_args=script_args) 110 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/sbatch_args.py: -------------------------------------------------------------------------------- 1 | # from slurm.util.arguments.base_args import * 2 | from robomimic.scripts.slurm.base_args import * 3 | 4 | PARTITIONS = ( 5 | "napoli", 6 | "tibet", 7 | "svl", 8 | 9 | "titans", 10 | "dgx", 11 | ) 12 | 13 | 14 | def add_sbatch_args(): 15 | """ 16 | Adds sbatch arguments needed for automatically generating and executing python files 17 | """ 18 | # Define namespace for the 
sbatch args 19 | prefix = 'sbatch' 20 | actions = { 21 | "const": prefix, 22 | "action": GroupedAction 23 | } 24 | # Required args 25 | parser.add_argument( 26 | '--script', 27 | type=str, 28 | required=True, 29 | help='path to the Python script to execute', 30 | **actions 31 | ) 32 | parser.add_argument( 33 | '--generated_dir', 34 | type=str, 35 | required=True, 36 | help='Sets the location where generated sbatch scripts will be stored', 37 | **actions 38 | ) 39 | parser.add_argument( 40 | '--python_interpreter', 41 | type=str, 42 | required=True, 43 | help='Python interpreter to use for the executed python script', 44 | **actions 45 | ) 46 | 47 | # Additional args 48 | parser.add_argument( 49 | '--partition', 50 | type=str, 51 | default='titans', 52 | choices=PARTITIONS, 53 | help='partition to run on for this process', 54 | **actions 55 | ) 56 | parser.add_argument( 57 | '--exclude', 58 | type=str, 59 | default='', 60 | help='any specific machines to avoid, comma separated', 61 | **actions 62 | ) 63 | parser.add_argument( 64 | '--gpu_type', 65 | type=str, 66 | default="any", 67 | help='Specific GPU to use. Any results in any GPU being used for this run', 68 | **actions 69 | ) 70 | parser.add_argument( 71 | '--num_gpu', 72 | type=int, 73 | default=0, 74 | help='Sets the number of gpus to use for this sbatch script', 75 | **actions 76 | ) 77 | parser.add_argument( 78 | '--num_cpu', 79 | type=int, 80 | default=4, 81 | help='Sets the number of cpus to use for this sbatch script', 82 | **actions 83 | ) 84 | parser.add_argument( 85 | '--mem_gb', 86 | type=int, 87 | default=0, 88 | help='If nonzero, sets the amount of memory to be this many GB', 89 | **actions 90 | ) 91 | parser.add_argument( 92 | '--max_hours', 93 | type=int, 94 | default=20, 95 | help='Sets the maximum number of hours this script will be run for', 96 | **actions 97 | ) 98 | parser.add_argument( 99 | '--extra_pythonpath', 100 | type=str, 101 | default="", 102 | help='Extra paths to set to the pythonpath variable', 103 | **actions 104 | ) 105 | parser.add_argument( 106 | '--overwrite', 107 | type=str, 108 | default="False", 109 | choices=BOOL_CHOICES, 110 | help='Whether to overwrite or not', 111 | **actions 112 | ) 113 | parser.add_argument( 114 | '--extra_commands', 115 | nargs="+", 116 | type=str, 117 | default=None, 118 | help='Extra commands to run after main python command', 119 | **actions 120 | ) 121 | parser.add_argument( 122 | '--copy_file', 123 | nargs="+", 124 | type=str, 125 | default=None, 126 | help='Copies a file from source to location. Expected format is [source_file, target_dir]. New file will ' 127 | 'share the same file name as the original source file.
Useful in cases e.g.: copying datasets to local ssd', 128 | **actions 129 | ) 130 | parser.add_argument( 131 | '--executable_log_dir', 132 | type=str, 133 | default='/cvgl2/u/jdwong/test_output', 134 | help='Location to dump sbatch log out / err text to', 135 | **actions 136 | ) 137 | parser.add_argument( 138 | '--shell_source_script', 139 | type=str, 140 | default=None, 141 | help='If specified, bash script to source at beginning of sbatch execution', 142 | **actions 143 | ) 144 | parser.add_argument( 145 | '--notification_email', 146 | type=str, 147 | default='jdwong@stanford.edu', 148 | help='Email address to send slurm notifications to (i.e.: when the script finishes running)', 149 | **actions 150 | ) 151 | parser.add_argument( 152 | '--mujoco_dir', 153 | type=str, 154 | default='/cvgl2/u/jdwong/.mujoco/mujoco200/bin', 155 | help='Absolute path to mujoco 200 installation bin directory', 156 | **actions 157 | ) 158 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/sbatch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python script for generating and executing sbatch files 3 | """ 4 | 5 | import os 6 | # import slurm 7 | import robomimic 8 | from pathlib import Path 9 | 10 | PARTITIONS = ( 11 | "napoli", 12 | "tibet", 13 | "svl", 14 | 15 | "titans", 16 | "dgx", 17 | ) 18 | 19 | robomimic_base_path = os.path.abspath(os.path.join(os.path.dirname(robomimic.__file__), os.pardir)) 20 | 21 | AUTO_OVERWRITE_RESP = os.path.join(robomimic_base_path, "robomimic/scripts/slurm/auto_overwrite.txt") 22 | AUTO_APPEND_RESP = os.path.join(robomimic_base_path, "robomimic/scripts/slurm/auto_append.txt") 23 | 24 | import time 25 | from datetime import datetime 26 | 27 | def create_and_execute_sbatch_script(filename, job_name, sbatch_args, script_args=None): 28 | """ 29 | Function that creates and executes an sbatch script based off of a template 30 | 31 | Args: 32 | @filename (str): Name of the sbatch file that will be generated 33 | @job_name (str): Name of sbatch job to execute 34 | @sbatch_args (Namespace): Input arguments to fill in sbatch script 35 | @script_args (list of dicts, dict or None): If specified, adds additional 36 | input arguments to script execution based on key-value mappings. 37 | If of type list, indicates multiple commands in one sbatch script. 
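Example (hypothetical values, mirroring the call in run_hp_sweep.py):

    create_and_execute_sbatch_script(
        filename="bc_im_seed1",
        job_name="bc_im_seed1",
        sbatch_args=args.sbatch,
        script_args=[{"config": "/path/to/bc_im.json"}],
    )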
38 | """ 39 | # Create a new directory path if it doesn't exist and create a new filename that we will write to 40 | Path(sbatch_args.generated_dir).mkdir(parents=True, exist_ok=True) 41 | ts = time.time() 42 | new_sbatch_fpath = os.path.join(sbatch_args.generated_dir, "{}_{}.sbatch".format(filename, ts)) 43 | 44 | # Compose extra commands 45 | if sbatch_args.extra_commands is not None: 46 | sbatch_args.extra_commands = sbatch_args.extra_commands if type(sbatch_args.extra_commands) is list else \ 47 | [sbatch_args.extra_commands] 48 | sbatch_args.extra_commands = "\n".join(sbatch_args.extra_commands) 49 | else: 50 | sbatch_args.extra_commands = "" 51 | 52 | # infer number of commands from script args 53 | if script_args is None: 54 | num_commands = 1 55 | elif not isinstance(script_args, list): 56 | script_args = [script_args] 57 | num_commands = 1 58 | else: 59 | num_commands = len(script_args) 60 | 61 | command = "" 62 | for i in range(num_commands): 63 | # Compose main command to be executed in script 64 | command += "python {}".format(sbatch_args.script) 65 | 66 | # Add additional input args if necessary 67 | if script_args is not None: 68 | for k, v in script_args[i].items(): 69 | if v is not None: 70 | if type(v) is list or type(v) is tuple: 71 | v = " ".join(str(vi) for vi in v) 72 | command += " --{} {}".format(k, v) 73 | 74 | # Add overwrite if requested 75 | if sbatch_args.overwrite: 76 | command += f" < {AUTO_OVERWRITE_RESP}" 77 | else: 78 | command += f" < {AUTO_APPEND_RESP}" 79 | 80 | command += " & \n" 81 | command += "wait" 82 | 83 | # Define partition 84 | if sbatch_args.partition == "napoli": 85 | partition = "napoli-gpu" if sbatch_args.num_gpu > 0 else "napoli-cpu\n#SBATCH --exclude=napoli[15-16]" 86 | else: 87 | partition = sbatch_args.partition 88 | 89 | # Define GPU(s) to use 90 | num_gpu = sbatch_args.num_gpu 91 | if sbatch_args.gpu_type != "any": 92 | num_gpu = f"{sbatch_args.gpu_type}:{num_gpu}" 93 | 94 | # Add copy file if requested 95 | copy_file = "" if sbatch_args.copy_file is None else create_copy_file_cmd(*sbatch_args.copy_file) 96 | 97 | # Add shell source script if requested 98 | shell_source_script = "" if sbatch_args.shell_source_script is None else f"source {sbatch_args.shell_source_script}" 99 | 100 | # Define a dict to map expected fill-ins with replacement values 101 | fill_ins = { 102 | "{{PARTITION}}": partition, 103 | "{{EXCLUDE}}": sbatch_args.exclude, 104 | "{{NUM_GPU}}": num_gpu, 105 | "{{NUM_CPU}}": sbatch_args.num_cpu, 106 | "{{JOB_NAME}}": job_name, 107 | "{{EXECUTABLE_LOG_DIR}}": sbatch_args.executable_log_dir, 108 | "{{HOURS}}": sbatch_args.max_hours, 109 | "{{QOS_LONG}}": "#SBATCH --qos=long" if sbatch_args.max_hours > 48 else "", 110 | "{{MEM}}": sbatch_args.mem_gb, 111 | "{{NOTIFICATION_EMAIL}}": sbatch_args.notification_email, 112 | "{{SHELL_SOURCE_SCRIPT}}": shell_source_script, 113 | "{{PYTHON_INTERPRETER}}": sbatch_args.python_interpreter, 114 | "{{EXTRA_PYTHONPATH}}": sbatch_args.extra_pythonpath, 115 | "{{MUJOCO_DIR}}": sbatch_args.mujoco_dir, 116 | "{{COPY_FILE}}": copy_file, 117 | "{{CMD}}": command, 118 | "{{EXTRA_CMDS}}": sbatch_args.extra_commands 119 | } 120 | 121 | # Open the template file 122 | with open(os.path.join(robomimic_base_path, "robomimic/scripts/slurm/base_template.sbatch")) as template: 123 | # Open the new sbatch file 124 | print(new_sbatch_fpath) 125 | with open(new_sbatch_fpath, 'w+') as new_file: 126 | # Loop through template and write to this new file 127 | for line in template: 128 | wrote = False 129 | # 
Check for various cases 130 | for k, v in fill_ins.items(): 131 | # If the key is found in the line, replace it with its value 132 | if k in line: 133 | new_file.write(line.replace(k, str(v))) 134 | wrote = True 135 | break 136 | # Otherwise, we just write the line from the template directly 137 | if not wrote: 138 | new_file.write(line) 139 | 140 | # Submit the generated file via sbatch (invoking the file directly fails with 141 | # "Permission denied", since the generated file is not marked executable) 142 | os.system("sbatch {}".format(new_sbatch_fpath)) 143 | 144 | 145 | def create_copy_file_cmd(source_file, target_dir): 146 | """ 147 | Helper function to create a bash command (in string format) to copy a source file to a target location. 148 | 149 | Args: 150 | source_file (str): Absolute path to the source file to copy 151 | target_dir (str): Absolute path to the target directory to which the source file will be copied 152 | 153 | Returns: 154 | str: bash command to execute in string format 155 | """ 156 | target_filename = source_file.split("/")[-1] 157 | target_fpath = os.path.join(target_dir, target_filename) 158 | cmd =\ 159 | f'mkdir -p {target_dir}\n'\ 160 | f'if [[ -f "{target_fpath}" ]]; then\n'\ 161 | f' echo "{target_fpath} exists, no copying"\n'\ 162 | f'else\n'\ 163 | f' echo "{target_fpath} does not exist, copying dataset"\n'\ 164 | f' cp {source_file} {target_fpath}\n'\ 165 | f'fi' 166 | 167 | return cmd 168 | -------------------------------------------------------------------------------- /robomimic/scripts/split_train_val.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for splitting a dataset hdf5 file into training and validation trajectories. 3 | 4 | Args: 5 | dataset (str): path to hdf5 dataset 6 | 7 | filter_key (str): if provided, split the subset of trajectories 8 | in the file that correspond to this filter key into a training 9 | and validation set of trajectories, instead of splitting the 10 | full set of trajectories 11 | 12 | ratio (float): validation ratio, in (0, 1). Defaults to 0.1, which is 10%. 13 | 14 | Example usage: 15 | python split_train_val.py --dataset /path/to/demo.hdf5 --ratio 0.1 16 | """ 17 | 18 | import argparse 19 | import h5py 20 | import numpy as np 21 | 22 | from robomimic.utils.file_utils import create_hdf5_filter_key 23 | 24 | 25 | def split_train_val_from_hdf5(hdf5_path, val_ratio=0.1, filter_key=None): 26 | """ 27 | Splits data into a training set and a validation set from an HDF5 file. 28 | 29 | Args: 30 | hdf5_path (str): path to the hdf5 file 31 | to load the transitions from 32 | 33 | val_ratio (float): ratio of validation demonstrations to all demonstrations 34 | 35 | filter_key (str): if provided, split the subset of demonstration keys stored 36 | under mask/@filter_key instead of the full set of demonstrations 37 | """ 38 | 39 | # retrieve number of demos 40 | f = h5py.File(hdf5_path, "r") 41 | if filter_key is not None: 42 | print("using filter key: {}".format(filter_key)) 43 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])]) 44 | else: 45 | demos = sorted(list(f["data"].keys())) 46 | num_demos = len(demos) 47 | f.close() 48 | 49 | # get random split 50 | 51 | num_val = int(val_ratio * num_demos) 52 | mask = np.zeros(num_demos) 53 | mask[:num_val] = 1.
54 | np.random.shuffle(mask) 55 | mask = mask.astype(int) 56 | train_inds = (1 - mask).nonzero()[0] 57 | valid_inds = mask.nonzero()[0] 58 | train_keys = [demos[i] for i in train_inds] 59 | valid_keys = [demos[i] for i in valid_inds] 60 | print("{} validation demonstrations out of {} total demonstrations.".format(num_val, num_demos)) 61 | 62 | # pass mask to generate split 63 | name_1 = "train" 64 | name_2 = "valid" 65 | if filter_key is not None: 66 | name_1 = "{}_{}".format(filter_key, name_1) 67 | name_2 = "{}_{}".format(filter_key, name_2) 68 | 69 | train_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=train_keys, key_name=name_1) 70 | valid_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=valid_keys, key_name=name_2) 71 | 72 | print("Total number of train samples: {}".format(np.sum(train_lengths))) 73 | print("Average number of train samples: {}".format(np.mean(train_lengths))) 74 | 75 | print("Total number of valid samples: {}".format(np.sum(valid_lengths))) 76 | print("Average number of valid samples: {}".format(np.mean(valid_lengths))) 77 | 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument( 82 | "--dataset", 83 | type=str, 84 | help="path to hdf5 dataset", 85 | ) 86 | parser.add_argument( 87 | "--filter_key", 88 | type=str, 89 | default=None, 90 | help="if provided, split the subset of trajectories in the file that correspond to\ 91 | this filter key into a training and validation set of trajectories, instead of\ 92 | splitting the full set of trajectories", 93 | ) 94 | parser.add_argument( 95 | "--ratio", 96 | type=float, 97 | default=0.1, 98 | help="validation ratio, in (0, 1)" 99 | ) 100 | args = parser.parse_args() 101 | 102 | # seed to make sure results are consistent 103 | np.random.seed(0) 104 | 105 | split_train_val_from_hdf5(args.dataset, val_ratio=args.ratio, filter_key=args.filter_key) -------------------------------------------------------------------------------- /robomimic/scripts/vis/vis_preintv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from robomimic.scripts.vis.vis_utils import get_argparser, playback_dataset 5 | from robomimic.scripts.vis.image_utils import apply_filter 6 | 7 | import matplotlib 8 | matplotlib.use('Agg') 9 | import matplotlib.pyplot as plt 10 | 11 | def get_intv_and_preintv_inds(ep_info): # note: relies on the module-level `args` parsed in __main__ 12 | if args.model == 'Q': 13 | vals = ep_info['q_vals'] 14 | elif args.model == 'V': 15 | vals = ep_info['v_vals'] 16 | else: 17 | raise ValueError 18 | 19 | ac_mods = ep_info["action_modes"] 20 | intv_inds = np.reshape(np.argwhere(ac_mods == 1), -1) 21 | 22 | preintv_inds = [] 23 | intv_start_inds = [i for i in intv_inds if i > 0 and ac_mods[i-1] != 1] # a segment starts where the previous step was not an intervention 24 | for i_start in intv_start_inds: 25 | for j in range(i_start-1, 0, -1): 26 | if j in intv_inds or vals[j] > args.th: 27 | break 28 | 29 | preintv_inds.append(j) 30 | 31 | return intv_inds, preintv_inds 32 | 33 | 34 | def plot_helper(ep_num, ep_info): 35 | fig, ax1 = plt.subplots() 36 | 37 | if args.model == 'Q': 38 | y_vals = ep_info['q_vals'] 39 | y_label = 'Q' 40 | elif args.model == 'V': 41 | y_vals = ep_info['v_vals'] 42 | y_label = 'V' 43 | else: 44 | raise ValueError 45 | 46 | color = 'tab:blue' 47 | ax1.set_xlabel('Timestep') 48 | 49 | ax1.set_ylabel(y_label) 50 | ax1.plot(y_vals, color=color) 51 | ax1.tick_params(axis='y') 52 | 53 | ax1.axhline(y=0.0, color='black') 54 | 55 | ax1.set_ylim(-1.2, 0.2) 56 | 57 | intv_inds, 
preintv_inds = get_intv_and_preintv_inds(ep_info) 58 | for i in intv_inds: 59 | ax1.axvline(x=i, color='green', linewidth=5, alpha=0.10) 60 | 61 | for i in preintv_inds: 62 | ax1.axvline(x=i, color='red', linewidth=5, alpha=0.10) 63 | 64 | plt.savefig(os.path.join( 65 | args.vis_path, 66 | 'plot_{}.png'.format(ep_num) 67 | )) 68 | plt.close() 69 | 70 | 71 | def video_helper(ep_num, ep_info): 72 | intv_inds, preintv_inds = get_intv_and_preintv_inds(ep_info) 73 | 74 | if len(intv_inds) == 0: 75 | return [] 76 | 77 | video_frames = ep_info['video_frames'] 78 | for (i, img) in video_frames: 79 | if i in intv_inds: 80 | img[::] = apply_filter(img, color=(0, 255, 0)) 81 | 82 | if i in preintv_inds: 83 | img[::] = apply_filter(img, color=(255, 0, 0)) 84 | 85 | return video_frames 86 | 87 | 88 | if __name__ == "__main__": 89 | parser = get_argparser() 90 | 91 | parser.add_argument( 92 | "--th", 93 | type=float, 94 | default=-0.35, 95 | help="threshold for pre-intervention", 96 | ) 97 | 98 | parser.add_argument( 99 | "--model", 100 | type=str, 101 | default='Q', 102 | choices=['Q', 'V'], 103 | help="Model to use for determining pre-intv", 104 | ) 105 | 106 | args = parser.parse_args() 107 | playback_dataset(args, plot_helper=plot_helper, video_helper=video_helper) 108 | -------------------------------------------------------------------------------- /robomimic/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__init__.py -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/env_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/env_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/file_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/file_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/log_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/log_utils.cpython-38.pyc -------------------------------------------------------------------------------- 
/robomimic/utils/__pycache__/loss_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/loss_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/macros.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/macros.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/obs_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/obs_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/python_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/python_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/tensor_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/tensor_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/torch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/torch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/train_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/train_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/vis_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/vis_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/log_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains utility classes and functions for logging to stdout, stderr, 3 | and to tensorboard. 4 | """ 5 | import os 6 | import sys 7 | import numpy as np 8 | from datetime import datetime 9 | from contextlib import contextmanager 10 | from tqdm import tqdm 11 | import time 12 | 13 | 14 | class PrintLogger(object): 15 | """ 16 | This class redirects print statements to both console and a file. 
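Typical usage (editor's sketch; the log path is hypothetical):

    import sys
    sys.stdout = PrintLogger("/tmp/train_log.txt")
    print("hello")  # now written to both the console and /tmp/train_log.txt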
17 | """ 18 | def __init__(self, log_file): 19 | self.terminal = sys.stdout 20 | print('STDOUT will be forked to %s' % log_file) 21 | self.log_file = open(log_file, "a") 22 | 23 | def write(self, message): 24 | self.terminal.write(message) 25 | self.log_file.write(message) 26 | self.log_file.flush() 27 | 28 | def flush(self): 29 | # this flush method is needed for python 3 compatibility. 30 | # this handles the flush command by doing nothing. 31 | # you might want to specify some extra behavior here. 32 | pass 33 | 34 | 35 | class DataLogger(object): 36 | """ 37 | Logging class to log metrics to tensorboard and/or retrieve running statistics about logged data. 38 | """ 39 | def __init__(self, log_dir, config, log_tb=True, log_wandb=False): 40 | """ 41 | Args: 42 | log_dir (str): base path to store logs 43 | log_tb (bool): whether to use tensorboard logging 44 | """ 45 | self._tb_logger = None 46 | self._wandb_logger = None 47 | self._data = dict() # store all the scalar data logged so far 48 | 49 | if log_tb: 50 | from tensorboardX import SummaryWriter 51 | self._tb_logger = SummaryWriter(os.path.join(log_dir, 'tb')) 52 | 53 | if log_wandb: 54 | import wandb 55 | 56 | num_attempts = 10 57 | for attempt in range(num_attempts): 58 | try: 59 | # set up wandb 60 | self._wandb_logger = wandb 61 | self._wandb_logger.init( 62 | entity="sirius", 63 | project=config['tags']['wandb_proj_name'], 64 | name=config.experiment.name, 65 | dir=log_dir, 66 | mode=("offline" if attempt == num_attempts - 1 else "online"), 67 | ) 68 | 69 | # set up tags for identifying experiment 70 | tags = config['tags'] 71 | wandb_config = {k: v for (k, v) in tags.items() if k not in ['hp_keys', 'hp_values']} 72 | for (k, v) in zip(tags['hp_keys'], tags['hp_values']): 73 | wandb_config[k] = v 74 | self._wandb_logger.config.update(wandb_config) 75 | 76 | break 77 | except: 78 | print("wandb initialization, attempt #{}".format(attempt + 1)) 79 | self._wandb_logger = None 80 | time.sleep(30) 81 | 82 | def record(self, k, v, epoch, data_type='scalar', log_stats=False): 83 | """ 84 | Record data with logger. 85 | 86 | Args: 87 | k (str): key string 88 | v (float or image): value to store 89 | epoch: current epoch number 90 | data_type (str): the type of data. 
either 'scalar' or 'image' 91 | log_stats (bool): whether to store the mean/max/min/std for all data logged so far with key k 92 | """ 93 | 94 | assert data_type in ['scalar', 'image'] 95 | 96 | if data_type == 'scalar': 97 | # maybe update internal cache if logging stats for this key 98 | if log_stats or k in self._data: # any key that we're logging or previously logged 99 | if k not in self._data: 100 | self._data[k] = [] 101 | self._data[k].append(v) 102 | 103 | # maybe log to tensorboard 104 | if self._tb_logger is not None: 105 | if data_type == 'scalar': 106 | self._tb_logger.add_scalar(k, v, epoch) 107 | if log_stats: 108 | stats = self.get_stats(k) 109 | for (stat_k, stat_v) in stats.items(): 110 | stat_k_name = '{}-{}'.format(k, stat_k) 111 | self._tb_logger.add_scalar(stat_k_name, stat_v, epoch) 112 | elif data_type == 'image': 113 | self._tb_logger.add_images(k, img_tensor=v, global_step=epoch, dataformats="NHWC") 114 | 115 | if self._wandb_logger is not None: 116 | if data_type == 'scalar': 117 | self._wandb_logger.log({k: v}, step=epoch) 118 | if log_stats: 119 | stats = self.get_stats(k) 120 | for (stat_k, stat_v) in stats.items(): 121 | self._wandb_logger.log({stat_k: stat_v}, step=epoch) 122 | elif data_type == 'image': 123 | pass # Not Implemented 124 | 125 | def get_stats(self, k): 126 | """ 127 | Computes running statistics for a particular key. 128 | 129 | Args: 130 | k (str): key string 131 | Returns: 132 | stats (dict): dictionary of statistics 133 | """ 134 | stats = dict() 135 | stats['mean'] = np.mean(self._data[k]) 136 | stats['std'] = np.std(self._data[k]) 137 | stats['min'] = np.min(self._data[k]) 138 | stats['max'] = np.max(self._data[k]) 139 | return stats 140 | 141 | def close(self): 142 | """ 143 | Run before terminating to make sure all logs are flushed 144 | """ 145 | if self._tb_logger is not None: 146 | self._tb_logger.close() 147 | 148 | if self._wandb_logger is not None: 149 | self._wandb_logger.finish() 150 | 151 | 152 | class custom_tqdm(tqdm): 153 | """ 154 | Small extension to tqdm to make a few changes from default behavior. 155 | By default tqdm writes to stderr. Instead, we change it to write 156 | to stdout. 157 | """ 158 | def __init__(self, *args, **kwargs): 159 | assert "file" not in kwargs 160 | super(custom_tqdm, self).__init__(*args, file=sys.stdout, **kwargs) 161 | 162 | 163 | @contextmanager 164 | def silence_stdout(): 165 | """ 166 | This contextmanager will redirect stdout so that nothing is printed 167 | to the terminal. Taken from the link below: 168 | 169 | https://stackoverflow.com/questions/6735917/redirecting-stdout-to-nothing-in-python 170 | """ 171 | old_target = sys.stdout 172 | try: 173 | with open(os.devnull, "w") as new_target: 174 | sys.stdout = new_target 175 | yield new_target 176 | finally: 177 | sys.stdout = old_target 178 | -------------------------------------------------------------------------------- /robomimic/utils/macros.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set of global variables shared across robomimic 3 | """ 4 | # Sets debugging mode. 
Should be set at the top-level script so that internal 5 | # debugging functionality is activated 6 | DEBUG = False 7 | -------------------------------------------------------------------------------- /robomimic/utils/python_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set of general purpose utility functions for easier interfacing with the Python API 3 | """ 4 | import inspect 5 | from copy import deepcopy 6 | import robomimic.utils.macros as Macros 7 | 8 | 9 | def get_class_init_kwargs(cls): 10 | """ 11 | Helper function to return a list of all valid keyword arguments (excluding "self") for the given @cls class. 12 | 13 | Args: 14 | cls (object): Class from which to grab __init__ kwargs 15 | 16 | Returns: 17 | list: All keyword arguments (excluding "self") specified by @cls __init__ constructor method 18 | """ 19 | return list(inspect.signature(cls.__init__).parameters.keys())[1:] 20 | 21 | 22 | def extract_subset_dict(dic, keys, copy=False): 23 | """ 24 | Helper function to extract a subset of dictionary key-values from a current dictionary. Optionally (deep)copies 25 | the values extracted from the original @dic if @copy is True. 26 | 27 | Args: 28 | dic (dict): Dictionary containing multiple key-values 29 | keys (Iterable): Specific keys to extract from @dic. If a key doesn't exist in @dic, it is skipped 30 | copy (bool): If True, will deepcopy all values corresponding to the specified @keys 31 | 32 | Returns: 33 | dict: Extracted subset dictionary containing only the specified @keys and their corresponding values 34 | """ 35 | subset = {k: dic[k] for k in keys if k in dic} 36 | return deepcopy(subset) if copy else subset 37 | 38 | 39 | def extract_class_init_kwargs_from_dict(cls, dic, copy=False, verbose=False): 40 | """ 41 | Helper function to return a dictionary of key-values that specifically correspond to @cls class's __init__ 42 | constructor method, from @dic which may or may not contain additional, irrelevant kwargs. 43 | 44 | Note that @dic may be missing certain kwargs as specified by cls.__init__. No error will be raised. 
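For example (editor's sketch, using a hypothetical class):

    class Net:
        def __init__(self, hidden_dim, activation="relu"):
            ...

    extract_class_init_kwargs_from_dict(Net, {"hidden_dim": 64, "unused": 0})
    # -> {"hidden_dim": 64}; "unused" is filtered out, and the missing
    #    "activation" kwarg is simply absent from the result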
45 | 46 | Args: 47 | cls (object): Class from which to grab __init__ kwargs that will be used as filtering keys for @dic 48 | dic (dict): Dictionary containing multiple key-values 49 | copy (bool): If True, will deepcopy all values corresponding to the specified @keys 50 | verbose (bool): If True (or if macro DEBUG is True), then will print out mismatched keys 51 | 52 | Returns: 53 | dict: Extracted subset dictionary containing only those keys from cls.__init__ that were present in @dic, 54 | and their corresponding values 55 | """ 56 | # extract only relevant kwargs for this specific backbone 57 | cls_keys = get_class_init_kwargs(cls) 58 | subdic = extract_subset_dict( 59 | dic=dic, 60 | keys=cls_keys, 61 | copy=copy, 62 | ) 63 | 64 | # Run sanity check if verbose or debugging 65 | if verbose or Macros.DEBUG: 66 | keys_not_in_cls = [k for k in dic if k not in cls_keys] 67 | keys_not_in_dic = [k for k in cls_keys if k not in list(dic.keys())] 68 | if len(keys_not_in_cls) > 0: 69 | print(f"Warning: For class {cls.__name__}, got unknown keys: {keys_not_in_cls} ") 70 | if len(keys_not_in_dic) > 0: 71 | print(f"Warning: For class {cls.__name__}, got missing keys: {keys_not_in_dic} ") 72 | 73 | return subdic -------------------------------------------------------------------------------- /robomimic/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains some PyTorch utilities. 3 | """ 4 | import numpy as np 5 | import torch 6 | import torch.optim as optim 7 | 8 | 9 | def soft_update(source, target, tau): 10 | """ 11 | Soft update from the parameters of a @source torch module to a @target torch module 12 | with strength @tau. The update follows target = target * (1 - tau) + source * tau. 13 | 14 | Args: 15 | source (torch.nn.Module): source network to push target network parameters towards 16 | target (torch.nn.Module): target network to update 17 | """ 18 | for target_param, param in zip(target.parameters(), source.parameters()): 19 | target_param.copy_( 20 | target_param * (1.0 - tau) + param * tau 21 | ) 22 | 23 | 24 | def hard_update(source, target): 25 | """ 26 | Hard update @target parameters to match @source. 27 | 28 | Args: 29 | source (torch.nn.Module): source network to provide parameters 30 | target (torch.nn.Module): target network to update parameters for 31 | """ 32 | for target_param, param in zip(target.parameters(), source.parameters()): 33 | target_param.copy_(param) 34 | 35 | 36 | def get_torch_device(try_to_use_cuda): 37 | """ 38 | Return torch device. If using cuda (GPU), will also set cudnn.benchmark to True 39 | to optimize CNNs. 40 | 41 | Args: 42 | try_to_use_cuda (bool): if True and cuda is available, will use GPU 43 | 44 | Returns: 45 | device (torch.Device): device to use for models 46 | """ 47 | if try_to_use_cuda and torch.cuda.is_available(): 48 | torch.backends.cudnn.benchmark = True 49 | device = torch.device("cuda:0") 50 | else: 51 | device = torch.device("cpu") 52 | return device 53 | 54 | 55 | def reparameterize(mu, logvar): 56 | """ 57 | Reparameterization trick: sample z in a way that allows backpropagation. 58 | This makes it so that we can backpropagate through the sampling of z from 59 | our encoder when feeding the sampled variable to the decoder. 
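Concretely (editor's note), with std = exp(0.5 * logvar) and eps sampled from N(0, I), the code below computes z = mu + std * eps, which is differentiable with respect to both mu and logvar.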
60 | 61 | (See "The reparameterization trick" section of https://arxiv.org/abs/1312.6114) 62 | 63 | Args: 64 | mu (torch.Tensor): batch of means from the encoder distribution 65 | logvar (torch.Tensor): batch of log variances from the encoder distribution 66 | 67 | Returns: 68 | z (torch.Tensor): batch of sampled latents from the encoder distribution that 69 | support backpropagation 70 | """ 71 | # logvar = \log(\sigma^2) = 2 * \log(\sigma) 72 | # \sigma = \exp(0.5 * logvar) 73 | 74 | # clamped for numerical stability 75 | logstd = (0.5 * logvar).clamp(-4, 15) 76 | std = torch.exp(logstd) 77 | 78 | # Sample \epsilon from normal distribution 79 | # use std to create a new tensor, so we don't have to care 80 | # about running on GPU or not 81 | eps = std.new(std.size()).normal_() 82 | 83 | # Then multiply with the standard deviation and add the mean 84 | z = eps.mul(std).add_(mu) 85 | 86 | return z 87 | 88 | 89 | def optimizer_from_optim_params(net_optim_params, net): 90 | """ 91 | Helper function to return a torch Optimizer from the optim_params 92 | section of the config for a particular network. 93 | 94 | Args: 95 | net_optim_params (Config): optim_params part of algo_config corresponding 96 | to @net. This determines the optimizer that is created. 97 | 98 | net (torch.nn.Module): module whose parameters this optimizer will be 99 | responsible for 100 | 101 | Returns: 102 | optimizer (torch.optim.Optimizer): optimizer 103 | """ 104 | return optim.Adam( 105 | params=net.parameters(), 106 | lr=net_optim_params["learning_rate"]["initial"], 107 | weight_decay=net_optim_params["regularization"]["L2"], 108 | ) 109 | 110 | 111 | def lr_scheduler_from_optim_params(net_optim_params, net, optimizer): 112 | """ 113 | Helper function to return a LRScheduler from the optim_params 114 | section of the config for a particular network. Returns None 115 | if a scheduler is not needed. 116 | 117 | Args: 118 | net_optim_params (Config): optim_params part of algo_config corresponding 119 | to @net. This determines whether a learning rate scheduler is created. 120 | 121 | net (torch.nn.Module): module whose parameters this optimizer will be 122 | responsible for 123 | 124 | optimizer (torch.optim.Optimizer): optimizer for this net 125 | 126 | Returns: 127 | lr_scheduler (torch.optim.lr_scheduler or None): learning rate scheduler 128 | """ 129 | lr_scheduler = None 130 | if len(net_optim_params["learning_rate"]["epoch_schedule"]) > 0: 131 | # decay LR according to the epoch schedule 132 | lr_scheduler = optim.lr_scheduler.MultiStepLR( 133 | optimizer=optimizer, 134 | milestones=net_optim_params["learning_rate"]["epoch_schedule"], 135 | gamma=net_optim_params["learning_rate"]["decay_factor"], 136 | ) 137 | return lr_scheduler 138 | 139 | 140 | def backprop_for_loss(net, optim, loss, max_grad_norm=None, retain_graph=False, dont_step=False): 141 | """ 142 | Backpropagate loss and update parameters for the given 143 | network @net. 
144 | 145 | Args: 146 | net (torch.nn.Module): network to update 147 | 148 | optim (torch.optim.Optimizer): optimizer to use 149 | 150 | loss (torch.Tensor): loss to use for backpropagation 151 | 152 | max_grad_norm (float): if provided, used to clip gradients 153 | 154 | retain_graph (bool): if True, graph is not freed after backward call 155 | dont_step (bool): if True, compute gradients but skip the optimizer step 156 | Returns: 157 | grad_norms (float): sum of squared gradient norms over all parameters 158 | """ 159 | 160 | # backprop 161 | optim.zero_grad() 162 | loss.backward(retain_graph=retain_graph) 163 | 164 | # gradient clipping 165 | if max_grad_norm is not None: 166 | torch.nn.utils.clip_grad_norm_(net.parameters(), max_grad_norm) 167 | 168 | # compute grad norms 169 | grad_norms = 0. 170 | for p in net.parameters(): 171 | # accumulate norms only for parameters that received gradients 172 | if p.grad is not None: 173 | grad_norms += p.grad.data.norm(2).pow(2).item() 174 | 175 | if not dont_step: 176 | # step 177 | optim.step() 178 | 179 | return grad_norms 180 | 181 | 182 | class dummy_context_mgr(): 183 | """ 184 | A dummy context manager - useful for having conditional scopes (such 185 | as @maybe_no_grad). Nothing happens in this scope. 186 | """ 187 | def __enter__(self): 188 | return None 189 | def __exit__(self, exc_type, exc_value, traceback): 190 | return False 191 | 192 | 193 | def maybe_no_grad(no_grad): 194 | """ 195 | Args: 196 | no_grad (bool): if True, the returned context will be torch.no_grad(), otherwise 197 | it will be a dummy context 198 | """ 199 | return torch.no_grad() if no_grad else dummy_context_mgr() 200 | -------------------------------------------------------------------------------- /robomimic/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains utility functions for visualizing image observations in the training pipeline. 3 | These functions can be a useful debugging tool. 4 | """ 5 | import numpy as np 6 | 7 | import robomimic.utils.tensor_utils as TensorUtils 8 | import robomimic.utils.obs_utils as ObsUtils 9 | 10 | from PIL import Image, ImageFont, ImageDraw 11 | 12 | 13 | def image_tensor_to_numpy(image): 14 | """ 15 | Converts processed image tensors to numpy so that they can be saved to disk or video. 16 | A useful utility function for visualizing images in the middle of training. 17 | 18 | Args: 19 | image (torch.Tensor): images of shape [..., C, H, W] 20 | 21 | Returns: 22 | image (np.array): converted images of shape [..., H, W, C] and type uint8 23 | """ 24 | return TensorUtils.to_numpy( 25 | ObsUtils.unprocess_image(image) 26 | ).astype(np.uint8) 27 | 28 | 29 | def image_to_disk(image, fname): 30 | """ 31 | Writes an image to disk. 32 | 33 | Args: 34 | image (np.array): image of shape [H, W, 3] 35 | fname (str): path to save image to 36 | """ 37 | image = Image.fromarray(image) 38 | image.save(fname) 39 | 40 | 41 | def image_tensor_to_disk(image, fname): 42 | """ 43 | Writes an image tensor to disk. Any leading batch dimensions are indexed out 44 | with the first element. 45 | 46 | Args: 47 | image (torch.Tensor): image of shape [..., C, H, W]. 
All leading dimensions 48 | will be indexed out with the first element 49 | fname (str): path to save image to 50 | """ 51 | # index out all leading dimensions before [C, H, W] 52 | num_leading_dims = len(image.shape[:-3]) 53 | for _ in range(num_leading_dims): 54 | image = image[0] 55 | image = image_tensor_to_numpy(image) 56 | image_to_disk(image, fname) 57 | 58 | def write_text_on_image(image_arr, text, def_color=None, font=30, pos=(15, 15)): # overlay @text on @image_arr and return the result as a new array 59 | img = Image.fromarray(image_arr) 60 | image_edit = ImageDraw.Draw(img) 61 | pil_font = ImageFont.truetype("FreeMono.ttf", font) # renamed to avoid shadowing the @font size argument 62 | color = (0, 255, 0) if def_color is None else def_color 63 | image_edit.text(pos, text, color, font=pil_font) 64 | return np.array(img) 65 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # read the contents of your README file 4 | from os import path 5 | this_directory = path.abspath(path.dirname(__file__)) 6 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: 7 | lines = f.readlines() 8 | 9 | # remove images from README 10 | lines = [x for x in lines if (('.png' not in x) and ('.gif' not in x))] 11 | long_description = ''.join(lines) 12 | 13 | setup( 14 | name="robomimic", 15 | packages=[ 16 | package for package in find_packages() if package.startswith("robomimic") 17 | ], 18 | install_requires=[ 19 | "numpy>=1.13.3", 20 | "h5py", 21 | "psutil", 22 | "tqdm", 23 | "termcolor", 24 | "tensorboard", 25 | "tensorboardX", 26 | "imageio", 27 | "imageio-ffmpeg", 28 | "egl_probe>=1.0.1", 29 | "torch", 30 | "torchvision", 31 | ], 32 | eager_resources=['*'], 33 | include_package_data=True, 34 | python_requires='>=3', 35 | description="robomimic: A Modular Framework for Robot Learning from Demonstration", 36 | author="Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang", 37 | url="https://github.com/ARISE-Initiative/robomimic", 38 | author_email="amandlek@cs.stanford.edu", 39 | version="0.2.0", 40 | long_description=long_description, 41 | long_description_content_type='text/markdown' 42 | ) 43 | --------------------------------------------------------------------------------