├── LICENSE ├── MANIFEST.in ├── README.md ├── images └── sirius.png ├── requirements-docs.txt ├── requirements.txt ├── robomimic ├── __init__.py ├── algo │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── algo.cpython-38.pyc │ │ ├── awac.cpython-38.pyc │ │ ├── bc.cpython-38.pyc │ │ ├── bcq.cpython-38.pyc │ │ ├── cql.cpython-38.pyc │ │ ├── gl.cpython-38.pyc │ │ ├── hbc.cpython-38.pyc │ │ ├── iql.cpython-38.pyc │ │ ├── iris.cpython-38.pyc │ │ └── td3_bc.cpython-38.pyc │ ├── algo.py │ ├── awac.py │ ├── bc.py │ ├── bcq.py │ ├── cql.py │ ├── gl.py │ ├── hbc.py │ ├── iql.py │ ├── iris.py │ └── td3_bc.py ├── config │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── awac_config.cpython-38.pyc │ │ ├── base_config.cpython-38.pyc │ │ ├── bc_config.cpython-38.pyc │ │ ├── bcq_config.cpython-38.pyc │ │ ├── config.cpython-38.pyc │ │ ├── cql_config.cpython-38.pyc │ │ ├── gl_config.cpython-38.pyc │ │ ├── hbc_config.cpython-38.pyc │ │ ├── iql_config.cpython-38.pyc │ │ ├── iris_config.cpython-38.pyc │ │ ├── td3_bc_config.cpython-38.pyc │ │ └── vae_config.cpython-38.pyc │ ├── awac_config.py │ ├── base_config.py │ ├── bc_config.py │ ├── bcq_config.py │ ├── config.py │ ├── cql_config.py │ ├── gl_config.py │ ├── hbc_config.py │ ├── iql_config.py │ ├── iris_config.py │ ├── td3_bc_config.py │ └── vae_config.py ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── env_base.cpython-38.pyc │ ├── env_base.py │ ├── env_gym.py │ ├── env_ig_momart.py │ └── env_robosuite.py ├── exps │ ├── sirius │ │ ├── bc.json │ │ ├── bc_iwr.json │ │ └── sirius.json │ ├── sirius_template │ │ ├── awac │ │ │ ├── awac_im.json │ │ │ └── awac_ld.json │ │ ├── bc │ │ │ ├── bc_im.json │ │ │ ├── bc_ld.json │ │ │ ├── bc_real.json │ │ │ ├── bc_sim_v0_im.json │ │ │ └── bc_sim_v0_ld.json │ │ └── iql │ │ │ └── iql_ld.json │ └── templates │ │ ├── bc.json │ │ ├── bcq.json │ │ ├── cql.json │ │ ├── gl.json │ │ ├── hbc.json │ │ ├── iris.json │ │ └── td3_bc.json ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── base_nets.cpython-38.pyc │ │ ├── distributions.cpython-38.pyc │ │ ├── obs_nets.cpython-38.pyc │ │ ├── policy_nets.cpython-38.pyc │ │ ├── vae_nets.cpython-38.pyc │ │ └── value_nets.cpython-38.pyc │ ├── base_nets.py │ ├── distributions.py │ ├── obs_nets.py │ ├── policy_nets.py │ ├── vae_nets.py │ └── value_nets.py ├── scripts │ ├── check_same_initial_configs.py │ ├── conversion │ │ ├── convert_d4rl.py │ │ ├── convert_robosuite.py │ │ └── convert_roboturk_pilot.py │ ├── dataset_states_to_obs.py │ ├── download_datasets.py │ ├── download_momart_datasets.py │ ├── extract_obs_from_raw_datasets.sh │ ├── generate_config_templates.py │ ├── generate_paper_configs.py │ ├── get_dataset_info.py │ ├── hitl │ │ ├── collect_hitl_demos.py │ │ └── collect_playback_utils.py │ ├── hyperparam_helper.py │ ├── playback_dataset.py │ ├── run_trained_agent.py │ ├── slurm │ │ ├── auto_append.txt │ │ ├── auto_overwrite.txt │ │ ├── base_args.py │ │ ├── base_template.sbatch │ │ ├── batchrl_args.py │ │ ├── run_hp_sweep.py │ │ ├── sbatch_args.py │ │ └── sbatch_utils.py │ ├── split_train_val.py │ ├── train.py │ └── vis │ │ ├── image_utils.py │ │ ├── vis_preintv.py │ │ └── vis_utils.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── env_utils.cpython-38.pyc │ ├── file_utils.cpython-38.pyc │ ├── log_utils.cpython-38.pyc │ ├── loss_utils.cpython-38.pyc │ ├── macros.cpython-38.pyc │ ├── 
obs_utils.cpython-38.pyc │ ├── python_utils.cpython-38.pyc │ ├── tensor_utils.cpython-38.pyc │ ├── torch_utils.cpython-38.pyc │ ├── train_utils.cpython-38.pyc │ └── vis_utils.cpython-38.pyc │ ├── dataset.py │ ├── env_utils.py │ ├── file_utils.py │ ├── hyperparam_utils.py │ ├── log_utils.py │ ├── loss_utils.py │ ├── macros.py │ ├── obs_utils.py │ ├── python_utils.py │ ├── tensor_utils.py │ ├── test_utils.py │ ├── torch_utils.py │ ├── train_utils.py │ └── vis_utils.py ├── setup.py └── sirius.yml /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 UT Robot Perception and Learning Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include robomimic/exps/templates/*.json 2 | include robomimic/scripts/*.py 3 | include robomimic/scripts/*.sh 4 | include robomimic/scripts/conversion/*.py 5 | include robomimic/scripts/conversion/*.sh 6 | recursive-include examples/ *.py 7 | recursive-include tests/ *.py 8 | recursive-include tests/ *.sh 9 | recursive-include tests/assets/ * -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sirius 🌟: Robot Learning on the Job 2 | 3 |
4 | 5 | This is the official codebase for the [**Sirius**](https://ut-austin-rpl.github.io/sirius/) paper: 6 | 7 | **Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment** 8 |
[Huihan Liu](https://huihanl.github.io/), [Soroush Nasiriany](http://snasiriany.me/), [Lance Zhang](https://github.com/Lantian-Lance-Zhang), [Zhiyao Bao](https://www.linkedin.com/in/zhiyao-bao/), [Yuke Zhu](https://www.cs.utexas.edu/~yukez/) 9 |
[UT Austin Robot Perception and Learning Lab](https://rpl.cs.utexas.edu/) 10 |
Robotics: Science and Systems (RSS), 2023 11 |
**[[Paper]](https://arxiv.org/abs/2211.08416)**  **[[Project Website]](https://ut-austin-rpl.github.io/sirius/)**  **[[Real Robot Control]](https://ut-austin-rpl.github.io/deoxys-docs/html/getting_started/overview.html)** 12 | 13 | 14 | 15 |
16 | 17 | ## Quickstart 18 | 19 | Sirius builds upon [robomimic](https://github.com/ARISE-Initiative/robomimic), a framework for robot learning from demonstration. Sirius also uses the robotics simulator [robosuite](https://github.com/ARISE-Initiative/robosuite) powered by the MuJoCo physics engine. 20 | 21 | ### Setup Sirius codebase 22 | 23 | #### Installing Sirius 24 | 25 | ``` 26 | git clone https://github.com/UT-Austin-RPL/sirius 27 | cd sirius 28 | conda env create -f sirius.yml 29 | conda activate sirius 30 | pip install -e . 31 | ``` 32 | 33 | #### Installing ```robosuite``` 34 | 35 | The additional reference for installing robosuite [here](https://robomimic.github.io/docs/introduction/installation.html) and [here](https://robosuite.ai/docs/installation.html#install-from-source) could be helpful. 36 | 37 | ``` 38 | $ git clone https://github.com/ARISE-Initiative/robosuite.git 39 | $ cd robosuite 40 | $ pip install -r requirements.txt 41 | $ pip install -e . 42 | ``` 43 | 44 | ## Usage 45 | 46 | ### Running Sirius 47 | 48 | 49 | Running Sirius intervention-guided policy learning: 50 | 51 | ``` 52 | python robomimic/scripts/train.py --config robomimic/exps/sirius/sirius.json 53 | ``` 54 | 55 | IWR baseline: 56 | 57 | ``` 58 | python robomimic/scripts/train.py --config robomimic/exps/sirius/bc_iwr.json 59 | ``` 60 | 61 | BC baseline: 62 | 63 | ``` 64 | python robomimic/scripts/train.py --config robomimic/exps/sirius/bc.json 65 | ``` 66 | 67 |
68 | 69 | ### Sirius Data Collection Pipeline 70 | 71 | We include the script for collecting demonstrations and performing human intervention during robot policy execution below. We use a spacemouse for providing both demonstration and intervention. More details for setting up Spacemouse can be found [here](https://ut-austin-rpl.github.io/deoxys-docs/html/tutorials/using_teleoperation_devices.html). 72 | 73 | #### Performing Human Demonstration 74 | 75 | Perform human demonstration with the flag ```--all-demos```: 76 | 77 | ``` 78 | python robomimic/scripts/hitl/collect_hitl_demos.py --all-demos --num-traj 50 79 | ``` 80 | 81 | #### Policy Execution with Intervention 82 | 83 | Perform human intervention with the policy checkpoint ```${checkpoint}```: 84 | 85 | ``` 86 | python robomimic/scripts/hitl/collect_hitl_demos.py --num-traj 50 --checkpoint ${checkpoint} 87 | ``` 88 | 89 |
90 | 91 | ### Processing data 92 | 93 | #### Adding modalities 94 | 95 | By default, the datasets are generated in the minimum format with only low-level state information to save space. To add image observation and other modalities for training, run the following post-processing script. It will process the original data ```${data.hdf5}``` into ```${data_processed.hdf5}```, with image size ```${image_size}```. By default, the two camera view uses are agentview and robot0_eye_in_hand, which you can modify in the script ```template_process_sim_dataset.sh```. 96 | 97 | ``` 98 | cd robomimic/scripts/hitl 99 | 100 | source template_process_sim_dataset.sh ${data.hdf5} ${data_processed.hdf5} ${image_size} 101 | ``` 102 | 103 |
104 | 105 | ## Acknowledgements 106 | 107 | This codebase is largely built on [robomimic](https://github.com/ARISE-Initiative/robomimic) and [robosuite](https://github.com/ARISE-Initiative/robosuite). We also thank [Ajay Mandlekar](https://ai.stanford.edu/~amandlek/) for sharing well-designed simulation task environments beyond the robomimic codebase like ```Coffee``` and ```Threading``` tasks during project development. 108 | 109 | For real-robot experiments, we used [Deoxys](https://ut-austin-rpl.github.io/deoxys-docs/html/getting_started/overview.html), a controller library for Franka Emika Panda developed by [Yifeng Zhu](https://zhuyifengzju.github.io/). 110 | 111 |
112 | 113 | ## Citation 114 | ```bibtex 115 | @inproceedings{liu2022robot, 116 | title = {Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment}, 117 | author = {Huihan Liu and Soroush Nasiriany and Lance Zhang and Zhiyao Bao and Yuke Zhu}, 118 | booktitle = {Robotics: Science and Systems (RSS)}, 119 | year = {2023} 120 | } 121 | ``` 122 | -------------------------------------------------------------------------------- /images/sirius.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/images/sirius.png -------------------------------------------------------------------------------- /requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # requirements for building sphinx docs 2 | pygments==2.4.1 3 | sphinx 4 | sphinx_rtd_theme 5 | sphinx_markdown_tables 6 | recommonmark 7 | nbsphinx 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.13.3 2 | h5py 3 | psutil 4 | tqdm 5 | termcolor 6 | tensorboard 7 | tensorboardX 8 | imageio 9 | imageio-ffmpeg 10 | egl_probe>=1.0.1 11 | torch 12 | torchvision 13 | -------------------------------------------------------------------------------- /robomimic/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | 3 | 4 | # stores released dataset links and rollout horizons in global dictionary. 5 | # Structure is given below for each type of dataset: 6 | 7 | # robosuite / real 8 | # { 9 | # task: 10 | # dataset_type: 11 | # hdf5_type: 12 | # url: link 13 | # horizon: value 14 | # ... 15 | # ... 16 | # ... 17 | # } 18 | DATASET_REGISTRY = {} 19 | 20 | # momart 21 | # { 22 | # task: 23 | # dataset_type: 24 | # url: link 25 | # size: value 26 | # ... 27 | # ... 28 | # } 29 | MOMART_DATASET_REGISTRY = {} 30 | 31 | 32 | def register_dataset_link(task, dataset_type, hdf5_type, link, horizon): 33 | """ 34 | Helper function to register dataset link in global dictionary. 35 | Also takes a @horizon parameter - this corresponds to the evaluation 36 | rollout horizon that should be used during training. 37 | 38 | Args: 39 | task (str): name of task for this dataset 40 | dataset_type (str): type of dataset (usually identifies the dataset source) 41 | hdf5_type (str): type of hdf5 - usually one of "raw", "low_dim", or "image", 42 | to identify the kind of observations in the dataset 43 | link (str): download link for the dataset 44 | horizon (int): evaluation rollout horizon that should be used with this dataset 45 | """ 46 | if task not in DATASET_REGISTRY: 47 | DATASET_REGISTRY[task] = {} 48 | if dataset_type not in DATASET_REGISTRY[task]: 49 | DATASET_REGISTRY[task][dataset_type] = {} 50 | DATASET_REGISTRY[task][dataset_type][hdf5_type] = dict(url=link, horizon=horizon) 51 | 52 | 53 | def register_all_links(): 54 | """ 55 | Record all dataset links in this function. 
56 | """ 57 | 58 | # all proficient human datasets 59 | ph_tasks = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"] 60 | ph_horizons = [400, 400, 400, 700, 700, 1000, 1000, 1000] 61 | for task, horizon in zip(ph_tasks, ph_horizons): 62 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="raw", horizon=horizon, 63 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/demo.hdf5".format(task)) 64 | # real world datasets only have demo.hdf5 files which already contain all observation modalities 65 | # while sim datasets store raw low-dim mujoco states in the demo.hdf5 66 | if "real" not in task: 67 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="low_dim", horizon=horizon, 68 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/low_dim.hdf5".format(task)) 69 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="image", horizon=horizon, 70 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/image.hdf5".format(task)) 71 | 72 | # all multi human datasets 73 | mh_tasks = ["lift", "can", "square", "transport"] 74 | mh_horizons = [500, 500, 500, 1100] 75 | for task, horizon in zip(mh_tasks, mh_horizons): 76 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="raw", horizon=horizon, 77 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/demo.hdf5".format(task)) 78 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="low_dim", horizon=horizon, 79 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/low_dim.hdf5".format(task)) 80 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="image", horizon=horizon, 81 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/image.hdf5".format(task)) 82 | 83 | # all machine generated datasets 84 | for task, horizon in zip(["lift", "can"], [400, 400]): 85 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="raw", horizon=horizon, 86 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/demo.hdf5".format(task)) 87 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_sparse", horizon=horizon, 88 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_sparse.hdf5".format(task)) 89 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_sparse", horizon=horizon, 90 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_sparse.hdf5".format(task)) 91 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_dense", horizon=horizon, 92 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_dense.hdf5".format(task)) 93 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_dense", horizon=horizon, 94 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_dense.hdf5".format(task)) 95 | 96 | # can-paired dataset 97 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="raw", horizon=400, 98 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo.hdf5") 99 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="low_dim", horizon=400, 100 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim.hdf5") 101 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="image", horizon=400, 102 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/image.hdf5") 103 | 104 | 105 | 
def register_momart_dataset_link(task, dataset_type, link, dataset_size): 106 | """ 107 | Helper function to register a momart dataset link in the global dictionary. 108 | Also takes a @dataset_size parameter - this corresponds to the size of 109 | the dataset file, in GB. 110 | 111 | Args: 112 | task (str): name of task for this dataset 113 | dataset_type (str): type of dataset (usually identifies the dataset source) 114 | link (str): download link for the dataset 115 | dataset_size (float): size of the dataset, in GB 116 | """ 117 | if task not in MOMART_DATASET_REGISTRY: 118 | MOMART_DATASET_REGISTRY[task] = {} 119 | if dataset_type not in MOMART_DATASET_REGISTRY[task]: 120 | MOMART_DATASET_REGISTRY[task][dataset_type] = {} 121 | MOMART_DATASET_REGISTRY[task][dataset_type] = dict(url=link, size=dataset_size) 122 | 123 | 124 | def register_all_momart_links(): 125 | """ 126 | Record all momart dataset links in this function. 127 | """ 128 | # all tasks, mapped to their [exp, sub, gen, sam] sizes 129 | momart_tasks = { 130 | "table_setup_from_dishwasher": [14, 14, 3.3, 0.6], 131 | "table_setup_from_dresser": [16, 17, 3.1, 0.7], 132 | "table_cleanup_to_dishwasher": [23, 36, 5.3, 1.1], 133 | "table_cleanup_to_sink": [17, 28, 2.9, 0.8], 134 | "unload_dishwasher": [21, 27, 5.4, 1.0], 135 | } 136 | 137 | momart_dataset_types = [ 138 | "expert", 139 | "suboptimal", 140 | "generalize", 141 | "sample", 142 | ] 143 | 144 | # Iterate over all combos and register the link 145 | for task, dataset_sizes in momart_tasks.items(): 146 | for dataset_type, dataset_size in zip(momart_dataset_types, dataset_sizes): 147 | register_momart_dataset_link( 148 | task=task, 149 | dataset_type=dataset_type, 150 | link=f"http://downloads.cs.stanford.edu/downloads/rt_mm/{dataset_type}/{task}_{dataset_type}.hdf5", 151 | dataset_size=dataset_size, 152 | ) 153 | 154 | 155 | register_all_links() 156 | register_all_momart_links() 157 | -------------------------------------------------------------------------------- /robomimic/algo/__init__.py: -------------------------------------------------------------------------------- 1 | from robomimic.algo.algo import register_algo_factory_func, res_mlp_args_from_config, algo_name_to_factory_func, algo_factory, Algo, PolicyAlgo, ValueAlgo, PlannerAlgo, HierarchicalAlgo, RolloutPolicy 2 | 3 | # note: these imports are needed to register these classes in the global algo registry 4 | from robomimic.algo.bc import BC, BC_Gaussian, BC_GMM, BC_VAE, BC_RNN, BC_RNN_GMM 5 | from robomimic.algo.bcq import BCQ, BCQ_GMM, BCQ_Distributional 6 | from robomimic.algo.cql import CQL 7 | from robomimic.algo.awac import AWAC 8 | from robomimic.algo.iql import IQL 9 | from robomimic.algo.gl import GL, GL_VAE, ValuePlanner 10 | from robomimic.algo.hbc import HBC 11 | from robomimic.algo.iris import IRIS 12 | from robomimic.algo.td3_bc import TD3_BC 13 | -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/algo.cpython-38.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/algo.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/awac.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/awac.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/bc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/bc.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/bcq.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/bcq.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/cql.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/cql.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/gl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/gl.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/hbc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/hbc.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/iql.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/iql.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/iris.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/iris.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/__pycache__/td3_bc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/td3_bc.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/algo/iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of IRIS 
(https://arxiv.org/abs/1911.05321). 3 | """ 4 | import numpy as np 5 | from collections import OrderedDict 6 | from copy import deepcopy 7 | 8 | import torch 9 | 10 | import robomimic.utils.tensor_utils as TensorUtils 11 | import robomimic.utils.obs_utils as ObsUtils 12 | from robomimic.config.config import Config 13 | from robomimic.algo import register_algo_factory_func, algo_name_to_factory_func, HBC, ValuePlanner, ValueAlgo, GL_VAE 14 | 15 | 16 | @register_algo_factory_func("iris") 17 | def algo_config_to_class(algo_config): 18 | """ 19 | Maps algo config to the IRIS algo class to instantiate, along with additional algo kwargs. 20 | 21 | Args: 22 | algo_config (Config instance): algo config 23 | 24 | Returns: 25 | algo_class: subclass of Algo 26 | algo_kwargs (dict): dictionary of additional kwargs to pass to algorithm 27 | """ 28 | pol_cls, _ = algo_name_to_factory_func("bc")(algo_config.actor) 29 | plan_cls, _ = algo_name_to_factory_func("gl")(algo_config.value_planner.planner) 30 | value_cls, _ = algo_name_to_factory_func("bcq")(algo_config.value_planner.value) 31 | return IRIS, dict(policy_algo_class=pol_cls, planner_algo_class=plan_cls, value_algo_class=value_cls) 32 | 33 | 34 | class IRIS(HBC, ValueAlgo): 35 | """ 36 | Implementation of IRIS (https://arxiv.org/abs/1911.05321). 37 | """ 38 | def __init__( 39 | self, 40 | planner_algo_class, 41 | value_algo_class, 42 | policy_algo_class, 43 | algo_config, 44 | obs_config, 45 | global_config, 46 | obs_key_shapes, 47 | ac_dim, 48 | device, 49 | ): 50 | """ 51 | Args: 52 | planner_algo_class (Algo class): algo class for the planner 53 | 54 | policy_algo_class (Algo class): algo class for the policy 55 | 56 | algo_config (Config object): instance of Config corresponding to the algo section 57 | of the config 58 | 59 | obs_config (Config object): instance of Config corresponding to the observation 60 | section of the config 61 | 62 | global_config (Config object): global training config 63 | 64 | obs_key_shapes (OrderedDict): dictionary that maps input/output observation keys to shapes 65 | 66 | ac_dim (int): action dimension 67 | 68 | device: torch device 69 | """ 70 | self.algo_config = algo_config 71 | self.obs_config = obs_config 72 | self.global_config = global_config 73 | 74 | self.ac_dim = ac_dim 75 | self.device = device 76 | 77 | self._subgoal_step_count = 0 # current step count for deciding when to update subgoal 78 | self._current_subgoal = None # latest subgoal 79 | self._subgoal_update_interval = self.algo_config.subgoal_update_interval # subgoal update frequency 80 | self._subgoal_horizon = self.algo_config.value_planner.planner.subgoal_horizon 81 | self._actor_horizon = self.algo_config.actor.rnn.horizon 82 | 83 | self._algo_mode = self.algo_config.mode 84 | assert self._algo_mode in ["separate", "cascade"] 85 | 86 | self.planner = ValuePlanner( 87 | planner_algo_class=planner_algo_class, 88 | value_algo_class=value_algo_class, 89 | algo_config=algo_config.value_planner, 90 | obs_config=obs_config.value_planner, 91 | global_config=global_config, 92 | obs_key_shapes=obs_key_shapes, 93 | ac_dim=ac_dim, 94 | device=device 95 | ) 96 | 97 | self.actor_goal_shapes = self.planner.subgoal_shapes 98 | assert not algo_config.latent_subgoal.enabled, "IRIS does not support latent subgoals" 99 | 100 | # only for the actor: override goal modalities and shapes to match the subgoal set by the planner 101 | actor_obs_key_shapes = deepcopy(obs_key_shapes) 102 | # make sure we are not modifying existing observation key shapes 103 | for k in 
self.actor_goal_shapes: 104 | if k in actor_obs_key_shapes: 105 | assert actor_obs_key_shapes[k] == self.actor_goal_shapes[k] 106 | actor_obs_key_shapes.update(self.actor_goal_shapes) 107 | 108 | goal_modalities = {obs_modality: [] for obs_modality in ObsUtils.OBS_MODALITY_CLASSES.keys()} 109 | for k in self.actor_goal_shapes.keys(): 110 | goal_modalities[ObsUtils.OBS_KEYS_TO_MODALITIES[k]].append(k) 111 | 112 | actor_obs_config = deepcopy(obs_config.actor) 113 | with actor_obs_config.unlocked(): 114 | actor_obs_config["goal"] = Config(**goal_modalities) 115 | 116 | self.actor = policy_algo_class( 117 | algo_config=algo_config.actor, 118 | obs_config=actor_obs_config, 119 | global_config=global_config, 120 | obs_key_shapes=actor_obs_key_shapes, 121 | ac_dim=ac_dim, 122 | device=device 123 | ) 124 | 125 | def process_batch_for_training(self, batch): 126 | """ 127 | Processes input batch from a data loader to filter out 128 | relevant information and prepare the batch for training. 129 | 130 | Args: 131 | batch (dict): dictionary with torch.Tensors sampled 132 | from a data loader 133 | 134 | Returns: 135 | input_batch (dict): processed and filtered batch that 136 | will be used for training 137 | """ 138 | input_batch = dict() 139 | 140 | input_batch["planner"] = self.planner.process_batch_for_training(batch) 141 | input_batch["actor"] = self.actor.process_batch_for_training(batch) 142 | 143 | if self.algo_config.actor_use_random_subgoals: 144 | # optionally use randomly sampled step between [1, seq_length] as policy goal 145 | policy_subgoal_indices = torch.randint( 146 | low=0, high=self.global_config.train.seq_length, size=(batch["actions"].shape[0],)) 147 | goal_obs = TensorUtils.gather_sequence(batch["next_obs"], policy_subgoal_indices) 148 | goal_obs = TensorUtils.to_device(TensorUtils.to_float(goal_obs), self.device) 149 | input_batch["actor"]["goal_obs"] = goal_obs 150 | else: 151 | # otherwise, use planner subgoal target as goal for the policy 152 | input_batch["actor"]["goal_obs"] = input_batch["planner"]["planner"]["target_subgoals"] 153 | 154 | return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device) 155 | 156 | def get_state_value(self, obs_dict, goal_dict=None): 157 | """ 158 | Get state value outputs. 159 | 160 | Args: 161 | obs_dict (dict): current observation 162 | goal_dict (dict): (optional) goal 163 | 164 | Returns: 165 | value (torch.Tensor): value tensor 166 | """ 167 | return self.planner.get_state_value(obs_dict=obs_dict, goal_dict=goal_dict) 168 | 169 | def get_state_action_value(self, obs_dict, actions, goal_dict=None): 170 | """ 171 | Get state-action value outputs. 
172 | 173 | Args: 174 | obs_dict (dict): current observation 175 | actions (torch.Tensor): action 176 | goal_dict (dict): (optional) goal 177 | 178 | Returns: 179 | value (torch.Tensor): value tensor 180 | """ 181 | return self.planner.get_state_action_value(obs_dict=obs_dict, actions=actions, goal_dict=goal_dict) 182 | -------------------------------------------------------------------------------- /robomimic/config/__init__.py: -------------------------------------------------------------------------------- 1 | from robomimic.config.config import Config 2 | from robomimic.config.base_config import config_factory, get_all_registered_configs 3 | 4 | # note: these imports are needed to register these classes in the global config registry 5 | from robomimic.config.bc_config import BCConfig 6 | from robomimic.config.bcq_config import BCQConfig 7 | from robomimic.config.cql_config import CQLConfig 8 | from robomimic.config.awac_config import AWACConfig 9 | from robomimic.config.iql_config import IQLConfig 10 | from robomimic.config.gl_config import GLConfig 11 | from robomimic.config.hbc_config import HBCConfig 12 | from robomimic.config.iris_config import IRISConfig 13 | from robomimic.config.td3_bc_config import TD3_BCConfig 14 | from robomimic.config.vae_config import VAEConfig -------------------------------------------------------------------------------- /robomimic/config/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/awac_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/awac_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/base_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/base_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/bc_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/bc_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/bcq_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/bcq_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/config.cpython-38.pyc 
-------------------------------------------------------------------------------- /robomimic/config/__pycache__/cql_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/cql_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/gl_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/gl_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/hbc_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/hbc_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/iql_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/iql_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/iris_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/iris_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/td3_bc_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/td3_bc_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/__pycache__/vae_config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/vae_config.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/config/awac_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for AWAC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class AWACConfig(BaseConfig): 9 | ALGO_NAME = "awac" 10 | 11 | def train_config(self): 12 | """ 13 | Update from superclass to change default batch size. 14 | """ 15 | super(AWACConfig, self).train_config() 16 | 17 | # increase batch size to 1024 (found to work better for most manipulation experiments) 18 | self.train.batch_size = 1024 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor.
Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 26 | """ 27 | super(AWACConfig, self).algo_config() 28 | 29 | # optimization parameters 30 | self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate 31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 34 | 35 | self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate 36 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 37 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 38 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 39 | 40 | # target network related parameters 41 | self.algo.discount = 0.99 # discount factor to use 42 | self.algo.target_tau = 0.01 # update rate for target networks 43 | self.algo.ignore_dones = False 44 | self.algo.use_negative_rewards = False 45 | self.algo.use_hardcoded_weights = False 46 | self.algo.hc_weights_key = "final_success" 47 | self.algo.relabel_dones_mode = None 48 | self.algo.relabel_rewards_mode = None 49 | 50 | # Actor network settings 51 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet) 52 | 53 | # Actor network settings - shared 54 | self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net 55 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage 56 | self.algo.actor.net.common.use_tanh = False 57 | 58 | # Actor network settings - gaussian 59 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value 60 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net 61 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not 62 | 63 | self.algo.actor.net.gmm.num_modes = 5 64 | self.algo.actor.net.gmm.min_std = 0.0001 65 | 66 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions 67 | 68 | self.algo.actor.max_gradient_norm = None 69 | 70 | # actor residual MLP settings 71 | self.algo.actor.res_mlp.enabled = False 72 | self.algo.actor.res_mlp.num_blocks = 4 73 | self.algo.actor.res_mlp.hidden_dim = 1024 74 | self.algo.actor.res_mlp.use_layer_norm = True 75 | 76 | # ================== Critic Network Config =================== 77 | # critic ensemble parameters (TD3 trick) 78 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 79 | self.algo.critic.ensemble_method = "min" 80 | self.algo.critic.target_ensemble_method = "mean" 81 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions 82 | self.algo.critic.use_huber = False 83 | 84 | # critic residual MLP settings 85 | self.algo.critic.res_mlp.enabled = False 86 | self.algo.critic.res_mlp.num_blocks = 4 87 | self.algo.critic.res_mlp.hidden_dim = 1024 88 | self.algo.critic.res_mlp.use_layer_norm = True 89 | 90 | # distributional critic 91 | self.algo.critic.distributional.enabled = False # train distributional critic 92 | self.algo.critic.distributional.num_atoms = 
51 # number of values in categorical distribution 93 | self.algo.critic.value_bounds = None 94 | 95 | self.algo.adv.use_mle_for_vf = False 96 | self.algo.adv.vf_K = 4 97 | self.algo.adv.value_method = "mean" 98 | self.algo.adv.filter_type = "softmax" 99 | self.algo.adv.use_final_clip = False 100 | self.algo.adv.clip_adv_value = None 101 | self.algo.adv.beta = 1.0 102 | self.algo.adv.multi_weight = None 103 | 104 | self.algo.critic.max_gradient_norm = None 105 | 106 | self.algo.hc_weights.use_adv_score = False 107 | 108 | # RNN policy settings 109 | self.algo.actor.rnn.enabled = False # whether to train RNN policy 110 | self.algo.actor.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length 111 | self.algo.actor.rnn.hidden_dim = 400 # hidden dimension size 112 | self.algo.actor.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" 113 | self.algo.actor.rnn.num_layers = 2 # number of RNN layers that are stacked 114 | self.algo.actor.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) 115 | self.algo.actor.rnn.kwargs.bidirectional = False # rnn kwargs 116 | self.algo.actor.rnn.use_res_mlp = False 117 | self.algo.actor.rnn.res_mlp_kwargs = None 118 | self.algo.actor.rnn.kwargs.do_not_lock_keys() 119 | 120 | self.algo.hc_weights.use_hardcode_weight = False -------------------------------------------------------------------------------- /robomimic/config/bc_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for BC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class BCConfig(BaseConfig): 9 | ALGO_NAME = "bc" 10 | 11 | def algo_config(self): 12 | """ 13 | This function populates the `config.algo` attribute of the config, and is given to the 14 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 15 | argument to the constructor. Any parameter that an algorithm needs to determine its 16 | training and test-time behavior should be populated here. 
17 | """ 18 | super(BCConfig, self).algo_config() 19 | 20 | # optimization parameters 21 | self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate 22 | self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 23 | self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 24 | self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength 25 | 26 | # loss weights 27 | self.algo.loss.l2_weight = 1.0 # L2 loss weight 28 | self.algo.loss.l1_weight = 0.0 # L1 loss weight 29 | self.algo.loss.cos_weight = 0.0 # cosine loss weight 30 | 31 | # MLP network architecture (layers after observation encoder and RNN, if present) 32 | self.algo.actor_layer_dims = (1024, 1024) 33 | self.algo.max_gradient_norm = None 34 | 35 | # residual MLP settings 36 | self.algo.res_mlp.enabled = False 37 | self.algo.res_mlp.num_blocks = 4 38 | self.algo.res_mlp.hidden_dim = 1024 39 | self.algo.res_mlp.use_layer_norm = True 40 | 41 | # stochastic Gaussian policy settings 42 | self.algo.gaussian.enabled = False # whether to train a Gaussian policy 43 | self.algo.gaussian.fixed_std = False # whether to train std output or keep it constant 44 | self.algo.gaussian.init_std = 0.1 # initial standard deviation (or constant) 45 | self.algo.gaussian.min_std = 0.01 # minimum std output from network 46 | self.algo.gaussian.std_activation = "softplus" # activation to use for std output from policy net 47 | self.algo.gaussian.low_noise_eval = True # low-std at test-time 48 | 49 | # stochastic GMM policy settings 50 | self.algo.gmm.enabled = False # whether to train a GMM policy 51 | self.algo.gmm.num_modes = 5 # number of GMM modes 52 | self.algo.gmm.min_std = 0.0001 # minimum std output from network 53 | self.algo.gmm.std_activation = "softplus" # activation to use for std output from policy net 54 | self.algo.gmm.low_noise_eval = True # low-std at test-time 55 | 56 | # stochastic VAE policy settings 57 | self.algo.vae.enabled = False # whether to train a VAE policy 58 | self.algo.vae.latent_dim = 14 # VAE latent dimnsion - set to twice the dimensionality of action space 59 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable) 60 | self.algo.vae.kl_weight = 1. 
# beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO 61 | 62 | # VAE decoder settings 63 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation 64 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss 65 | 66 | # VAE prior settings 67 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1) 68 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations 69 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior 70 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes 71 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights 72 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior 73 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension 74 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass 75 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp 76 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate 77 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp 78 | 79 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions 80 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions 81 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) 82 | 83 | # RNN policy settings 84 | self.algo.rnn.enabled = False # whether to train RNN policy 85 | self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length 86 | self.algo.rnn.hidden_dim = 400 # hidden dimension size 87 | self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" 88 | self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked 89 | self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) 90 | self.algo.rnn.kwargs.bidirectional = False # rnn kwargs 91 | self.algo.rnn.kwargs.do_not_lock_keys() 92 | 93 | self.algo.hc_weights.traj_label_type = "last" 94 | 95 | self.algo.hc_weights.batch_normalize = True -------------------------------------------------------------------------------- /robomimic/config/bcq_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for BCQ algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | from robomimic.config.bc_config import BCConfig 7 | 8 | 9 | class BCQConfig(BaseConfig): 10 | ALGO_NAME = "bcq" 11 | 12 | def algo_config(self): 13 | """ 14 | This function populates the `config.algo` attribute of the config, and is given to the 15 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 16 | argument to the constructor. Any parameter that an algorithm needs to determine its 17 | training and test-time behavior should be populated here. 
18 | """ 19 | super(BCQConfig, self).algo_config() 20 | 21 | # optimization parameters 22 | self.algo.optim_params.critic.learning_rate.initial = 1e-3 # critic learning rate 23 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 24 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 25 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 26 | self.algo.optim_params.critic.start_epoch = -1 # number of epochs before starting critic training (-1 means start right away) 27 | self.algo.optim_params.critic.end_epoch = -1 # number of epochs before ending critic training (-1 means start right away) 28 | 29 | self.algo.optim_params.action_sampler.learning_rate.initial = 1e-3 # action sampler learning rate 30 | self.algo.optim_params.action_sampler.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 31 | self.algo.optim_params.action_sampler.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 32 | self.algo.optim_params.action_sampler.regularization.L2 = 0.00 # L2 regularization strength 33 | self.algo.optim_params.action_sampler.start_epoch = -1 # number of epochs before starting action sampler training (-1 means start right away) 34 | self.algo.optim_params.action_sampler.end_epoch = -1 # number of epochs before ending action sampler training (-1 means start right away) 35 | 36 | self.algo.optim_params.actor.learning_rate.initial = 1e-3 # actor learning rate 37 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 38 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 39 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 40 | self.algo.optim_params.actor.start_epoch = -1 # number of epochs before starting actor training (-1 means start right away) 41 | self.algo.optim_params.actor.end_epoch = -1 # number of epochs before ending actor training (-1 means start right away) 42 | 43 | # target network related parameters 44 | self.algo.discount = 0.99 # discount factor to use 45 | self.algo.n_step = 1 # for using n-step returns in TD-updates 46 | self.algo.target_tau = 0.005 # update rate for target networks 47 | self.algo.infinite_horizon = False # if True, scale terminal rewards by 1 / (1 - discount) to treat as infinite horizon 48 | 49 | # ================== Critic Network Config =================== 50 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic 51 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping) 52 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates 53 | self.algo.critic.num_action_samples = 10 # number of actions to sample per training batch to get target critic value 54 | self.algo.critic.num_action_samples_rollout = 100 # number of actions to sample per environment step 55 | 56 | # critic ensemble parameters (TD3 trick) 57 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 58 | self.algo.critic.ensemble.weight = 0.75 # weighting for mixing min and max for target Q value 59 | 60 | # distributional critic 61 | self.algo.critic.distributional.enabled = False # train distributional critic (C51) 62 | self.algo.critic.distributional.num_atoms = 51 # number of values in categorical distribution 63 | 64 | 
self.algo.critic.layer_dims = (300, 400) # size of critic MLP 65 | 66 | # ================== Action Sampler Config =================== 67 | self.algo.action_sampler = BCConfig().algo 68 | # use VAE by default 69 | self.algo.action_sampler.vae.enabled = True 70 | # remove unused parts of BCConfig algo config 71 | del self.algo.action_sampler.optim_params # since action sampler optim params specified at top-level 72 | del self.algo.action_sampler.loss 73 | del self.algo.action_sampler.gaussian 74 | del self.algo.action_sampler.rnn 75 | 76 | # Number of epochs before freezing encoder (-1 for no freezing). Only applies to cVAE-based action samplers. 77 | with self.algo.action_sampler.unlocked(): 78 | self.algo.action_sampler.freeze_encoder_epoch = -1 79 | 80 | # ================== Actor Network Config =================== 81 | self.algo.actor.enabled = False # whether to use the actor perturbation network 82 | self.algo.actor.perturbation_scale = 0.05 # size of learned action perturbations 83 | self.algo.actor.layer_dims = (300, 400) # size of actor MLP 84 | -------------------------------------------------------------------------------- /robomimic/config/cql_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for CQL algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class CQLConfig(BaseConfig): 9 | ALGO_NAME = "cql" 10 | 11 | def train_config(self): 12 | """ 13 | Update from superclass to change default batch size. 14 | """ 15 | super(CQLConfig, self).train_config() 16 | 17 | # increase batch size to 1024 (found to work better for most manipulation experiments) 18 | self.train.batch_size = 1024 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor. Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 26 | """ 27 | super(CQLConfig, self).algo_config() 28 | 29 | # optimization parameters 30 | self.algo.optim_params.critic.learning_rate.initial = 1e-3 # critic learning rate 31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 34 | 35 | self.algo.optim_params.actor.learning_rate.initial = 3e-4 # actor learning rate 36 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 37 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 38 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 39 | 40 | # target network related parameters 41 | self.algo.discount = 0.99 # discount factor to use 42 | self.algo.n_step = 1 # for using n-step returns in TD-updates 43 | self.algo.target_tau = 0.005 # update rate for target networks 44 | 45 | # ================== Actor Network Config =================== 46 | self.algo.actor.bc_start_steps = 0 # uses BC policy loss for first n-training steps 47 | self.algo.actor.target_entropy = "default" # None is fixed entropy, otherwise is automatically tuned to match target. 
Can specify "default" as well for default tuning target 48 | self.algo.actor.max_gradient_norm = None # L2 gradient clipping for actor 49 | 50 | # Actor network settings 51 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet) 52 | 53 | # Actor network settings - shared 54 | self.algo.actor.net.common.std_activation = "exp" # Activation to use for std output from policy net 55 | self.algo.actor.net.common.use_tanh = True # Whether to use tanh at output of actor network 56 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage 57 | 58 | # Actor network settings - gaussian 59 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value 60 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net 61 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not 62 | 63 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions 64 | 65 | # ================== Critic Network Config =================== 66 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic 67 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping) 68 | 69 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates 70 | 71 | self.algo.critic.num_action_samples = 1 # number of actions to sample per training batch to get target critic value; use maximum Q value from n random sampled actions when doing TD error backup 72 | 73 | # cql settings for critic 74 | self.algo.critic.cql_weight = 1.0 # weighting for cql component of critic loss (only used if target_q_gap is < 0 or None) 75 | self.algo.critic.deterministic_backup = True # if not set, subtract weighted logprob of action when doing backup 76 | self.algo.critic.min_q_weight = 1.0 # min q weight (scaling factor) to apply 77 | self.algo.critic.target_q_gap = 5.0 # if set, sets the diff threshold at which Q-values will be penalized more (note: this overrides cql weight above!) Use None or a negative value if not set 78 | self.algo.critic.num_random_actions = 10 # Number of random actions to sample when calculating CQL loss 79 | 80 | # critic ensemble parameters (TD3 trick) 81 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 82 | 83 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions 84 | -------------------------------------------------------------------------------- /robomimic/config/gl_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for Goal Learning (sub-algorithm used by hierarchical models like HBC and IRIS). 3 | This class of model predicts (or samples) subgoal observations given a current observation. 4 | """ 5 | 6 | from robomimic.config.base_config import BaseConfig 7 | 8 | 9 | class GLConfig(BaseConfig): 10 | ALGO_NAME = "gl" 11 | 12 | def algo_config(self): 13 | """ 14 | This function populates the `config.algo` attribute of the config, and is given to the 15 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 16 | argument to the constructor. Any parameter that an algorithm needs to determine its 17 | training and test-time behavior should be populated here. 
18 | """ 19 | super(GLConfig, self).algo_config() 20 | 21 | # optimization parameters 22 | self.algo.optim_params.goal_network.learning_rate.initial = 1e-4 # goal network learning rate 23 | self.algo.optim_params.goal_network.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 24 | self.algo.optim_params.goal_network.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 25 | self.algo.optim_params.goal_network.regularization.L2 = 0.00 26 | 27 | # subgoal definition: observation that is @subgoal_horizon number of timesteps in future from current observation 28 | self.algo.subgoal_horizon = 10 29 | 30 | # MLP size for deterministic goal network (unused if VAE is enabled) 31 | self.algo.ae.planner_layer_dims = (300, 400) 32 | 33 | # ================== VAE config ================== 34 | self.algo.vae.enabled = True # set to true to use VAE network 35 | self.algo.vae.latent_dim = 16 # VAE latent dimension 36 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable) 37 | self.algo.vae.kl_weight = 1. # beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO 38 | 39 | # VAE decoder settings 40 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation 41 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss 42 | 43 | # VAE prior settings 44 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1) 45 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations 46 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior 47 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes 48 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights 49 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior 50 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension 51 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass 52 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp 53 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate 54 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp 55 | 56 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions 57 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions 58 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) 59 | 60 | def observation_config(self): 61 | """ 62 | Update from superclass to specify subgoal modalities. 63 | """ 64 | super(GLConfig, self).observation_config() 65 | self.observation.modalities.subgoal.low_dim = [ # specify low-dim subgoal observations for agent to predict 66 | "robot0_eef_pos", 67 | "robot0_eef_quat", 68 | "robot0_gripper_qpos", 69 | "object", 70 | ] 71 | self.observation.modalities.subgoal.rgb = [] # specify rgb image subgoal observations for agent to predict 72 | self.observation.modalities.subgoal.depth = [] 73 | self.observation.modalities.subgoal.scan = [] 74 | self.observation.modalities.subgoal.do_not_lock_keys() 75 | 76 | @property 77 | def all_obs_keys(self): 78 | """ 79 | Update from superclass to include subgoals. 
80 | """ 81 | # pool all modalities 82 | return sorted(tuple(set([ 83 | obs_key for group in [ 84 | self.observation.modalities.obs.values(), 85 | self.observation.modalities.goal.values(), 86 | self.observation.modalities.subgoal.values(), 87 | ] 88 | for modality in group 89 | for obs_key in modality 90 | ]))) 91 | -------------------------------------------------------------------------------- /robomimic/config/hbc_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for HBC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | from robomimic.config.gl_config import GLConfig 7 | from robomimic.config.bc_config import BCConfig 8 | 9 | 10 | class HBCConfig(BaseConfig): 11 | ALGO_NAME = "hbc" 12 | 13 | def train_config(self): 14 | """ 15 | Update from superclass to change default sequence length to load from dataset. 16 | """ 17 | super(HBCConfig, self).train_config() 18 | self.train.seq_length = 10 # length of experience sequence to fetch from the buffer 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor. Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 26 | """ 27 | super(HBCConfig, self).algo_config() 28 | 29 | # One of ["separate", "cascade"]. In "separate" mode (default), 30 | # the planner and actor are trained independently and then the planner subgoal predictions are 31 | # used to condition the actor at test-time. In "cascade" mode, the actor is trained directly 32 | # on planner subgoal predictions. In "actor_only" mode, only the actor is trained, and in 33 | # "planner_only" mode, only the planner is trained. 34 | self.algo.mode = "separate" 35 | self.algo.actor_use_random_subgoals = False # whether to sample subgoal index from [1, subgoal_horizon] 36 | self.algo.subgoal_update_interval = 10 # how frequently the subgoal should be updated at test-time 37 | 38 | 39 | # ================== Latent Subgoal Config ================== 40 | self.algo.latent_subgoal.enabled = False # if True, use VAE latent space as subgoals for actor, instead of reconstructions 41 | 42 | # prior correction trick for actor and value training: instead of using encoder for 43 | # transforming subgoals to latent subgoals, generate prior samples and choose 44 | # the closest one to the encoder output 45 | self.algo.latent_subgoal.prior_correction.enabled = False 46 | self.algo.latent_subgoal.prior_correction.num_samples = 100 47 | 48 | # ================== Planner Config ================== 49 | self.algo.planner = GLConfig().algo # config for goal learning 50 | # set subgoal horizon explicitly 51 | self.algo.planner.subgoal_horizon = 10 52 | # ensure VAE is used 53 | self.algo.planner.vae.enabled = True 54 | 55 | # ================== Actor Config =================== 56 | self.algo.actor = BCConfig().algo 57 | # use RNN 58 | self.algo.actor.rnn.enabled = True 59 | self.algo.actor.rnn.horizon = 10 60 | # remove unused parts of BCConfig algo config 61 | del self.algo.actor.gaussian 62 | del self.algo.actor.gmm 63 | del self.algo.actor.vae 64 | 65 | def observation_config(self): 66 | """ 67 | Update from superclass so that planner and actor each get their own observation config. 
68 | """ 69 | self.observation.planner = GLConfig().observation 70 | self.observation.actor = BCConfig().observation 71 | 72 | @property 73 | def use_goals(self): 74 | """ 75 | Update from superclass - planner goal modalities determine goal-conditioning 76 | """ 77 | return len( 78 | self.observation.planner.modalities.goal.low_dim + 79 | self.observation.planner.modalities.goal.rgb) > 0 80 | 81 | @property 82 | def all_obs_keys(self): 83 | """ 84 | Update from superclass to include modalities from planner and actor. 85 | """ 86 | # pool all modalities 87 | return sorted(tuple(set([ 88 | obs_key for group in [ 89 | self.observation.planner.modalities.obs.values(), 90 | self.observation.planner.modalities.goal.values(), 91 | self.observation.planner.modalities.subgoal.values(), 92 | self.observation.actor.modalities.obs.values(), 93 | self.observation.actor.modalities.goal.values(), 94 | ] 95 | for modality in group 96 | for obs_key in modality 97 | ]))) 98 | -------------------------------------------------------------------------------- /robomimic/config/iql_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for CQL algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class IQLConfig(BaseConfig): 9 | ALGO_NAME = "iql" 10 | 11 | def train_config(self): 12 | """ 13 | Update from superclass to change default batch size. 14 | """ 15 | super(IQLConfig, self).train_config() 16 | 17 | # increase batch size to 1024 (found to work better for most manipulation experiments) 18 | self.train.batch_size = 1024 19 | 20 | def algo_config(self): 21 | """ 22 | This function populates the `config.algo` attribute of the config, and is given to the 23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 24 | argument to the constructor. Any parameter that an algorithm needs to determine its 25 | training and test-time behavior should be populated here. 
26 | """ 27 | super(IQLConfig, self).algo_config() 28 | 29 | # optimization parameters 30 | self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate 31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 34 | 35 | self.algo.optim_params.vf.learning_rate.initial = 1e-4 # actor learning rate 36 | self.algo.optim_params.vf.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 37 | self.algo.optim_params.vf.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 38 | self.algo.optim_params.vf.regularization.L2 = 0.00 # L2 regularization strength 39 | 40 | self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate 41 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty) 42 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 43 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 44 | 45 | # target network related parameters 46 | self.algo.discount = 0.99 # discount factor to use 47 | self.algo.target_tau = 0.01 # update rate for target networks 48 | self.algo.ignore_dones = False 49 | self.algo.use_negative_rewards = False 50 | self.algo.use_shaped_rewards = False 51 | self.algo.relabel_dones_mode = None 52 | self.algo.relabel_rewards_mode = None 53 | 54 | # Actor network settings 55 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet) 56 | 57 | # Actor network settings - shared 58 | self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net 59 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage 60 | self.algo.actor.net.common.use_tanh = False 61 | 62 | # Actor network settings - gaussian 63 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value 64 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net 65 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not 66 | 67 | self.algo.actor.net.gmm.num_modes = 5 68 | self.algo.actor.net.gmm.min_std = 0.0001 69 | 70 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions 71 | 72 | self.algo.actor.max_gradient_norm = None 73 | 74 | # actor residual MLP settings 75 | self.algo.actor.res_mlp.enabled = False 76 | self.algo.actor.res_mlp.num_blocks = 4 77 | self.algo.actor.res_mlp.hidden_dim = 1024 78 | self.algo.actor.res_mlp.use_layer_norm = True 79 | 80 | # ================== Critic Network Config =================== 81 | # critic ensemble parameters (TD3 trick) 82 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 83 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions 84 | self.algo.critic.use_huber = False 85 | 86 | # critic residual MLP settings 87 | self.algo.critic.res_mlp.enabled = False 88 | self.algo.critic.res_mlp.num_blocks = 4 89 | self.algo.critic.res_mlp.hidden_dim = 1024 90 | self.algo.critic.res_mlp.use_layer_norm = True 91 | 92 | self.algo.adv.filter_type = "softmax" 93 
| self.algo.adv.use_final_clip = True 94 | self.algo.adv.clip_adv_value = None 95 | self.algo.adv.beta = 1.0 96 | 97 | self.algo.vf_quantile = 0.9 98 | 99 | self.algo.critic.max_gradient_norm = None 100 | -------------------------------------------------------------------------------- /robomimic/config/iris_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for IRIS algorithm. 3 | """ 4 | 5 | from robomimic.config.bcq_config import BCQConfig 6 | from robomimic.config.gl_config import GLConfig 7 | from robomimic.config.bc_config import BCConfig 8 | from robomimic.config.hbc_config import HBCConfig 9 | 10 | 11 | class IRISConfig(HBCConfig): 12 | ALGO_NAME = "iris" 13 | 14 | def algo_config(self): 15 | """ 16 | This function populates the `config.algo` attribute of the config, and is given to the 17 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 18 | argument to the constructor. Any parameter that an algorithm needs to determine its 19 | training and test-time behavior should be populated here. 20 | """ 21 | super(IRISConfig, self).algo_config() 22 | 23 | # One of ["separate", "cascade", "actor_only", "planner_only"]. In "separate" mode (default), 24 | # the planner and actor are trained independently and then the planner subgoal predictions are 25 | # used to condition the actor at test-time. In "cascade" mode, the actor is trained directly 26 | # on planner subgoal predictions. In "actor_only" mode, only the actor is trained, and in 27 | # "planner_only" mode, only the planner is trained. 28 | self.algo.mode = "separate" 29 | 30 | self.algo.actor_use_random_subgoals = False # whether to sample subgoal index from [1, subgoal_horizon] 31 | self.algo.subgoal_update_interval = 10 # how frequently the subgoal should be updated at test-time (usually matches train.seq_length) 32 | 33 | # ================== Latent Subgoal Config ================== 34 | 35 | # NOTE: latent subgoals are not supported by IRIS, but superclass expects this config 36 | self.algo.latent_subgoal.enabled = False 37 | self.algo.latent_subgoal.prior_correction.enabled = False 38 | self.algo.latent_subgoal.prior_correction.num_samples = 100 39 | 40 | # ================== Planner Config ================== 41 | 42 | # The ValuePlanner planner component is a Goal Learning VAE model 43 | self.algo.value_planner.planner = GLConfig().algo # config for goal learning 44 | # set subgoal horizon explicitly 45 | self.algo.value_planner.planner.subgoal_horizon = 10 46 | # ensure VAE is used 47 | self.algo.value_planner.planner.vae.enabled = True 48 | 49 | # The ValuePlanner value component is a BCQ model 50 | self.algo.value_planner.value = BCQConfig().algo 51 | self.algo.value_planner.value.actor.enabled = False # ensure no BCQ actor 52 | # number of subgoal samples to use for value planner 53 | self.algo.value_planner.num_samples = 100 54 | 55 | # ================== Actor Config =================== 56 | self.algo.actor = BCConfig().algo 57 | # use RNN 58 | self.algo.actor.rnn.enabled = True 59 | self.algo.actor.rnn.horizon = 10 60 | # remove unused parts of BCConfig algo config 61 | del self.algo.actor.gaussian 62 | del self.algo.actor.gmm 63 | del self.algo.actor.vae 64 | 65 | def observation_config(self): 66 | """ 67 | Update from superclass so that value planner and actor each get their own obs config.
68 | """ 69 | self.observation.value_planner.planner = GLConfig().observation 70 | self.observation.value_planner.value = BCQConfig().observation 71 | self.observation.actor = BCConfig().observation 72 | 73 | @property 74 | def use_goals(self): 75 | """ 76 | Update from superclass - value planner goal modalities determine goal-conditioning. 77 | """ 78 | return len( 79 | self.observation.value_planner.planner.modalities.goal.low_dim + 80 | self.observation.value_planner.planner.modalities.goal.rgb) > 0 81 | 82 | @property 83 | def all_obs_keys(self): 84 | """ 85 | Update from superclass to include modalities from value planner and actor. 86 | """ 87 | # pool all modalities 88 | return sorted(tuple(set([ 89 | obs_key for group in [ 90 | self.observation.value_planner.planner.modalities.obs.values(), 91 | self.observation.value_planner.planner.modalities.goal.values(), 92 | self.observation.value_planner.planner.modalities.subgoal.values(), 93 | self.observation.value_planner.value.modalities.obs.values(), 94 | self.observation.value_planner.value.modalities.goal.values(), 95 | self.observation.actor.modalities.obs.values(), 96 | self.observation.actor.modalities.goal.values(), 97 | ] 98 | for modality in group 99 | for obs_key in modality 100 | ]))) 101 | -------------------------------------------------------------------------------- /robomimic/config/td3_bc_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for TD3_BC. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class TD3_BCConfig(BaseConfig): 9 | ALGO_NAME = "td3_bc" 10 | 11 | def experiment_config(self): 12 | """ 13 | Update from subclass to set paper defaults for gym envs. 14 | """ 15 | super(TD3_BCConfig, self).experiment_config() 16 | 17 | # no validation and no video rendering 18 | self.experiment.validate = False 19 | self.experiment.render_video = False 20 | 21 | # save 10 checkpoints throughout training 22 | self.experiment.save.every_n_epochs = 20 23 | 24 | # save models that achieve best rollout return instead of best success rate 25 | self.experiment.save.on_best_rollout_return = True 26 | self.experiment.save.on_best_rollout_success_rate = False 27 | 28 | # epoch definition - 5000 gradient steps per epoch, with 200 epochs = 1M gradient steps, and eval every 1 epochs 29 | self.experiment.epoch_every_n_steps = 5000 30 | 31 | # evaluate with normal environment rollouts 32 | self.experiment.rollout.enabled = True 33 | self.experiment.rollout.n = 50 # paper uses 10, but we can afford to do 50 34 | self.experiment.rollout.horizon = 1000 35 | self.experiment.rollout.rate = 1 # rollout every epoch to match paper 36 | 37 | def train_config(self): 38 | """ 39 | Update from subclass to set paper defaults for gym envs. 40 | """ 41 | super(TD3_BCConfig, self).train_config() 42 | 43 | # update to normalize observations 44 | self.train.hdf5_normalize_obs = True 45 | 46 | # increase batch size to 256 47 | self.train.batch_size = 256 48 | 49 | # 200 epochs, with each epoch lasting 5000 gradient steps, for 1M total steps 50 | self.train.num_epochs = 200 51 | 52 | def algo_config(self): 53 | """ 54 | This function populates the `config.algo` attribute of the config, and is given to the 55 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 56 | argument to the constructor. Any parameter that an algorithm needs to determine its 57 | training and test-time behavior should be populated here. 
58 | """ 59 | super(TD3_BCConfig, self).algo_config() 60 | 61 | # optimization parameters 62 | self.algo.optim_params.critic.learning_rate.initial = 3e-4 # critic learning rate 63 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 64 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 65 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength 66 | self.algo.optim_params.critic.start_epoch = -1 # number of epochs before starting critic training (-1 means start right away) 67 | self.algo.optim_params.critic.end_epoch = -1 # number of epochs before ending critic training (-1 means start right away) 68 | 69 | self.algo.optim_params.actor.learning_rate.initial = 3e-4 # actor learning rate 70 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 71 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 72 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength 73 | self.algo.optim_params.actor.start_epoch = -1 # number of epochs before starting actor training (-1 means start right away) 74 | self.algo.optim_params.actor.end_epoch = -1 # number of epochs before ending actor training (-1 means start right away) 75 | 76 | # alpha value - for weighting critic loss vs. BC loss 77 | self.algo.alpha = 2.5 78 | 79 | # target network related parameters 80 | self.algo.discount = 0.99 # discount factor to use 81 | self.algo.n_step = 1 # for using n-step returns in TD-updates 82 | self.algo.target_tau = 0.005 # update rate for target networks 83 | self.algo.infinite_horizon = False # if True, scale terminal rewards by 1 / (1 - discount) to treat as infinite horizon 84 | 85 | # ================== Critic Network Config =================== 86 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic 87 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping) 88 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates 89 | 90 | # critic ensemble parameters (TD3 trick) 91 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble 92 | self.algo.critic.ensemble.weight = 1.0 # weighting for mixing min and max for target Q value 93 | 94 | self.algo.critic.layer_dims = (256, 256) # size of critic MLP 95 | 96 | # ================== Actor Network Config =================== 97 | 98 | # update actor and target networks every n gradients steps for each critic gradient step 99 | self.algo.actor.update_freq = 2 100 | 101 | # exploration noise used to form target action for Q-update - clipped Gaussian noise 102 | self.algo.actor.noise_std = 0.2 # zero-mean gaussian noise with this std is applied to actions 103 | self.algo.actor.noise_clip = 0.5 # noise is clipped in each dimension to (-noise_clip, noise_clip) 104 | 105 | self.algo.actor.layer_dims = (256, 256) # size of actor MLP 106 | 107 | def observation_config(self): 108 | """ 109 | Update from superclass to use flat observations from gym envs. 
110 | """ 111 | super(TD3_BCConfig, self).observation_config() 112 | self.observation.modalities.obs.low_dim = ["flat"] 113 | -------------------------------------------------------------------------------- /robomimic/config/vae_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config for BC algorithm. 3 | """ 4 | 5 | from robomimic.config.base_config import BaseConfig 6 | 7 | 8 | class VAEConfig(BaseConfig): 9 | ALGO_NAME = "vae" 10 | 11 | def algo_config(self): 12 | """ 13 | This function populates the `config.algo` attribute of the config, and is given to the 14 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config` 15 | argument to the constructor. Any parameter that an algorithm needs to determine its 16 | training and test-time behavior should be populated here. 17 | """ 18 | super(VAEConfig, self).algo_config() 19 | 20 | # optimization parameters 21 | self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate 22 | self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty) 23 | self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs 24 | self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength 25 | 26 | # loss weights 27 | self.algo.loss.l2_weight = 1.0 # L2 loss weight 28 | self.algo.loss.l1_weight = 0.0 # L1 loss weight 29 | self.algo.loss.cos_weight = 0.0 # cosine loss weight 30 | 31 | # MLP network architecture (layers after observation encoder and RNN, if present) 32 | self.algo.actor_layer_dims = (1024, 1024) 33 | self.algo.max_gradient_norm = None 34 | 35 | # residual MLP settings 36 | self.algo.res_mlp.enabled = False 37 | self.algo.res_mlp.num_blocks = 4 38 | self.algo.res_mlp.hidden_dim = 1024 39 | self.algo.res_mlp.use_layer_norm = True 40 | 41 | # stochastic Gaussian policy settings 42 | self.algo.gaussian.enabled = False # whether to train a Gaussian policy 43 | self.algo.gaussian.fixed_std = False # whether to train std output or keep it constant 44 | self.algo.gaussian.init_std = 0.1 # initial standard deviation (or constant) 45 | self.algo.gaussian.min_std = 0.01 # minimum std output from network 46 | self.algo.gaussian.std_activation = "softplus" # activation to use for std output from policy net 47 | self.algo.gaussian.low_noise_eval = True # low-std at test-time 48 | 49 | # stochastic GMM policy settings 50 | self.algo.gmm.enabled = False # whether to train a GMM policy 51 | self.algo.gmm.num_modes = 5 # number of GMM modes 52 | self.algo.gmm.min_std = 0.0001 # minimum std output from network 53 | self.algo.gmm.std_activation = "softplus" # activation to use for std output from policy net 54 | self.algo.gmm.low_noise_eval = True # low-std at test-time 55 | 56 | # stochastic VAE policy settings 57 | self.algo.vae.enabled = False # whether to train a VAE policy (unused) 58 | self.algo.vae.method = "" # to be specified in json file 59 | self.algo.vae.latent_dim = 14 # VAE latent dimnsion - set to twice the dimensionality of action space 60 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable) 61 | self.algo.vae.kl_weight = 1. 
# beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO 62 | self.algo.vae.conditioned_on_obs = True 63 | 64 | # VAE decoder settings 65 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation 66 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss 67 | 68 | # VAE prior settings 69 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1) 70 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations 71 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior 72 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes 73 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights 74 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior 75 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension 76 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass 77 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp 78 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate 79 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp 80 | 81 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions 82 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions 83 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior) 84 | 85 | # RNN policy settings 86 | self.algo.rnn.enabled = False # whether to train RNN policy 87 | self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length 88 | self.algo.rnn.hidden_dim = 400 # hidden dimension size 89 | self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU" 90 | self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked 91 | self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence) 92 | self.algo.rnn.kwargs.bidirectional = False # rnn kwargs 93 | self.algo.rnn.kwargs.do_not_lock_keys() 94 | 95 | # Hardcoded Weights 96 | self.algo.hc_weights.use_hardcode_weight = False 97 | self.algo.hc_weights.weight_key = "" 98 | self.algo.hc_weights.mixed_weights = False 99 | self.algo.hc_weights.use_adv_score = False 100 | 101 | self.algo.hc_weights.demos = 1 102 | self.algo.hc_weights.rollouts = 1 103 | self.algo.hc_weights.intvs = 1 104 | self.algo.hc_weights.pre_intvs = 0.1 105 | 106 | self.algo.hc_weights.traj_label_type = "last" 107 | 108 | self.algo.hc_weights.batch_normalize = True 109 | -------------------------------------------------------------------------------- /robomimic/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__init__.py -------------------------------------------------------------------------------- /robomimic/envs/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- 
/robomimic/envs/__pycache__/env_base.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__pycache__/env_base.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/envs/env_base.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the base class for environment wrappers that are used 3 | to provide a standardized environment API for training policies and interacting 4 | with metadata present in datasets. 5 | """ 6 | import abc 7 | 8 | 9 | class EnvType: 10 | """ 11 | Holds environment types - one per environment class. 12 | These act as identifiers for different environments. 13 | """ 14 | ROBOSUITE_TYPE = 1 15 | GYM_TYPE = 2 16 | IG_MOMART_TYPE = 3 17 | 18 | 19 | class EnvBase(abc.ABC): 20 | """A base class for environments used by this repo.""" 21 | @abc.abstractmethod 22 | def __init__( 23 | self, 24 | env_name, 25 | render=False, 26 | render_offscreen=False, 27 | use_image_obs=False, 28 | postprocess_visual_obs=True, 29 | **kwargs, 30 | ): 31 | """ 32 | Args: 33 | env_name (str): name of environment. Only needs to be provided if making a different 34 | environment from the one in @env_meta. 35 | 36 | render (bool): if True, environment supports on-screen rendering 37 | 38 | render_offscreen (bool): if True, environment supports off-screen rendering. This 39 | is forced to be True if @env_meta["use_images"] is True. 40 | 41 | use_image_obs (bool): if True, environment is expected to render rgb image observations 42 | on every env.step call. Set this to False for efficiency reasons, if image 43 | observations are not required. 44 | 45 | postprocess_visual_obs (bool): if True, postprocess image observations 46 | to prepare for learning. This should only be False when extracting observations 47 | for saving to a dataset (to save space on RGB images for example). 48 | """ 49 | return 50 | 51 | @abc.abstractmethod 52 | def step(self, action): 53 | """ 54 | Step in the environment with an action. 55 | 56 | Args: 57 | action (np.array): action to take 58 | 59 | Returns: 60 | observation (dict): new observation dictionary 61 | reward (float): reward for this step 62 | done (bool): whether the task is done 63 | info (dict): extra information 64 | """ 65 | return 66 | 67 | @abc.abstractmethod 68 | def reset(self): 69 | """ 70 | Reset environment. 71 | 72 | Returns: 73 | observation (dict): initial observation dictionary. 74 | """ 75 | return 76 | 77 | @abc.abstractmethod 78 | def reset_to(self, state): 79 | """ 80 | Reset to a specific simulator state. 81 | 82 | Args: 83 | state (dict): current simulator state 84 | 85 | Returns: 86 | observation (dict): observation dictionary after setting the simulator state 87 | """ 88 | return 89 | 90 | @abc.abstractmethod 91 | def render(self, mode="human", height=None, width=None, camera_name=None): 92 | """Render""" 93 | return 94 | 95 | @abc.abstractmethod 96 | def get_observation(self): 97 | """Get environment observation""" 98 | return 99 | 100 | @abc.abstractmethod 101 | def get_state(self): 102 | """Get environment simulator state, compatible with @reset_to""" 103 | return 104 | 105 | @abc.abstractmethod 106 | def get_reward(self): 107 | """ 108 | Get current reward.
109 | """ 110 | return 111 | 112 | @abc.abstractmethod 113 | def get_goal(self): 114 | """ 115 | Get goal observation. Not all environments support this. 116 | """ 117 | return 118 | 119 | @abc.abstractmethod 120 | def set_goal(self, **kwargs): 121 | """ 122 | Set goal observation with external specification. Not all environments support this. 123 | """ 124 | return 125 | 126 | @abc.abstractmethod 127 | def is_done(self): 128 | """ 129 | Check if the task is done (not necessarily successful). 130 | """ 131 | return 132 | 133 | @abc.abstractmethod 134 | def is_success(self): 135 | """ 136 | Check if the task condition(s) is reached. Should return a dictionary 137 | { str: bool } with at least a "task" key for the overall task success, 138 | and additional optional keys corresponding to other task criteria. 139 | """ 140 | return 141 | 142 | @property 143 | @abc.abstractmethod 144 | def action_dimension(self): 145 | """ 146 | Returns dimension of actions (int). 147 | """ 148 | return 149 | 150 | @property 151 | @abc.abstractmethod 152 | def name(self): 153 | """ 154 | Returns name of environment name (str). 155 | """ 156 | return 157 | 158 | @property 159 | @abc.abstractmethod 160 | def type(self): 161 | """ 162 | Returns environment type (int) for this kind of environment. 163 | This helps identify this env class. 164 | """ 165 | return 166 | 167 | @abc.abstractmethod 168 | def serialize(self): 169 | """ 170 | Save all information needed to re-instantiate this environment in a dictionary. 171 | This is the same as @env_meta - environment metadata stored in hdf5 datasets, 172 | and used in utils/env_utils.py. 173 | """ 174 | return 175 | 176 | @classmethod 177 | @abc.abstractmethod 178 | def create_for_data_processing(cls, camera_names, camera_height, camera_width, reward_shaping, **kwargs): 179 | """ 180 | Create environment for processing datasets, which includes extracting 181 | observations, labeling dense / sparse rewards, and annotating dones in 182 | transitions. 183 | 184 | Args: 185 | camera_names ([str]): list of camera names that correspond to image observations 186 | camera_height (int): camera height for all cameras 187 | camera_width (int): camera width for all cameras 188 | reward_shaping (bool): if True, use shaped environment rewards, else use sparse task completion rewards 189 | 190 | Returns: 191 | env (EnvBase instance) 192 | """ 193 | return 194 | 195 | @property 196 | @abc.abstractmethod 197 | def rollout_exceptions(self): 198 | """ 199 | Return tuple of exceptions to except when doing rollouts. This is useful to ensure 200 | that the entire training run doesn't crash because of a bad policy that causes unstable 201 | simulation computations. 
202 | """ 203 | return 204 | 205 | -------------------------------------------------------------------------------- /robomimic/exps/templates/td3_bc.json: -------------------------------------------------------------------------------- 1 | { 2 | "algo_name": "td3_bc", 3 | "experiment": { 4 | "name": "test", 5 | "validate": false, 6 | "logging": { 7 | "terminal_output_to_txt": true, 8 | "log_tb": true 9 | }, 10 | "save": { 11 | "enabled": true, 12 | "every_n_seconds": null, 13 | "every_n_epochs": 20, 14 | "epochs": [], 15 | "on_best_validation": false, 16 | "on_best_rollout_return": true, 17 | "on_best_rollout_success_rate": false 18 | }, 19 | "epoch_every_n_steps": 5000, 20 | "validation_epoch_every_n_steps": 10, 21 | "env": null, 22 | "additional_envs": null, 23 | "render": false, 24 | "render_video": false, 25 | "keep_all_videos": false, 26 | "video_skip": 5, 27 | "rollout": { 28 | "enabled": true, 29 | "n": 50, 30 | "horizon": 1000, 31 | "rate": 1, 32 | "warmstart": 0, 33 | "terminate_on_success": true 34 | } 35 | }, 36 | "train": { 37 | "data": null, 38 | "output_dir": "../td3_bc_trained_models", 39 | "num_data_workers": 0, 40 | "hdf5_cache_mode": "all", 41 | "hdf5_use_swmr": true, 42 | "hdf5_normalize_obs": true, 43 | "hdf5_filter_key": null, 44 | "seq_length": 1, 45 | "dataset_keys": [ 46 | "actions", 47 | "rewards", 48 | "dones" 49 | ], 50 | "goal_mode": null, 51 | "cuda": true, 52 | "batch_size": 256, 53 | "num_epochs": 200, 54 | "seed": 1 55 | }, 56 | "algo": { 57 | "optim_params": { 58 | "critic": { 59 | "learning_rate": { 60 | "initial": 0.0003, 61 | "decay_factor": 0.1, 62 | "epoch_schedule": [] 63 | }, 64 | "regularization": { 65 | "L2": 0.0 66 | }, 67 | "start_epoch": -1, 68 | "end_epoch": -1 69 | }, 70 | "actor": { 71 | "learning_rate": { 72 | "initial": 0.0003, 73 | "decay_factor": 0.1, 74 | "epoch_schedule": [] 75 | }, 76 | "regularization": { 77 | "L2": 0.0 78 | }, 79 | "start_epoch": -1, 80 | "end_epoch": -1 81 | } 82 | }, 83 | "alpha": 2.5, 84 | "discount": 0.99, 85 | "n_step": 1, 86 | "target_tau": 0.005, 87 | "infinite_horizon": false, 88 | "critic": { 89 | "use_huber": false, 90 | "max_gradient_norm": null, 91 | "value_bounds": null, 92 | "ensemble": { 93 | "n": 2, 94 | "weight": 1.0 95 | }, 96 | "layer_dims": [ 97 | 256, 98 | 256 99 | ] 100 | }, 101 | "actor": { 102 | "update_freq": 2, 103 | "noise_std": 0.2, 104 | "noise_clip": 0.5, 105 | "layer_dims": [ 106 | 256, 107 | 256 108 | ] 109 | } 110 | }, 111 | "observation": { 112 | "modalities": { 113 | "obs": { 114 | "low_dim": [ 115 | "flat" 116 | ], 117 | "rgb": [], 118 | "depth": [], 119 | "scan": [] 120 | }, 121 | "goal": { 122 | "low_dim": [], 123 | "rgb": [], 124 | "depth": [], 125 | "scan": [] 126 | } 127 | }, 128 | "encoder": { 129 | "low_dim": { 130 | "feature_dimension": null, 131 | "core_class": null, 132 | "core_kwargs": {}, 133 | "obs_randomizer_class": null, 134 | "obs_randomizer_kwargs": {} 135 | }, 136 | "rgb": { 137 | "feature_dimension": 64, 138 | "core_class": "VisualCore", 139 | "core_kwargs": { 140 | "backbone_class": "ResNet18Conv", 141 | "backbone_kwargs": { 142 | "pretrained": false, 143 | "input_coord_conv": false 144 | } 145 | }, 146 | "obs_randomizer_class": null, 147 | "obs_randomizer_kwargs": { 148 | "crop_height": 76, 149 | "crop_width": 76, 150 | "num_crops": 1, 151 | "pos_enc": false 152 | }, 153 | "pool_class": "SpatialSoftmax", 154 | "pool_kwargs": { 155 | "num_kp": 32, 156 | "learnable_temperature": false, 157 | "temperature": 1.0, 158 | "noise_std": 0.0 159 | } 160 | }, 161 | 
"depth": { 162 | "feature_dimension": 64, 163 | "core_class": "VisualCore", 164 | "core_kwargs": { 165 | "backbone_class": "ResNet18Conv", 166 | "backbone_kwargs": { 167 | "pretrained": false, 168 | "input_coord_conv": false 169 | } 170 | }, 171 | "obs_randomizer_class": null, 172 | "obs_randomizer_kwargs": { 173 | "crop_height": 76, 174 | "crop_width": 76, 175 | "num_crops": 1, 176 | "pos_enc": false 177 | }, 178 | "pool_class": "SpatialSoftmax", 179 | "pool_kwargs": { 180 | "num_kp": 32, 181 | "learnable_temperature": false, 182 | "temperature": 1.0, 183 | "noise_std": 0.0 184 | } 185 | }, 186 | "scan": { 187 | "feature_dimension": 64, 188 | "core_class": "ScanCore", 189 | "core_kwargs": { 190 | "backbone_class": "ResNet18Conv", 191 | "backbone_kwargs": { 192 | "pretrained": false, 193 | "input_coord_conv": false 194 | }, 195 | "conv_kwargs": { 196 | "out_channels": [ 197 | 32, 198 | 64, 199 | 64 200 | ], 201 | "kernel_size": [ 202 | 8, 203 | 4, 204 | 2 205 | ], 206 | "stride": [ 207 | 4, 208 | 2, 209 | 1 210 | ] 211 | } 212 | }, 213 | "obs_randomizer_class": null, 214 | "obs_randomizer_kwargs": { 215 | "crop_height": 76, 216 | "crop_width": 76, 217 | "num_crops": 1, 218 | "pos_enc": false 219 | }, 220 | "pool_class": "SpatialSoftmax", 221 | "pool_kwargs": { 222 | "num_kp": 32, 223 | "learnable_temperature": false, 224 | "temperature": 1.0, 225 | "noise_std": 0.0 226 | } 227 | } 228 | } 229 | } 230 | } -------------------------------------------------------------------------------- /robomimic/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_nets import EncoderCore, Randomizer 2 | -------------------------------------------------------------------------------- /robomimic/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/base_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/base_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/distributions.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/distributions.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/obs_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/obs_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/policy_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/policy_nets.cpython-38.pyc 
-------------------------------------------------------------------------------- /robomimic/models/__pycache__/vae_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/vae_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/models/__pycache__/value_nets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/value_nets.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/scripts/check_same_initial_configs.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import sys 3 | import numpy as np 4 | 5 | dataset_path = sys.argv[1] # path to first hdf5 dataset 6 | 7 | f = h5py.File(dataset_path, "r") 8 | data = f["data"] 9 | demos = list(data.keys()) 10 | 11 | initial_state_lst = [] 12 | 13 | def same_state(s1, s2): 14 | return np.all(np.equal(s1, s2)) # states are identical iff all elements match 15 | 16 | for d in demos: 17 | this_s = data[d]["states"][()][0] # initial simulator state of this demo 18 | for s in initial_state_lst: 19 | if same_state(s, this_s): 20 | print("same state") 21 | initial_state_lst.append(this_s) 22 | 23 | ######################################### 24 | 25 | sec_dataset_path = sys.argv[2] # path to second hdf5 dataset 26 | f = h5py.File(sec_dataset_path, "r") 27 | sec_data = f["data"] 28 | sec_demos = list(sec_data.keys()) 29 | 30 | for d in sec_demos: 31 | this_s = sec_data[d]["states"][()][0] # initial simulator state of this demo 32 | for s in initial_state_lst: 33 | if same_state(s, this_s): 34 | print("same state") 35 | initial_state_lst.append(this_s) 36 | 37 | 38 | -------------------------------------------------------------------------------- /robomimic/scripts/conversion/convert_d4rl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to convert D4RL data into an hdf5 compatible with this repository. 3 | Takes a folder path and a D4RL env name. This script downloads the corresponding 4 | raw D4RL dataset into a "d4rl" subfolder, and then makes a converted dataset 5 | in the "d4rl/converted" subfolder. 6 | 7 | This script has been tested on the following commits: 8 | 9 | https://github.com/rail-berkeley/d4rl/tree/9b68f31bab6a8546edfb28ff0bd9d5916c62fd1f 10 | https://github.com/rail-berkeley/d4rl/tree/26adf732efafdad864b3df2287e7b778ee4f7f63 11 | 12 | Args: 13 | env (str): d4rl env name, which specifies the dataset to download and convert 14 | folder (str): specify folder to download raw d4rl datasets and converted d4rl datasets to. 15 | A `d4rl` subfolder will be created in this folder with the raw d4rl dataset, and 16 | a `d4rl/converted` subfolder will be created in this folder with the converted 17 | datasets (if they do not already exist). Defaults to the datasets folder at 18 | the top-level of the repository.
19 | 20 | Example usage: 21 | 22 | # downloads to default path at robomimic/datasets/d4rl 23 | python convert_d4rl.py --env walker2d-medium-expert-v0 24 | 25 | # download to custom path 26 | python convert_d4rl.py --env walker2d-medium-expert-v0 --folder /path/to/folder 27 | """ 28 | 29 | import os 30 | import h5py 31 | import json 32 | import argparse 33 | import numpy as np 34 | 35 | import gym 36 | import d4rl 37 | import robomimic 38 | from robomimic.envs.env_gym import EnvGym 39 | from robomimic.utils.log_utils import custom_tqdm 40 | 41 | if __name__ == "__main__": 42 | parser = argparse.ArgumentParser() 43 | parser.add_argument( 44 | "--env", 45 | type=str, 46 | help="d4rl env name, which specifies the dataset to download and convert", 47 | ) 48 | parser.add_argument( 49 | "--folder", 50 | type=str, 51 | default=None, 52 | help="specify folder to download raw d4rl datasets and converted d4rl datasets to.\ 53 | A `d4rl` subfolder will be created in this folder with the raw d4rl dataset, and\ 54 | a `d4rl/converted` subfolder will be created in this folder with the converted\ 55 | datasets (if they do not already exist). Defaults to the datasets folder at\ 56 | the top-level of the repository.", 57 | ) 58 | args = parser.parse_args() 59 | 60 | base_folder = args.folder 61 | if base_folder is None: 62 | base_folder = os.path.join(robomimic.__path__[0], "../datasets") 63 | base_folder = os.path.join(base_folder, "d4rl") 64 | 65 | # get dataset 66 | d4rl.set_dataset_path(base_folder) 67 | env = gym.make(args.env) 68 | ds = env.env.get_dataset() 69 | env.close() 70 | 71 | # env 72 | env = EnvGym(args.env) 73 | 74 | # output file 75 | write_folder = os.path.join(base_folder, "converted") 76 | if not os.path.exists(write_folder): 77 | os.makedirs(write_folder) 78 | output_path = os.path.join(base_folder, "converted", "{}.hdf5".format(args.env.replace("-", "_"))) 79 | f_sars = h5py.File(output_path, "w") 80 | f_sars_grp = f_sars.create_group("data") 81 | 82 | # code to split D4RL data into trajectories 83 | # (modified from https://github.com/aviralkumar2907/d4rl_evaluations/blob/bear_intergrate/bear/examples/bear_hdf5_d4rl.py#L18) 84 | all_obs = ds['observations'] 85 | all_act = ds['actions'] 86 | N = all_obs.shape[0] 87 | 88 | obs = all_obs[:N-1] 89 | actions = all_act[:N-1] 90 | next_obs = all_obs[1:] 91 | rewards = np.squeeze(ds['rewards'][:N-1]) 92 | dones = np.squeeze(ds['terminals'][:N-1]).astype(np.int32) 93 | 94 | assert 'timeouts' in ds 95 | timeouts = ds['timeouts'][:] 96 | 97 | ctr = 0 98 | total_samples = 0 99 | num_traj = 0 100 | traj = dict(obs=[], next_obs=[], actions=[], rewards=[], dones=[]) 101 | 102 | print("\nConverting hdf5...") 103 | for idx in custom_tqdm(range(obs.shape[0])): 104 | 105 | # add transition 106 | traj["obs"].append(obs[idx]) 107 | traj["actions"].append(actions[idx]) 108 | traj["rewards"].append(rewards[idx]) 109 | traj["next_obs"].append(next_obs[idx]) 110 | traj["dones"].append(dones[idx]) 111 | ctr += 1 112 | 113 | # if hit timeout or done is True, end the current trajectory and start a new trajectory 114 | if timeouts[idx] or dones[idx]: 115 | 116 | # replace next obs with copy of current obs for final timestep, and make sure done is true 117 | traj["next_obs"][-1] = np.array(obs[idx]) 118 | traj["dones"][-1] = 1 119 | 120 | # store trajectory 121 | ep_data_grp = f_sars_grp.create_group("demo_{}".format(num_traj)) 122 | ep_data_grp.create_dataset("obs/flat", data=np.array(traj["obs"])) 123 | ep_data_grp.create_dataset("next_obs/flat", 
data=np.array(traj["next_obs"])) 124 | ep_data_grp.create_dataset("actions", data=np.array(traj["actions"])) 125 | ep_data_grp.create_dataset("rewards", data=np.array(traj["rewards"])) 126 | ep_data_grp.create_dataset("dones", data=np.array(traj["dones"])) 127 | ep_data_grp.attrs["num_samples"] = len(traj["actions"]) 128 | total_samples += len(traj["actions"]) 129 | num_traj += 1 130 | 131 | # reset 132 | ctr = 0 133 | traj = dict(obs=[], next_obs=[], actions=[], rewards=[], dones=[]) 134 | 135 | print("\nExcluding {} samples at end of file due to no trajectory truncation.".format(len(traj["actions"]))) 136 | print("Wrote {} trajectories to new converted hdf5 at {}\n".format(num_traj, output_path)) 137 | 138 | # metadata 139 | f_sars_grp.attrs["total"] = total_samples 140 | f_sars_grp.attrs["env_args"] = json.dumps(env.serialize(), indent=4) 141 | 142 | f_sars.close() 143 | 144 | -------------------------------------------------------------------------------- /robomimic/scripts/conversion/convert_robosuite.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to convert a dataset collected using robosuite into an hdf5 compatible with 3 | this repository. Takes a dataset path corresponding to the demo.hdf5 file containing the 4 | demonstrations. It modifies the dataset in-place. By default, the script also creates a 5 | 90-10 train-validation split. 6 | 7 | For more information on collecting datasets with robosuite, see the code link and documentation 8 | link below. 9 | 10 | Code: https://github.com/ARISE-Initiative/robosuite/blob/offline_study/robosuite/scripts/collect_human_demonstrations.py 11 | 12 | Documentation: https://robosuite.ai/docs/algorithms/demonstrations.html 13 | 14 | Example usage: 15 | 16 | python convert_robosuite.py --dataset /path/to/your/demo.hdf5 17 | """ 18 | 19 | import h5py 20 | import json 21 | import argparse 22 | import os 23 | 24 | import robomimic.envs.env_base as EB 25 | from robomimic.scripts.split_train_val import split_train_val_from_hdf5 26 | 27 | 28 | if __name__ == "__main__": 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument( 31 | "--dataset", 32 | type=str, 33 | help="path to input hdf5 dataset", 34 | ) 35 | args = parser.parse_args() 36 | 37 | f = h5py.File(os.path.expanduser(args.dataset), "a") # edit mode 38 | 39 | # store env meta 40 | env_name = f["data"].attrs["env"] 41 | env_info = json.loads(f["data"].attrs["env_info"]) 42 | env_meta = dict( 43 | type=EB.EnvType.ROBOSUITE_TYPE, 44 | env_name=env_name, 45 | env_kwargs=env_info, 46 | ) 47 | if "env_args" in f["data"].attrs: 48 | del f["data"].attrs["env_args"] 49 | f["data"].attrs["env_args"] = json.dumps(env_meta, indent=4) 50 | 51 | print("====== Stored env meta ======") 52 | print(f["data"].attrs["env_args"]) 53 | 54 | # store metadata about number of samples 55 | total_samples = 0 56 | for ep in f["data"]: 57 | # ensure model-xml is in per-episode metadata 58 | assert "model_file" in f["data/{}".format(ep)].attrs 59 | 60 | # add "num_samples" into per-episode metadata 61 | if "num_samples" in f["data/{}".format(ep)].attrs: 62 | del f["data/{}".format(ep)].attrs["num_samples"] 63 | n_sample = f["data/{}/actions".format(ep)].shape[0] 64 | f["data/{}".format(ep)].attrs["num_samples"] = n_sample 65 | total_samples += n_sample 66 | 67 | # add total samples to global metadata 68 | if "total" in f["data"].attrs: 69 | del f["data"].attrs["total"] 70 | f["data"].attrs["total"] = total_samples 71 | 72 | f.close() 73 | 74 | # 
create 90-10 train-validation split in the dataset 75 | split_train_val_from_hdf5(hdf5_path=args.dataset, val_ratio=0.1) 76 | -------------------------------------------------------------------------------- /robomimic/scripts/conversion/convert_roboturk_pilot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to convert the RoboTurk Pilot datasets (https://roboturk.stanford.edu/dataset_sim.html) 3 | into a format compatible with this repository. It will also create some useful filter keys 4 | in the file (e.g. training, validation, and fastest n trajectories). Prior work 5 | (https://arxiv.org/abs/1911.05321) has found this useful (for example, training on the 6 | fastest 225 demonstrations for bins-Can). 7 | 8 | Direct download link for dataset: http://cvgl.stanford.edu/projects/roboturk/RoboTurkPilot.zip 9 | 10 | Args: 11 | folder (str): path to a folder containing a demo.hdf5 and a models directory containing 12 | mujoco xml files. For example, RoboTurkPilot/bins-Can. 13 | 14 | n (int): creates a filter key corresponding to the n fastest trajectories. Defaults to 225. 15 | 16 | Example usage: 17 | 18 | python convert_roboturk_pilot.py --folder /path/to/RoboTurkPilot/bins-Can --n 225 19 | """ 20 | 21 | import os 22 | import h5py 23 | import json 24 | import argparse 25 | import numpy as np 26 | from tqdm import tqdm 27 | 28 | import robomimic 29 | import robomimic.envs.env_base as EB 30 | from robomimic.utils.file_utils import create_hdf5_filter_key 31 | from robomimic.scripts.split_train_val import split_train_val_from_hdf5 32 | 33 | 34 | def convert_rt_pilot_hdf5(ref_folder): 35 | """ 36 | Uses the reference demo hdf5 to write a new converted hdf5 compatible with 37 | the repository. 38 | 39 | Args: 40 | ref_folder (str): path to a folder containing a demo.hdf5 and a models directory containing 41 | mujoco xml files. 42 | """ 43 | hdf5_path = os.path.join(ref_folder, "demo.hdf5") 44 | new_path = os.path.join(ref_folder, "demo_new.hdf5") 45 | 46 | f = h5py.File(hdf5_path, "r") 47 | f_new = h5py.File(new_path, "w") 48 | f_new_grp = f_new.create_group("data") 49 | 50 | # sorted list of demonstrations by demo number 51 | demos = list(f["data"].keys()) 52 | inds = np.argsort([int(elem[5:]) for elem in demos]) 53 | demos = [demos[i] for i in inds] 54 | 55 | # write each demo 56 | num_samples_arr = [] 57 | for demo_id in tqdm(range(len(demos))): 58 | ep = demos[demo_id] 59 | 60 | # create group for this demonstration 61 | ep_data_grp = f_new_grp.create_group(ep) 62 | 63 | # copy states over 64 | states = f["data/{}/states".format(ep)][()] 65 | ep_data_grp.create_dataset("states", data=np.array(states)) 66 | 67 | # concat jvels and gripper actions to form full actions 68 | jvels = f["data/{}/joint_velocities".format(ep)][()] 69 | gripper_acts = f["data/{}/gripper_actuations".format(ep)][()] 70 | actions = np.concatenate([jvels, gripper_acts], axis=1) 71 | 72 | # IMPORTANT: clip actions to -1, 1, since this is expected by the codebase 73 | actions = np.clip(actions, -1., 1.) 
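        # (illustrative note: np.clip maps each out-of-range element to the
        # nearest bound, e.g. a raw joint-velocity command of 1.7 becomes 1.0,
        # preserving the [-1, 1] action range assumed by downstream training)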
74 | ep_data_grp.create_dataset("actions", data=actions) 75 | 76 | # store model xml directly in the new hdf5 file 77 | model_path = os.path.join(ref_folder, "models", f["data/{}".format(ep)].attrs["model_file"]) 78 | f_model = open(model_path, "r") 79 | model_xml = f_model.read() 80 | f_model.close() 81 | ep_data_grp.attrs["model_file"] = model_xml 82 | 83 | # store num samples for this ep 84 | num_samples = actions.shape[0] 85 | ep_data_grp.attrs["num_samples"] = num_samples # number of transitions in this episode 86 | num_samples_arr.append(num_samples) 87 | 88 | # write dataset attributes (metadata) 89 | f_new_grp.attrs["total"] = np.sum(num_samples_arr) 90 | 91 | # construct and save env metadata 92 | env_meta = dict() 93 | env_meta["type"] = EB.EnvType.ROBOSUITE_TYPE 94 | env_meta["env_name"] = (f["data"].attrs["env"] + "Teleop") 95 | # hardcode robosuite v0.3 args 96 | robosuite_args = { 97 | "has_renderer": False, 98 | "has_offscreen_renderer": False, 99 | "ignore_done": True, 100 | "use_object_obs": True, 101 | "use_camera_obs": False, 102 | "camera_depth": False, 103 | "camera_height": 84, 104 | "camera_width": 84, 105 | "camera_name": "agentview", 106 | "gripper_visualization": False, 107 | "reward_shaping": False, 108 | "control_freq": 100, 109 | } 110 | env_meta["env_kwargs"] = robosuite_args 111 | f_new_grp.attrs["env_args"] = json.dumps(env_meta, indent=4) # environment info 112 | 113 | print("\n====== Added env meta ======") 114 | print(f_new_grp.attrs["env_args"]) 115 | 116 | f.close() 117 | f_new.close() 118 | 119 | # back up the old dataset, and replace with new dataset 120 | os.rename(hdf5_path, os.path.join(ref_folder, "demo_bak.hdf5")) 121 | os.rename(new_path, hdf5_path) 122 | 123 | 124 | def split_fastest_from_hdf5(hdf5_path, n): 125 | """ 126 | Creates filter key for fastest N trajectories, named 127 | "fastest_{}".format(n). 128 | 129 | Args: 130 | hdf5_path (str): path to the hdf5 file 131 | 132 | n (int): fastest n demos to create filter key for 133 | """ 134 | 135 | # retrieve fastest n demos 136 | f = h5py.File(hdf5_path, "r") 137 | demos = sorted(list(f["data"].keys())) 138 | traj_lengths = [] 139 | for ep in demos: 140 | traj_lengths.append(f["data/{}/actions".format(ep)].shape[0]) 141 | inds = np.argsort(traj_lengths)[:n] 142 | filtered_demos = [demos[i] for i in inds] 143 | f.close() 144 | 145 | # create filter key 146 | name = "fastest_{}".format(n) 147 | lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filtered_demos, key_name=name) 148 | 149 | print("Total number of samples in fastest {} demos: {}".format(n, np.sum(lengths))) 150 | print("Average number of samples in fastest {} demos: {}".format(n, np.mean(lengths))) 151 | 152 | 153 | if __name__ == "__main__": 154 | parser = argparse.ArgumentParser() 155 | parser.add_argument( 156 | "--folder", 157 | type=str, 158 | help="path to a folder containing a demo.hdf5 and a models directory containing \ 159 | mujoco xml files. For example, RoboTurkPilot/bins-Can.", 160 | ) 161 | parser.add_argument( 162 | "--n", 163 | type=int, 164 | default=225, 165 | help="creates a filter key corresponding to the n fastest trajectories. 
Defaults to 225.", 166 | ) 167 | args = parser.parse_args() 168 | 169 | # convert hdf5 170 | convert_rt_pilot_hdf5(ref_folder=args.folder) 171 | 172 | # create 90-10 train-validation split in the dataset 173 | print("\nCreating 90-10 train-validation split...\n") 174 | hdf5_path = os.path.join(args.folder, "demo.hdf5") 175 | split_train_val_from_hdf5(hdf5_path=hdf5_path, val_ratio=0.1) 176 | 177 | print("\nCreating filter key for fastest {} trajectories...".format(args.n)) 178 | split_fastest_from_hdf5(hdf5_path=hdf5_path, n=args.n) 179 | 180 | print("\nCreating 90-10 train-validation split for fastest {} trajectories...".format(args.n)) 181 | split_train_val_from_hdf5(hdf5_path=hdf5_path, val_ratio=0.1, filter_key="fastest_{}".format(args.n)) 182 | 183 | print( 184 | "\nWARNING: new dataset has replaced old one in demo.hdf5 file. " 185 | "The old dataset file has been moved to demo_bak.hdf5" 186 | ) 187 | 188 | print( 189 | "\nNOTE: the new dataset also contains a fastest_{} filter key, for an easy way " 190 | "to train on the fastest trajectories. Just set config.train.hdf5_filter_key to train on this " 191 | "subset. A common choice is 225 when training on the bins-Can dataset.\n".format(args.n) 192 | ) 193 | -------------------------------------------------------------------------------- /robomimic/scripts/download_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to download datasets packaged with the repository. By default, all 3 | datasets will be stored at robomimic/datasets, unless the @download_dir 4 | argument is supplied. We recommend using the default, as most examples that 5 | use these datasets assume that they can be found there. 6 | 7 | The @tasks, @dataset_types, and @hdf5_types arguments can all be supplied 8 | to choose which datasets to download. 9 | 10 | Args: 11 | download_dir (str): Base download directory. Created if it doesn't exist. 12 | Defaults to datasets folder in repository - only pass in if you would 13 | like to override the location. 14 | 15 | tasks (list): Tasks to download datasets for. Defaults to lift task. Pass 'all' to 16 | download all tasks (sim + real), 'sim' to download all sim tasks, 'real' to 17 | download all real tasks, or directly specify the list of tasks. 18 | 19 | dataset_types (list): Dataset types to download datasets for (e.g. ph, mh, mg). 20 | Defaults to ph. Pass 'all' to download datasets for all available dataset 21 | types per task, or directly specify the list of dataset types. 22 | 23 | hdf5_types (list): hdf5 types to download datasets for (e.g. raw, low_dim, image). 24 | Defaults to low_dim. Pass 'all' to download datasets for all available hdf5 25 | types per task and dataset, or directly specify the list of hdf5 types.
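dry_run (bool): (documented here for completeness; the flag exists in the parser below) if provided, do a dry run that only prints which datasets would be downloaded, without downloading anything.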
26 | 27 | Example usage: 28 | 29 | # default behavior - just download lift proficient-human low-dim dataset 30 | python download_datasets.py 31 | 32 | # download low-dim proficient-human datasets for all simulation tasks 33 | # (do a dry run first to see which datasets would be downloaded) 34 | python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim --dry_run 35 | python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim 36 | 37 | # download all low-dim and image multi-human datasets for the can and square tasks 38 | python download_datasets.py --tasks can square --dataset_types mh --hdf5_types low_dim image 39 | 40 | # download the sparse reward machine-generated low-dim datasets 41 | python download_datasets.py --tasks all --dataset_types mg --hdf5_types low_dim_sparse 42 | 43 | # download all real robot datasets 44 | python download_datasets.py --tasks real 45 | """ 46 | import os 47 | import argparse 48 | 49 | import robomimic 50 | import robomimic.utils.file_utils as FileUtils 51 | from robomimic import DATASET_REGISTRY 52 | 53 | ALL_TASKS = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"] 54 | ALL_DATASET_TYPES = ["ph", "mh", "mg", "paired"] 55 | ALL_HDF5_TYPES = ["raw", "low_dim", "image", "low_dim_sparse", "low_dim_dense", "image_sparse", "image_dense"] 56 | 57 | 58 | if __name__ == "__main__": 59 | parser = argparse.ArgumentParser() 60 | 61 | # directory to download datasets to 62 | parser.add_argument( 63 | "--download_dir", 64 | type=str, 65 | default=None, 66 | help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.", 67 | ) 68 | 69 | # tasks to download datasets for 70 | parser.add_argument( 71 | "--tasks", 72 | type=str, 73 | nargs='+', 74 | default=["lift"], 75 | help="Tasks to download datasets for. Defaults to lift task. Pass 'all' to download all tasks (sim + real)\ 76 | 'sim' to download all sim tasks, 'real' to download all real tasks, or directly specify the list of\ 77 | tasks.", 78 | ) 79 | 80 | # dataset types to download datasets for 81 | parser.add_argument( 82 | "--dataset_types", 83 | type=str, 84 | nargs='+', 85 | default=["ph"], 86 | help="Dataset types to download datasets for (e.g. ph, mh, mg). Defaults to ph. Pass 'all' to download \ 87 | datasets for all available dataset types per task, or directly specify the list of dataset types.", 88 | ) 89 | 90 | # hdf5 types to download datasets for 91 | parser.add_argument( 92 | "--hdf5_types", 93 | type=str, 94 | nargs='+', 95 | default=["low_dim"], 96 | help="hdf5 types to download datasets for (e.g. raw, low_dim, image). Defaults to low_dim. 
Pass 'all' \ 97 | to download datasets for all available hdf5 types per task and dataset, or directly specify the list\ 98 | of hdf5 types.", 99 | ) 100 | 101 | # dry run - don't actually download datasets, but print which datasets would be downloaded 102 | parser.add_argument( 103 | "--dry_run", 104 | action='store_true', 105 | help="set this flag to do a dry run to only print which datasets would be downloaded" 106 | ) 107 | 108 | args = parser.parse_args() 109 | 110 | # set default base directory for downloads 111 | default_base_dir = args.download_dir 112 | if default_base_dir is None: 113 | default_base_dir = os.path.join(robomimic.__path__[0], "../datasets") 114 | 115 | # load args 116 | download_tasks = args.tasks 117 | if "all" in download_tasks: 118 | assert len(download_tasks) == 1, "all should be only tasks argument but got: {}".format(args.tasks) 119 | download_tasks = ALL_TASKS 120 | elif "sim" in download_tasks: 121 | assert len(download_tasks) == 1, "sim should be only tasks argument but got: {}".format(args.tasks) 122 | download_tasks = [task for task in ALL_TASKS if "real" not in task] 123 | elif "real" in download_tasks: 124 | assert len(download_tasks) == 1, "real should be only tasks argument but got: {}".format(args.tasks) 125 | download_tasks = [task for task in ALL_TASKS if "real" in task] 126 | 127 | download_dataset_types = args.dataset_types 128 | if "all" in download_dataset_types: 129 | assert len(download_dataset_types) == 1, "all should be only dataset_types argument but got: {}".format(args.dataset_types) 130 | download_dataset_types = ALL_DATASET_TYPES 131 | 132 | download_hdf5_types = args.hdf5_types 133 | if "all" in download_hdf5_types: 134 | assert len(download_hdf5_types) == 1, "all should be only hdf5_types argument but got: {}".format(args.hdf5_types) 135 | download_hdf5_types = ALL_HDF5_TYPES 136 | 137 | # download requested datasets 138 | for task in DATASET_REGISTRY: 139 | if task in download_tasks: 140 | for dataset_type in DATASET_REGISTRY[task]: 141 | if dataset_type in download_dataset_types: 142 | for hdf5_type in DATASET_REGISTRY[task][dataset_type]: 143 | if hdf5_type in download_hdf5_types: 144 | download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type)) 145 | print("\nDownloading dataset:\n task: {}\n dataset type: {}\n hdf5 type: {}\n download path: {}" 146 | .format(task, dataset_type, hdf5_type, download_dir)) 147 | if args.dry_run: 148 | print("\ndry run: skip download") 149 | else: 150 | # Make sure path exists and create if it doesn't 151 | os.makedirs(download_dir, exist_ok=True) 152 | FileUtils.download_url( 153 | url=DATASET_REGISTRY[task][dataset_type][hdf5_type]["url"], 154 | download_dir=download_dir, 155 | ) 156 | print("") 157 | -------------------------------------------------------------------------------- /robomimic/scripts/download_momart_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to download datasets used in MoMaRT paper (https://arxiv.org/abs/2112.05251). By default, all 3 | datasets will be stored at robomimic/datasets, unless the @download_dir 4 | argument is supplied. We recommend using the default, as most examples that 5 | use these datasets assume that they can be found there. 6 | 7 | The @tasks and @dataset_types arguments can all be supplied 8 | to choose which datasets to download. 9 | 10 | Args: 11 | download_dir (str): Base download directory. Created if it doesn't exist. 
12 | Defaults to datasets folder in repository - only pass in if you would 13 | like to override the location. 14 | 15 | tasks (list): Tasks to download datasets for. Defaults to table_setup_from_dishwasher task. Pass 'all' to 16 | download all tasks - 5 total: 17 | - table_setup_from_dishwasher 18 | - table_setup_from_dresser 19 | - table_cleanup_to_dishwasher 20 | - table_cleanup_to_sink 21 | - unload_dishwasher 22 | 23 | dataset_types (list): Dataset types to download datasets for (expert, suboptimal, generalize, sample). 24 | Defaults to expert. Pass 'all' to download datasets for all available dataset 25 | types per task, or directly specify the list of dataset types. 26 | NOTE: Because these datasets are huge, we will always print out a warning 27 | that a user must confirm with 'yes' to acknowledge the data size (can be up to >100GB for all tasks of a single type) 28 | 29 | Example usage: 30 | 31 | # default behavior - just download expert table_setup_from_dishwasher dataset 32 | python download_momart_datasets.py 33 | 34 | # download expert datasets for all tasks 35 | # (do a dry run first to see which datasets would be downloaded) 36 | python download_momart_datasets.py --tasks all --dataset_types expert --dry_run 37 | python download_momart_datasets.py --tasks all --dataset_types expert 38 | 39 | # download all expert and suboptimal datasets for the table_setup_from_dishwasher and table_cleanup_to_dishwasher tasks 40 | python download_momart_datasets.py --tasks table_setup_from_dishwasher table_cleanup_to_dishwasher --dataset_types expert suboptimal 41 | 42 | # download the sample datasets 43 | python download_momart_datasets.py --tasks all --dataset_types sample 44 | 45 | # download all datasets 46 | python download_momart_datasets.py --tasks all --dataset_types all 47 | """ 48 | import os 49 | import argparse 50 | 51 | import robomimic 52 | import robomimic.utils.file_utils as FileUtils 53 | from robomimic import MOMART_DATASET_REGISTRY 54 | 55 | ALL_TASKS = [ 56 | "table_setup_from_dishwasher", 57 | "table_setup_from_dresser", 58 | "table_cleanup_to_dishwasher", 59 | "table_cleanup_to_sink", 60 | "unload_dishwasher", 61 | ] 62 | ALL_DATASET_TYPES = [ 63 | "expert", 64 | "suboptimal", 65 | "generalize", 66 | "sample", 67 | ] 68 | 69 | 70 | if __name__ == "__main__": 71 | parser = argparse.ArgumentParser() 72 | 73 | # directory to download datasets to 74 | parser.add_argument( 75 | "--download_dir", 76 | type=str, 77 | default=None, 78 | help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.", 79 | ) 80 | 81 | # tasks to download datasets for 82 | parser.add_argument( 83 | "--tasks", 84 | type=str, 85 | nargs='+', 86 | default=["table_setup_from_dishwasher"], 87 | help="Tasks to download datasets for. Defaults to table_setup_from_dishwasher task. Pass 'all' to download all " 88 | f"5 tasks, or directly specify the list of tasks. Options are any of: {ALL_TASKS}", 89 | ) 90 | 91 | # dataset types to download datasets for 92 | parser.add_argument( 93 | "--dataset_types", 94 | type=str, 95 | nargs='+', 96 | default=["expert"], 97 | help="Dataset types to download datasets for (e.g. expert, suboptimal). Defaults to expert. Pass 'all' to " 98 | "download datasets for all available dataset types per task, or directly specify the list of dataset " 99 | f"types.
Options are any of: {ALL_DATASET_TYPES}", 100 | ) 101 | 102 | # dry run - don't actually download datasets, but print which datasets would be downloaded 103 | parser.add_argument( 104 | "--dry_run", 105 | action='store_true', 106 | help="set this flag to do a dry run to only print which datasets would be downloaded" 107 | ) 108 | 109 | args = parser.parse_args() 110 | 111 | # set default base directory for downloads 112 | default_base_dir = args.download_dir 113 | if default_base_dir is None: 114 | default_base_dir = os.path.join(robomimic.__path__[0], "../datasets") 115 | 116 | # load args 117 | download_tasks = args.tasks 118 | if "all" in download_tasks: 119 | assert len(download_tasks) == 1, "all should be only tasks argument but got: {}".format(args.tasks) 120 | download_tasks = ALL_TASKS 121 | 122 | download_dataset_types = args.dataset_types 123 | if "all" in download_dataset_types: 124 | assert len(download_dataset_types) == 1, "all should be only dataset_types argument but got: {}".format(args.dataset_types) 125 | download_dataset_types = ALL_DATASET_TYPES 126 | 127 | # Run sanity check first to warn user if they're about to download a huge amount of data 128 | total_size = 0 129 | for task in MOMART_DATASET_REGISTRY: 130 | if task in download_tasks: 131 | for dataset_type in MOMART_DATASET_REGISTRY[task]: 132 | if dataset_type in download_dataset_types: 133 | total_size += MOMART_DATASET_REGISTRY[task][dataset_type]["size"] 134 | 135 | # Verify user acknowledgement if we're not doing a dry run 136 | if not args.dry_run: 137 | user_response = input(f"Warning: requested datasets will take a total of {total_size}GB. Proceed? y/n\n") 138 | assert user_response.lower() in {"yes", "y"}, f"Did not receive confirmation. Aborting download." 139 | 140 | # download requested datasets 141 | for task in MOMART_DATASET_REGISTRY: 142 | if task in download_tasks: 143 | for dataset_type in MOMART_DATASET_REGISTRY[task]: 144 | if dataset_type in download_dataset_types: 145 | dataset_info = MOMART_DATASET_REGISTRY[task][dataset_type] 146 | download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type)) 147 | print(f"\nDownloading dataset:\n" 148 | f" task: {task}\n" 149 | f" dataset type: {dataset_type}\n" 150 | f" dataset size: {dataset_info['size']}GB\n" 151 | f" download path: {download_dir}") 152 | if args.dry_run: 153 | print("\ndry run: skip download") 154 | else: 155 | # Make sure path exists and create if it doesn't 156 | os.makedirs(download_dir, exist_ok=True) 157 | FileUtils.download_url( 158 | url=dataset_info["url"], 159 | download_dir=download_dir, 160 | ) 161 | print("") 162 | -------------------------------------------------------------------------------- /robomimic/scripts/extract_obs_from_raw_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script holds the commands that were used to go from raw robosuite demo.hdf5 files 4 | # to our processed low-dim and image hdf5 files. 
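# --- illustrative aside (added for exposition; not part of the original script) ---
# Every command below follows the same pattern (flag values here are examples):
#
#   python dataset_states_to_obs.py \
#       --done_mode 2 \                  # 0: done on task success (MG); 2: done on success + end of trajectory (PH/MH)
#       --dataset /path/to/demo.hdf5 \   # raw states file to read
#       --output_name low_dim.hdf5       # processed hdf5 written alongside the input
#
# Image variants additionally pass --camera_names / --camera_height / --camera_width,
# and the machine-generated (MG) datasets add --shaped for dense-reward versions.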
5 | 6 | BASE_DATASET_DIR="../../datasets" 7 | echo "Using base dataset directory: $BASE_DATASET_DIR" 8 | 9 | 10 | ### NOTE: we use done-mode 0 for MG (dones on task success) ### 11 | 12 | 13 | ### mg ### 14 | 15 | 16 | # lift - mg, sparse 17 | python dataset_states_to_obs.py --done_mode 0 \ 18 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 19 | --output_name low_dim_sparse.hdf5 20 | python dataset_states_to_obs.py --done_mode 0 \ 21 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 22 | --output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 23 | 24 | # lift - mg, dense 25 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 26 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 27 | --output_name low_dim_dense.hdf5 28 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 29 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \ 30 | --output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 31 | 32 | # can - mg, sparse 33 | python dataset_states_to_obs.py --done_mode 0 \ 34 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 35 | --output_name low_dim_sparse.hdf5 36 | python dataset_states_to_obs.py --done_mode 0 \ 37 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 38 | --output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 39 | 40 | # can - mg, dense 41 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 42 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 43 | --output_name low_dim_dense.hdf5 44 | python dataset_states_to_obs.py --done_mode 0 --shaped \ 45 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \ 46 | --output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 47 | 48 | 49 | ### NOTE: we use done-mode 2 for PH / MH (dones on task success and end of trajectory) ### 50 | 51 | 52 | ### ph ### 53 | 54 | 55 | # lift - ph 56 | python dataset_states_to_obs.py --done_mode 2 \ 57 | --dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \ 58 | --output_name low_dim.hdf5 59 | python dataset_states_to_obs.py --done_mode 2 \ 60 | --dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \ 61 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 62 | 63 | # can - ph 64 | python dataset_states_to_obs.py --done_mode 2 \ 65 | --dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \ 66 | --output_name low_dim.hdf5 67 | python dataset_states_to_obs.py --done_mode 2 \ 68 | --dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \ 69 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 70 | 71 | # square - ph 72 | python dataset_states_to_obs.py --done_mode 2 \ 73 | --dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \ 74 | --output_name low_dim.hdf5 75 | python dataset_states_to_obs.py --done_mode 2 \ 76 | --dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \ 77 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 78 | 79 | # transport - ph 80 | python dataset_states_to_obs.py --done_mode 2 \ 81 | --dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \ 82 | --output_name low_dim.hdf5 83 | python dataset_states_to_obs.py --done_mode 2 \ 84 | --dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \ 85 | --output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 86 | 87 | # tool hang 
- ph 88 | python dataset_states_to_obs.py --done_mode 2 \ 89 | --dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \ 90 | --output_name low_dim.hdf5 91 | python dataset_states_to_obs.py --done_mode 2 \ 92 | --dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \ 93 | --output_name image.hdf5 --camera_names sideview robot0_eye_in_hand --camera_height 240 --camera_width 240 94 | 95 | 96 | ### mh ### 97 | 98 | 99 | # lift - mh 100 | python dataset_states_to_obs.py --done_mode 2 \ 101 | --dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \ 102 | --output_name low_dim.hdf5 103 | python dataset_states_to_obs.py --done_mode 2 \ 104 | --dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \ 105 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 106 | 107 | # can - mh 108 | python dataset_states_to_obs.py --done_mode 2 \ 109 | --dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \ 110 | --output_name low_dim.hdf5 111 | python dataset_states_to_obs.py --done_mode 2 \ 112 | --dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \ 113 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 114 | 115 | # square - mh 116 | python dataset_states_to_obs.py --done_mode 2 \ 117 | --dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \ 118 | --output_name low_dim.hdf5 119 | python dataset_states_to_obs.py --done_mode 2 \ 120 | --dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \ 121 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 122 | 123 | # transport - mh 124 | python dataset_states_to_obs.py --done_mode 2 \ 125 | --dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \ 126 | --output_name low_dim.hdf5 127 | python dataset_states_to_obs.py --done_mode 2 \ 128 | --dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \ 129 | --output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84 130 | 131 | 132 | ### can-paired ### 133 | 134 | 135 | python dataset_states_to_obs.py --done_mode 2 \ 136 | --dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \ 137 | --output_name low_dim.hdf5 138 | python dataset_states_to_obs.py --done_mode 2 \ 139 | --dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \ 140 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84 141 | -------------------------------------------------------------------------------- /robomimic/scripts/generate_config_templates.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpful script to generate example config files for each algorithm. These should be re-generated 3 | when new config options are added, or when default settings in the config classes are modified. 
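Example usage (the script takes no arguments):

    python generate_config_templates.py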
4 | """ 5 | import os 6 | import json 7 | 8 | import robomimic 9 | from robomimic.config import get_all_registered_configs 10 | 11 | 12 | def main(): 13 | # store template config jsons in this directory 14 | target_dir = os.path.join(robomimic.__path__[0], "exps/templates/") 15 | 16 | # iterate through registered algorithm config classes 17 | all_configs = get_all_registered_configs() 18 | for algo_name in all_configs: 19 | # make config class for this algorithm 20 | c = all_configs[algo_name]() 21 | assert algo_name == c.algo_name 22 | # dump to json 23 | json_path = os.path.join(target_dir, "{}.json".format(algo_name)) 24 | c.dump(filename=json_path) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /robomimic/scripts/get_dataset_info.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper script to report dataset information. By default, will print trajectory length statistics, 3 | the maximum and minimum action element in the dataset, filter keys present, environment 4 | metadata, and the structure of the first demonstration. If --verbose is passed, it will 5 | report the exact demo keys under each filter key, and the structure of all demonstrations 6 | (not just the first one). 7 | 8 | Args: 9 | dataset (str): path to hdf5 dataset 10 | 11 | filter_key (str): if provided, report statistics on the subset of trajectories 12 | in the file that correspond to this filter key 13 | 14 | verbose (bool): if flag is provided, print more details, like the structure of all 15 | demonstrations (not just the first one) 16 | 17 | Example usage: 18 | 19 | # run script on example hdf5 packaged with repository 20 | python get_dataset_info.py --dataset ../../tests/assets/test.hdf5 21 | 22 | # run script only on validation data 23 | python get_dataset_info.py --dataset ../../tests/assets/test.hdf5 --filter_key valid 24 | """ 25 | import h5py 26 | import json 27 | import argparse 28 | import numpy as np 29 | 30 | if __name__ == "__main__": 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument( 33 | "--dataset", 34 | type=str, 35 | help="path to hdf5 dataset", 36 | ) 37 | parser.add_argument( 38 | "--filter_key", 39 | type=str, 40 | default=None, 41 | help="(optional) if provided, report statistics on the subset of trajectories \ 42 | in the file that correspond to this filter key", 43 | ) 44 | parser.add_argument( 45 | "--verbose", 46 | action='store_true', 47 | help="verbose output", 48 | ) 49 | args = parser.parse_args() 50 | 51 | # extract demonstration list from file 52 | filter_key = args.filter_key 53 | all_filter_keys = None 54 | f = h5py.File(args.dataset, "r") 55 | if filter_key is not None: 56 | # use the demonstrations from the filter key instead 57 | print("NOTE: using filter key {}".format(filter_key)) 58 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])]) 59 | else: 60 | # use all demonstrations 61 | demos = sorted(list(f["data"].keys())) 62 | 63 | # extract filter key information 64 | if "mask" in f: 65 | all_filter_keys = {} 66 | for fk in f["mask"]: 67 | fk_demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(fk)])]) 68 | all_filter_keys[fk] = fk_demos 69 | 70 | # put demonstration list in increasing episode order 71 | inds = np.argsort([int(elem[5:]) for elem in demos]) 72 | demos = [demos[i] for i in inds] 73 | 74 | # extract length of each trajectory in the file 75 | 
traj_lengths = [] 76 | action_min = np.inf 77 | action_max = -np.inf 78 | for ep in demos: 79 | traj_lengths.append(f["data/{}/actions".format(ep)].shape[0]) 80 | action_min = min(action_min, np.min(f["data/{}/actions".format(ep)][()])) 81 | action_max = max(action_max, np.max(f["data/{}/actions".format(ep)][()])) 82 | traj_lengths = np.array(traj_lengths) 83 | 84 | # report statistics on the data 85 | print("") 86 | print("total transitions: {}".format(np.sum(traj_lengths))) 87 | print("total trajectories: {}".format(traj_lengths.shape[0])) 88 | print("traj length mean: {}".format(np.mean(traj_lengths))) 89 | print("traj length std: {}".format(np.std(traj_lengths))) 90 | print("traj length min: {}".format(np.min(traj_lengths))) 91 | print("traj length max: {}".format(np.max(traj_lengths))) 92 | print("action min: {}".format(action_min)) 93 | print("action max: {}".format(action_max)) 94 | print("") 95 | print("==== Filter Keys ====") 96 | if all_filter_keys is not None: 97 | for fk in all_filter_keys: 98 | print("filter key {} with {} demos".format(fk, len(all_filter_keys[fk]))) 99 | else: 100 | print("no filter keys") 101 | print("") 102 | if args.verbose: 103 | if all_filter_keys is not None: 104 | print("==== Filter Key Contents ====") 105 | for fk in all_filter_keys: 106 | print("filter_key {} with {} demos: {}".format(fk, len(all_filter_keys[fk]), all_filter_keys[fk])) 107 | print("") 108 | env_meta = json.loads(f["data"].attrs["env_args"]) 109 | print("==== Env Meta ====") 110 | print(json.dumps(env_meta, indent=4)) 111 | print("") 112 | 113 | print("==== Dataset Structure ====") 114 | for ep in demos: 115 | print("episode {} with {} transitions".format(ep, f["data/{}".format(ep)].attrs["num_samples"])) 116 | for k in f["data/{}".format(ep)]: 117 | if k in ["obs", "next_obs"]: 118 | print(" key: {}".format(k)) 119 | for obs_k in f["data/{}/{}".format(ep, k)]: 120 | shape = f["data/{}/{}/{}".format(ep, k, obs_k)].shape 121 | print(" observation key {} with shape {}".format(obs_k, shape)) 122 | elif isinstance(f["data/{}/{}".format(ep, k)], h5py.Dataset): 123 | key_shape = f["data/{}/{}".format(ep, k)].shape 124 | print(" key: {} with shape {}".format(k, key_shape)) 125 | 126 | if not args.verbose: 127 | break 128 | 129 | f.close() 130 | 131 | # maybe display error message 132 | print("") 133 | if (action_min < -1.) or (action_max > 1.): 134 | raise Exception("Dataset should have actions in [-1., 1.] but got bounds [{}, {}]".format(action_min, action_max)) 135 | -------------------------------------------------------------------------------- /robomimic/scripts/hitl/collect_playback_utils.py: -------------------------------------------------------------------------------- 1 | """Teleoperate robot with keyboard or SpaceMouse. 
""" 2 | 3 | import argparse 4 | import numpy as np 5 | import os 6 | import robosuite as suite 7 | from robosuite import load_controller_config 8 | from robosuite.utils.input_utils import input2action 9 | from robosuite.wrappers import DataCollectionWrapper 10 | import time 11 | import numpy as np 12 | import json 13 | from robosuite.scripts.collect_human_demonstrations import gather_demonstrations_as_hdf5 14 | import robomimic 15 | import cv2 16 | import robomimic.utils.obs_utils as ObsUtils 17 | import copy 18 | import h5py 19 | 20 | import robosuite 21 | is_v1 = (robosuite.__version__.split(".")[0] == "1") 22 | 23 | # Change later 24 | GOOD_EPISODE_LENGTH = None 25 | MAX_EPISODE_LENGTH = None 26 | SUCCESS_HOLD = None 27 | 28 | class RandomPolicy: 29 | def __init__(self, env): 30 | self.env = env 31 | self.low, self.high = env.action_spec 32 | 33 | def get_action(self, obs): 34 | return np.random.uniform(self.low, self.high) / 2 35 | 36 | class TrainedPolicy: 37 | def __init__(self, checkpoint): 38 | from robomimic.utils.file_utils import policy_from_checkpoint 39 | self.policy = policy_from_checkpoint(ckpt_path=checkpoint)[0] 40 | 41 | def get_action(self, obs): 42 | obs = copy.deepcopy(obs) 43 | di = obs 44 | postprocess_visual_obs = True 45 | 46 | ret = {} 47 | for k in di: 48 | pass 49 | """ 50 | if ObsUtils.key_is_image(k): 51 | ret[k] = di[k][::-1] 52 | if postprocess_visual_obs: 53 | ret[k] = ObsUtils.process_image(ret[k]) 54 | """ 55 | obs.update(ret) 56 | 57 | return self.policy(obs) 58 | 59 | def is_empty_input_spacemouse(action): 60 | # empty_input1 = np.array([0.000, 0.000, 0.000, 0.000, 0.000, 0.000, -1.000]) 61 | empty_input = np.array([0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 1.000]) 62 | if np.array_equal(np.abs(action), empty_input): 63 | return True 64 | return False 65 | 66 | def terminate_condition_met(time_success, timestep_count, term_cond): 67 | assert term_cond in ["fixed_length", "success_count", "stop"] 68 | if term_cond == "fixed_length": 69 | return timestep_count >= GOOD_EPISODE_LENGTH and time_success > 0 70 | elif term_cond == "success_count": 71 | return time_success == SUCCESS_HOLD 72 | elif term_cond == "stop": 73 | return timestep_count >= MAX_EPISODE_LENGTH 74 | 75 | def post_process_spacemouse_action(action, grasp, last_grasp): 76 | """ Fixing Spacemouse Action """ 77 | # If the current grasp is active (1) and last grasp is not (-1) (i.e.: grasping input just pressed), 78 | # toggle arm control and / or camera viewing angle if requested 79 | if last_grasp < 0 < grasp: 80 | if args.switch_on_grasp: 81 | args.arm = "left" if args.arm == "right" else "right" 82 | if args.toggle_camera_on_grasp: 83 | cam_id = (cam_id + 1) % num_cam 84 | env.viewer.set_camera(camera_id=cam_id) 85 | # Update last grasp 86 | last_grasp = grasp 87 | 88 | if is_v1: 89 | env_action_dim = env.action_dim 90 | else: 91 | env_action_dim = 7 92 | 93 | # Fill out the rest of the action space if necessary 94 | rem_action_dim = env_action_dim - action.size 95 | if rem_action_dim > 0: 96 | # Initialize remaining action space 97 | rem_action = np.zeros(rem_action_dim) 98 | # This is a multi-arm setting, choose which arm to control and fill the rest with zeros 99 | if args.arm == "right": 100 | action = np.concatenate([action, rem_action]) 101 | elif args.arm == "left": 102 | action = np.concatenate([rem_action, action]) 103 | else: 104 | # Only right and left arms supported 105 | print("Error: Unsupported arm specified -- " 106 | "must be either 'right' or 'left'! 
Got: {}".format(args.arm)) 107 | elif rem_action_dim < 0: 108 | # We're in an environment with no gripper action space, so trim the action space to be the action dim 109 | action = action[:env_action_dim] 110 | 111 | """ End Fixing Spacemouse Action """ 112 | return action, last_grasp 113 | 114 | def reset_to(env, state): 115 | """ 116 | Reset to a specific simulator state. 117 | 118 | Args: 119 | state (dict): current simulator state that contains one or more of: 120 | - states (np.ndarray): initial state of the mujoco environment 121 | - model (str): mujoco scene xml 122 | 123 | Returns: 124 | observation (dict): observation dictionary after setting the simulator state (only 125 | if "states" is in @state) 126 | """ 127 | should_ret = False 128 | if "model" in state: 129 | env.reset() 130 | xml = env.postprocess_model_xml(state["model"]) 131 | env.reset_from_xml_string(xml) 132 | env.sim.reset() 133 | if not is_v1: 134 | # hide teleop visualization after restoring from model 135 | env.sim.model.site_rgba[self.env.eef_site_id] = np.array([0., 0., 0., 0.]) 136 | env.sim.model.site_rgba[self.env.eef_cylinder_id] = np.array([0., 0., 0., 0.]) 137 | if "states" in state: 138 | env.sim.set_state_from_flattened(state["states"]) 139 | env.sim.forward() 140 | should_ret = True 141 | 142 | if "goal" in state: 143 | env.set_goal(**state["goal"]) 144 | 145 | return env._get_observations(force_update=True) 146 | -------------------------------------------------------------------------------- /robomimic/scripts/hyperparam_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | A useful script for generating json files and shell scripts for conducting parameter scans. 3 | The script takes a path to a base json file as an argument and a shell file name. 4 | It generates a set of new json files in the same folder as the base json file, and 5 | a shell file script that contains commands to run for each experiment. 6 | 7 | Instructions: 8 | 9 | (1) Start with a base json that specifies a complete set of parameters for a single 10 | run. This only needs to include parameters you want to sweep over, and parameters 11 | that are different from the defaults. You can set this file path by either 12 | passing it as an argument (e.g. --config /path/to/base.json) or by directly 13 | setting the config file in @make_generator. The new experiment jsons will be put 14 | into the same directory as the base json. 15 | 16 | (2) Decide on what json parameters you would like to sweep over, and fill those in as 17 | keys in @make_generator below, taking note of the hierarchical key 18 | formatting using "/" or ".". Fill in corresponding values for each - these will 19 | be used in creating the experiment names, and for determining the range 20 | of values to sweep. Parameters that should be sweeped together should 21 | be assigned the same group number. 22 | 23 | (3) Set the output script name by either passing it as an argument (e.g. --script /path/to/script.sh) 24 | or by directly setting the script file in @make_generator. The script to run all experiments 25 | will be created at the specified path. 26 | 27 | Args: 28 | config (str): path to a base config json file that will be modified to generate config jsons. 29 | The jsons will be generated in the same folder as this file. 
30 | 31 | script (str): path to output script that contains commands to run the generated training runs 32 | 33 | Example usage: 34 | 35 | # assumes that /tmp/gen_configs/base.json has already been created (see quickstart section of docs for an example) 36 | python hyperparam_helper.py --config /tmp/gen_configs/base.json --script /tmp/gen_configs/out.sh 37 | """ 38 | import argparse 39 | 40 | import robomimic 41 | import robomimic.utils.hyperparam_utils as HyperparamUtils 42 | 43 | 44 | def make_generator(config_file, script_file): 45 | """ 46 | Implement this function to setup your own hyperparameter scan! 47 | """ 48 | generator = HyperparamUtils.ConfigGenerator( 49 | base_config_file=config_file, script_file=script_file 50 | ) 51 | 52 | # use RNN with horizon 10 53 | generator.add_param( 54 | key="algo.rnn.enabled", 55 | name="", 56 | group=0, 57 | values=[True], 58 | ) 59 | generator.add_param( 60 | key="train.seq_length", 61 | name="", 62 | group=0, 63 | values=[10], 64 | ) 65 | generator.add_param( 66 | key="algo.rnn.horizon", 67 | name="", 68 | group=0, 69 | values=[10], 70 | ) 71 | 72 | # LR - 1e-3, 1e-4 73 | generator.add_param( 74 | key="algo.optim_params.policy.learning_rate.initial", 75 | name="plr", 76 | group=1, 77 | values=[1e-3, 1e-4], 78 | ) 79 | 80 | # GMM y / n 81 | generator.add_param( 82 | key="algo.gmm.enabled", 83 | name="gmm", 84 | group=2, 85 | values=[True, False], 86 | value_names=["t", "f"], 87 | ) 88 | 89 | # RNN dim 400 + MLP dims (1024, 1024) vs. RNN dim 1000 + empty MLP dims () 90 | generator.add_param( 91 | key="algo.rnn.hidden_dim", 92 | name="rnnd", 93 | group=3, 94 | values=[ 95 | 400, 96 | 1000, 97 | ], 98 | ) 99 | generator.add_param( 100 | key="algo.actor_layer_dims", 101 | name="mlp", 102 | group=3, 103 | values=[ 104 | [1024, 1024], 105 | [], 106 | ], 107 | value_names=["1024", "0"], 108 | ) 109 | 110 | return generator 111 | 112 | 113 | def main(args): 114 | 115 | # make config generator 116 | generator = make_generator(config_file=args.config, script_file=args.script) 117 | 118 | # generate jsons and script 119 | generator.generate() 120 | 121 | 122 | if __name__ == "__main__": 123 | parser = argparse.ArgumentParser() 124 | 125 | # Path to base json config - will override any defaults. 126 | parser.add_argument( 127 | "--config", 128 | type=str, 129 | help="path to base config json that will be modified to generate jsons. 
The jsons will\ 130 | be generated in the same folder as this file.", 131 | ) 132 | 133 | # Script name to generate - will override any defaults 134 | parser.add_argument( 135 | "--script", 136 | type=str, 137 | help="path to output script that contains commands to run the generated training runs", 138 | ) 139 | 140 | args = parser.parse_args() 141 | main(args) 142 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/auto_append.txt: -------------------------------------------------------------------------------- 1 | no 2 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/auto_overwrite.txt: -------------------------------------------------------------------------------- 1 | yes 2 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/base_args.py: -------------------------------------------------------------------------------- 1 | """ 2 | File holding all command line arguments to use 3 | """ 4 | 5 | from argparse import ArgumentParser, Namespace, Action, ArgumentError, SUPPRESS, _UNRECOGNIZED_ARGS_ATTR 6 | import sys as _sys 7 | 8 | BOOL_CHOICES = ['True', 'False', 'true', 'false'] 9 | BOOL_MAPPING = { 10 | "false": False, 11 | "true": True 12 | } 13 | BOOL_STR = BOOL_MAPPING.keys() 14 | 15 | 16 | def maybe_array_to_element(inp): 17 | """ 18 | Maybe converts an array to a single (numerical) element. If len(inp) == 1, returns the input's first 19 | element. Otherwise, returns the input 20 | """ 21 | return inp[0] if type(inp) is list and len(inp) == 1 else inp 22 | 23 | 24 | # Define custom parsing class for nested default parses 25 | class NestedParser(ArgumentParser): 26 | def parse_known_args(self, args=None, namespace=None): 27 | if args is None: 28 | # args default to the system args 29 | args = _sys.argv[1:] 30 | else: 31 | # make sure that args are mutable 32 | args = list(args) 33 | 34 | # default Namespace built from parser defaults 35 | if namespace is None: 36 | namespace = Namespace() 37 | 38 | # add any action defaults that aren't present 39 | for action in self._actions: 40 | if action.dest is not SUPPRESS: 41 | if not hasattr(namespace, action.dest): 42 | if action.default is not SUPPRESS: 43 | # Send attribute to groupspace, not namespace! 
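# (illustrative aside, added for exposition) e.g. a default registered with
# const="sbatch" and dest="num_cpu" lands on namespace.sbatch.num_cpu rather
# than namespace.num_cpu, so callers can read grouped args via args.sbatch.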
44 | groupspace = getattr(namespace, action.const, None) if action.const else namespace 45 | if groupspace is None: 46 | # Create new attribute in main namespace and reference this with groupspace 47 | setattr(namespace, action.const, Namespace()) 48 | groupspace = getattr(namespace, action.const) 49 | default = BOOL_MAPPING[action.default.lower()] \ 50 | if type(action.default) is str and action.default.lower() in BOOL_STR \ 51 | else action.default 52 | setattr(groupspace, action.dest, default) 53 | 54 | # add any parser defaults that aren't present 55 | for dest in self._defaults: 56 | if not hasattr(namespace, dest): 57 | #groupspace = getattr(namespace, dest.const, Namespace()) if dest.const else namespace 58 | setattr(namespace, dest, self._defaults[dest]) 59 | 60 | # parse the arguments and exit if there are any errors 61 | try: 62 | namespace, args = self._parse_known_args(args, namespace) 63 | if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): 64 | args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) 65 | delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) 66 | return namespace, args 67 | except ArgumentError: 68 | err = _sys.exc_info()[1] 69 | self.error(str(err)) 70 | 71 | 72 | # Define class for creating custom nested namespaces 73 | class GroupedAction(Action): 74 | 75 | def __init__(self, 76 | option_strings, 77 | dest, 78 | nargs=None, 79 | const=None, 80 | default=None, 81 | type=None, 82 | choices=None, 83 | required=False, 84 | help=None, 85 | metavar=None, 86 | maybe_array=False, 87 | ): 88 | # Add custom attributes 89 | self.maybe_array = maybe_array 90 | 91 | # Run super init 92 | super().__init__( 93 | option_strings=option_strings, 94 | dest=dest, 95 | nargs=nargs, 96 | const=const, 97 | default=default, 98 | type=type, 99 | choices=choices, 100 | required=required, 101 | help=help, 102 | metavar=metavar, 103 | ) 104 | 105 | def __call__(self, parser, namespace, values, option_string=None): 106 | groupspace = getattr(namespace, self.const, Namespace()) 107 | if type(values) is str and values.lower() in BOOL_STR: 108 | values = BOOL_MAPPING[values.lower()] 109 | # Possibly convert array if requested 110 | if self.maybe_array: 111 | values = maybe_array_to_element(values) 112 | setattr(groupspace, self.dest, values) 113 | setattr(namespace, self.const, groupspace) 114 | 115 | 116 | # Define global parser 117 | parser = NestedParser(description='Top level arguments') 118 | 119 | # Add seed arg always 120 | parser.add_argument( 121 | '--seed', type=int, default=1, help='random seed (default: 1)') 122 | 123 | 124 | # def parse_arguments(): 125 | # """ 126 | # Parses all arguments and splits them into their appropriate namespaces, returning separately the robosuite args, 127 | # rllib args, and agent args 128 | # """ 129 | # args = parser.parse_args() 130 | # robosuite_args = getattr(args, "robosuite", None) 131 | # rllib_args = getattr(args, "rllib", None) 132 | # model_args = getattr(args, "model", None) 133 | # agent_args = getattr(args, "agent", None) 134 | # 135 | # # Print all args 136 | # print() 137 | # for t, arg in zip(("robosuite", "rllib", "model", "agent"), (robosuite_args, rllib_args, model_args, agent_args)): 138 | # print(' {} Params: '.format(t)) 139 | # if arg is not None: 140 | # for key, value in arg.__dict__.items(): 141 | # if key.startswith('__') or key.startswith('_'): 142 | # continue 143 | # print(' {}: {}'.format(key, value)) 144 | # print() 145 | # 146 | # # Return args 147 | # return robosuite_args, rllib_args, model_args, agent_args 148 | 149 | 150 | 
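# --- illustrative usage sketch (added for exposition; hypothetical flag, not part of the original file) ---
# How NestedParser and GroupedAction compose, mirroring sbatch_args.py:
#
#   parser.add_argument('--num_cpu', type=int, default=4,
#                       const='sbatch', action=GroupedAction)
#   args = parser.parse_args(['--num_cpu', '8', '--seed', '3'])
#   args.seed            # -> 3 (plain args stay on the top-level namespace)
#   args.sbatch.num_cpu  # -> 8 (grouped args land on a nested Namespace)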
if __name__ == '__main__': 151 | # Add arguments 152 | # add_robosuite_arguments() 153 | # add_rllib_arguments() 154 | # add_ppo_arguments() 155 | # 156 | # # Test parsing functionality 157 | # a, b, c = parse_arguments() 158 | # print(a) 159 | # print(b) 160 | # print(c) 161 | pass 162 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/base_template.sbatch: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #all commands that start with SBATCH contain commands that are just used by SLURM for scheduling 4 | ################# 5 | #partition name 6 | #SBATCH --partition={{PARTITION}} 7 | #specific machines to avoid 8 | #SBATCH --exclude={{EXCLUDE}} 9 | ################# 10 | #number of GPUs 11 | #SBATCH --gres=gpu:{{NUM_GPU}} 12 | ##SBATCH --nodes=1 13 | #SBATCH --cpus-per-task=4 14 | #SBATCH --ntasks={{NUM_CPU}} 15 | ################# 16 | #set a job name 17 | #SBATCH --job-name="{{JOB_NAME}}" 18 | ################# 19 | #a file for job output, you can check job progress, append the job ID with %j to make it unique 20 | #SBATCH --output={{EXECUTABLE_LOG_DIR}}/%j.out 21 | ################# 22 | # a file for errors from the job 23 | #SBATCH --error={{EXECUTABLE_LOG_DIR}}/%j.err 24 | ################# 25 | #time you think you need; default is 2 hours 26 | #format could be dd-hh:mm:ss, hh:mm:ss, mm:ss, or mm 27 | #SBATCH --time={{HOURS}}:00:00 28 | ################# 29 | # Quality of Service (QOS); think of it as sending your job into a special queue; --qos=long for with a max job length of 7 days. 30 | # uncomment ##SBATCH --qos=long if you want your job to run longer than 48 hours, which is the default for normal partition, 31 | # NOTE- in the hns partition the default max run time is 7 days , so you wont need to include qos, also change to normal partition 32 | # since dev max run time is 2 hours. 33 | #{{QOS_LONG}} 34 | # We are submitting to the dev partition, there are several on sherlock: normal, gpu, bigmem (jobs requiring >64Gigs RAM) 35 | ##SBATCH -p dev 36 | ################# 37 | # --mem is memory per node; default is 4000 MB per CPU, remember to ask for enough mem to match your CPU request, since 38 | # sherlock automatically allocates 4 Gigs of RAM/CPU, if you ask for 8 CPUs you will get 32 Gigs of RAM, so either 39 | # leave --mem commented out or request >= to the RAM needed for your CPU request. It will also accept mem. in units, ie "--mem=4G" 40 | #SBATCH --mem={{MEM}}G 41 | # to request multiple threads/CPUs use the -c option, on Sherlock we use 1 thread/CPU, 16 CPUs on each normal compute node 4Gigs RAM per CPU. Here we will request just 1. 42 | #SBATCH -c 1 43 | ################# 44 | # Have SLURM send you an email when the job ends or fails, careful, the email could end up in your clutter folder 45 | # Also, if you submit hundreds of jobs at once you will get hundreds of emails. 
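# (illustrative aside, added for exposition) sbatch_utils.py fills in every
# {{PLACEHOLDER}} in this template via string substitution; with hypothetical
# argument values, the rendered directives above would read e.g.:
#   #SBATCH --partition=titans
#   #SBATCH --gres=gpu:1
#   #SBATCH --time=20:00:00
#   #SBATCH --mem=16G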
46 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail 47 | # Remember to change this to your email 48 | #SBATCH --mail-user={{NOTIFICATION_EMAIL}} 49 | # list out some useful information 50 | echo "SLURM_JOBID="$SLURM_JOBID 51 | echo "SLURM_JOB_NAME="$SLURM_JOB_NAME 52 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST 53 | echo "SLURM_NNODES"=$SLURM_NNODES 54 | echo "SLURMTMPDIR="$SLURMTMPDIR 55 | echo "working directory = "$SLURM_SUBMIT_DIR 56 | #now run normal batch commands 57 | {{SHELL_SOURCE_SCRIPT}} 58 | conda activate {{PYTHON_INTERPRETER}} 59 | export PYTHONPATH=$PYTHONPATH:{{EXTRA_PYTHONPATH}} 60 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{{MUJOCO_DIR}} 61 | 62 | {{COPY_FILE}} 63 | {{CMD}} 64 | {{EXTRA_CMDS}} 65 | 66 | # done 67 | echo "Done" 68 | exit 0 69 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/run_hp_sweep.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for executing all configs generated from hyperparameter_helper.py (in batchRL) 3 | 4 | Note that this assumes that hyperparameter_helper.py has already been run, and that all the resulting 5 | configurations exist in a single folder. 6 | """ 7 | 8 | # from slurm.util.arguments import * 9 | from robomimic.scripts.slurm.batchrl_args import * 10 | from robomimic.scripts.slurm.sbatch_args import * 11 | 12 | # from slurm.util.sbatch_utils import create_and_execute_sbatch_script 13 | from robomimic.scripts.slurm.sbatch_utils import create_and_execute_sbatch_script 14 | 15 | import copy 16 | 17 | # Add relevant input arguments 18 | add_sbatch_args() 19 | add_batchrl_hp_args() 20 | 21 | 22 | def parse_configs_from_hp_script(hp_script): 23 | """ 24 | Helper script to parse the executable hyperparameter script generated from hyperparameter_helper.py (in batchRL) 25 | to infer the filepaths to the generated configs.
26 | 27 | Args: 28 | hp_script (str): Absolute fpath to the generated hyperparameter script 29 | 30 | Returns: 31 | list: Absolute paths to the configs to be deployed in the hp sweep 32 | """ 33 | # Create list to fill as we parse the script 34 | configs = [] 35 | # Open and parse file line by line 36 | with open(hp_script) as f: 37 | for line in f: 38 | # Make sure we only parse the lines where we have a valid python command 39 | if line.startswith("python"): 40 | # Extract only the config path 41 | configs.append(line.split(" ")[-1].split("\n")[0]) 42 | # Return configs 43 | return configs 44 | 45 | 46 | def generate_debug_script(hp_script): 47 | """ 48 | Helper script to generate an .sh executable debug hyperparameter script using the hp sweep script generated from 49 | hyperparameter_helper.py (in batchRL) 50 | 51 | Args: 52 | hp_script (str): Absolute fpath to the generated hyperparameter script 53 | """ 54 | # Modify the path so that we add "_debug" to the end -- hacky way since we know ".sh" extension is 3 chars long 55 | debug_script = hp_script[:-3] + "_debug.sh" 56 | # Open and parse file line by line 57 | with open(hp_script) as f: 58 | # Open a new file to write the debug script to 59 | with open(debug_script, 'w+') as new_file: 60 | # Loop through hp script and write to this new file 61 | for line in f: 62 | # Make sure we only parse the lines where we have a valid python command 63 | if line.startswith("python"): 64 | # We write the line plus the extra --debug flag 65 | new_file.write(line.split("\n")[0] + " --debug\n") 66 | else: 67 | # Just write line normally 68 | new_file.write(line) 69 | 70 | 71 | if __name__ == '__main__': 72 | # First, parse args 73 | args = parser.parse_args() 74 | 75 | # Extract configs from hp sweep script 76 | configs = parse_configs_from_hp_script(hp_script=args.batchrl_hp.hp_sweep_script) 77 | 78 | # If user requested a debug script to be generated, do that now 79 | if args.batchrl_hp.generate_debug_script: 80 | generate_debug_script(hp_script=args.batchrl_hp.hp_sweep_script) 81 | 82 | n = args.batchrl_hp.n_exps_per_instance 83 | 84 | # Loop through each config to create an sbatch script from 85 | for i in range(0, len(configs), n): 86 | script_args = [] 87 | configs_for_batch = configs[i:i+n] 88 | for config in configs_for_batch: 89 | # Extract name for this sbatch script 90 | name = config.split("/")[-1].split(".json")[0] 91 | 92 | # Compose script arguments to pass to sbatch script 93 | script_args.append({ 94 | "config": config, 95 | }) 96 | 97 | # Generate the sbatch file 98 | print(f"Creating {name}...") 99 | 100 | # Multiple resources by number of jobs in batch 101 | sbatch_args = copy.deepcopy(args.sbatch) 102 | sbatch_args.num_cpu *= len(configs_for_batch) 103 | sbatch_args.mem_gb *= len(configs_for_batch) 104 | 105 | create_and_execute_sbatch_script( 106 | filename=name, 107 | job_name=name, 108 | sbatch_args=sbatch_args, 109 | script_args=script_args) 110 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/sbatch_args.py: -------------------------------------------------------------------------------- 1 | # from slurm.util.arguments.base_args import * 2 | from robomimic.scripts.slurm.base_args import * 3 | 4 | PARTITIONS = ( 5 | "napoli", 6 | "tibet", 7 | "svl", 8 | 9 | "titans", 10 | "dgx", 11 | ) 12 | 13 | 14 | def add_sbatch_args(): 15 | """ 16 | Adds sbatch arguments needed for automatically generating and executing python files 17 | """ 18 | # Define namespace for the 
sbatch args 19 | prefix = 'sbatch' 20 | actions = { 21 | "const": prefix, 22 | "action": GroupedAction 23 | } 24 | # Required args 25 | parser.add_argument( 26 | '--script', 27 | type=str, 28 | required=True, 29 | help='path to the Python script to execute', 30 | **actions 31 | ) 32 | parser.add_argument( 33 | '--generated_dir', 34 | type=str, 35 | required=True, 36 | help='Sets the location where generated sbatch scripts will be stored', 37 | **actions 38 | ) 39 | parser.add_argument( 40 | '--python_interpreter', 41 | type=str, 42 | required=True, 43 | help='Python interpreter to use for the executed python script', 44 | **actions 45 | ) 46 | 47 | # Additional args 48 | parser.add_argument( 49 | '--partition', 50 | type=str, 51 | default='titans', 52 | choices=PARTITIONS, 53 | help='partition to run on for this process', 54 | **actions 55 | ) 56 | parser.add_argument( 57 | '--exclude', 58 | type=str, 59 | default='', 60 | help='any specific machines to avoid, comma separated', 61 | **actions 62 | ) 63 | parser.add_argument( 64 | '--gpu_type', 65 | type=str, 66 | default="any", 67 | help='Specific GPU to use. Any results in any GPU being used for this run', 68 | **actions 69 | ) 70 | parser.add_argument( 71 | '--num_gpu', 72 | type=int, 73 | default=0, 74 | help='Sets the number of gpus to use for this sbatch script', 75 | **actions 76 | ) 77 | parser.add_argument( 78 | '--num_cpu', 79 | type=int, 80 | default=4, 81 | help='Sets the number of cpus to use for this sbatch script', 82 | **actions 83 | ) 84 | parser.add_argument( 85 | '--mem_gb', 86 | type=int, 87 | default=0, 88 | help='If nonzero, sets the amount of memory to be this many GB', 89 | **actions 90 | ) 91 | parser.add_argument( 92 | '--max_hours', 93 | type=int, 94 | default=20, 95 | help='Sets the maximum number of hours this script will be run for', 96 | **actions 97 | ) 98 | parser.add_argument( 99 | '--extra_pythonpath', 100 | type=str, 101 | default="", 102 | help='Extra paths to set to the pythonpath variable', 103 | **actions 104 | ) 105 | parser.add_argument( 106 | '--overwrite', 107 | type=str, 108 | default="False", 109 | choices=BOOL_CHOICES, 110 | help='Whether to overwrite or not', 111 | **actions 112 | ) 113 | parser.add_argument( 114 | '--extra_commands', 115 | nargs="+", 116 | type=str, 117 | default=None, 118 | help='Extra commands to run after main python command', 119 | **actions 120 | ) 121 | parser.add_argument( 122 | '--copy_file', 123 | nargs="+", 124 | type=str, 125 | default=None, 126 | help='Copies a file from source to location. Expected format is [source_file, target_dir]. New file will ' 127 | 'share the same file name as the original source file.
Useful in cases e.g.: copying datasets to local ssd', 128 | **actions 129 | ) 130 | parser.add_argument( 131 | '--executable_log_dir', 132 | type=str, 133 | default='/cvgl2/u/jdwong/test_output', 134 | help='Location to dump sbatch log out / err text to', 135 | **actions 136 | ) 137 | parser.add_argument( 138 | '--shell_source_script', 139 | type=str, 140 | default=None, 141 | help='If specified, bash script to source at beginning of sbatch execution', 142 | **actions 143 | ) 144 | parser.add_argument( 145 | '--notification_email', 146 | type=str, 147 | default='jdwong@stanford.edu', 148 | help='Email address to send slurm notifications to (i.e.: when the script finishes running)', 149 | **actions 150 | ) 151 | parser.add_argument( 152 | '--mujoco_dir', 153 | type=str, 154 | default='/cvgl2/u/jdwong/.mujoco/mujoco200/bin', 155 | help='Absolute path to mujoco 200 installation bin directory', 156 | **actions 157 | ) 158 | -------------------------------------------------------------------------------- /robomimic/scripts/slurm/sbatch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python script for generating and executing sbatch files 3 | """ 4 | 5 | import os 6 | # import slurm 7 | import robomimic 8 | from pathlib import Path 9 | 10 | PARTITIONS = ( 11 | "napoli", 12 | "tibet", 13 | "svl", 14 | 15 | "titans", 16 | "dgx", 17 | ) 18 | 19 | robomimic_base_path = os.path.abspath(os.path.join(os.path.dirname(robomimic.__file__), os.pardir)) 20 | 21 | AUTO_OVERWRITE_RESP = os.path.join(robomimic_base_path, "robomimic/scripts/slurm/auto_overwrite.txt") 22 | AUTO_APPEND_RESP = os.path.join(robomimic_base_path, "robomimic/scripts/slurm/auto_append.txt") 23 | 24 | import time 25 | from datetime import datetime 26 | 27 | def create_and_execute_sbatch_script(filename, job_name, sbatch_args, script_args=None): 28 | """ 29 | Function that creates and executes an sbatch script based off of a template 30 | 31 | Args: 32 | @filename (str): Name of the sbatch file that will be generated 33 | @job_name (str): Name of sbatch job to execute 34 | @sbatch_args (Namespace): Input arguments to fill in sbatch script 35 | @script_args (list of dicts, dict or None): If specified, adds additional 36 | input arguments to script execution based on key-value mappings. 37 | If of type list, indicates multiple commands in one sbatch script. 
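Example (hypothetical values, mirroring the call in run_hp_sweep.py):

    create_and_execute_sbatch_script(
        filename="bc_im_seed1",
        job_name="bc_im_seed1",
        sbatch_args=args.sbatch,
        script_args=[{"config": "/path/to/bc_im.json"}],
    )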
38 | """ 39 | # Create a new directory path if it doesn't exist and create a new filename that we will write to 40 | Path(sbatch_args.generated_dir).mkdir(parents=True, exist_ok=True) 41 | ts = time.time() 42 | new_sbatch_fpath = os.path.join(sbatch_args.generated_dir, "{}_{}.sbatch".format(filename, ts)) 43 | 44 | # Compose extra commands 45 | if sbatch_args.extra_commands is not None: 46 | sbatch_args.extra_commands = sbatch_args.extra_commands if type(sbatch_args.extra_commands) is list else \ 47 | [sbatch_args.extra_commands] 48 | sbatch_args.extra_commands = "\n".join(sbatch_args.extra_commands) 49 | else: 50 | sbatch_args.extra_commands = "" 51 | 52 | # infer number of commands from script args 53 | if script_args is None: 54 | num_commands = 1 55 | elif not isinstance(script_args, list): 56 | script_args = [script_args] 57 | num_commands = 1 58 | else: 59 | num_commands = len(script_args) 60 | 61 | command = "" 62 | for i in range(num_commands): 63 | # Compose main command to be executed in script 64 | command += "python {}".format(sbatch_args.script) 65 | 66 | # Add additional input args if necessary 67 | if script_args is not None: 68 | for k, v in script_args[i].items(): 69 | if v is not None: 70 | if type(v) is list or type(v) is tuple: 71 | v = " ".join(str(vi) for vi in v) 72 | command += " --{} {}".format(k, v) 73 | 74 | # Add overwrite if requested 75 | if sbatch_args.overwrite: 76 | command += f" < {AUTO_OVERWRITE_RESP}" 77 | else: 78 | command += f" < {AUTO_APPEND_RESP}" 79 | 80 | command += " & \n" 81 | command += "wait" 82 | 83 | # Define partition 84 | if sbatch_args.partition == "napoli": 85 | partition = "napoli-gpu" if sbatch_args.num_gpu > 0 else "napoli-cpu\n#SBATCH --exclude=napoli[15-16]" 86 | else: 87 | partition = sbatch_args.partition 88 | 89 | # Define GPU(s) to use 90 | num_gpu = sbatch_args.num_gpu 91 | if sbatch_args.gpu_type != "any": 92 | num_gpu = f"{sbatch_args.gpu_type}:{num_gpu}" 93 | 94 | # Add copy file if requested 95 | copy_file = "" if sbatch_args.copy_file is None else create_copy_file_cmd(*sbatch_args.copy_file) 96 | 97 | # Add shell source script if requested 98 | shell_source_script = "" if sbatch_args.shell_source_script is None else f"source {sbatch_args.shell_source_script}" 99 | 100 | # Define a dict to map expected fill-ins with replacement values 101 | fill_ins = { 102 | "{{PARTITION}}": partition, 103 | "{{EXCLUDE}}": sbatch_args.exclude, 104 | "{{NUM_GPU}}": num_gpu, 105 | "{{NUM_CPU}}": sbatch_args.num_cpu, 106 | "{{JOB_NAME}}": job_name, 107 | "{{EXECUTABLE_LOG_DIR}}": sbatch_args.executable_log_dir, 108 | "{{HOURS}}": sbatch_args.max_hours, 109 | "{{QOS_LONG}}": "#SBATCH --qos=long" if sbatch_args.max_hours > 48 else "", 110 | "{{MEM}}": sbatch_args.mem_gb, 111 | "{{NOTIFICATION_EMAIL}}": sbatch_args.notification_email, 112 | "{{SHELL_SOURCE_SCRIPT}}": shell_source_script, 113 | "{{PYTHON_INTERPRETER}}": sbatch_args.python_interpreter, 114 | "{{EXTRA_PYTHONPATH}}": sbatch_args.extra_pythonpath, 115 | "{{MUJOCO_DIR}}": sbatch_args.mujoco_dir, 116 | "{{COPY_FILE}}": copy_file, 117 | "{{CMD}}": command, 118 | "{{EXTRA_CMDS}}": sbatch_args.extra_commands 119 | } 120 | 121 | # Open the template file 122 | with open(os.path.join(robomimic_base_path, "robomimic/scripts/slurm/base_template.sbatch")) as template: 123 | # Open the new sbatch file 124 | print(new_sbatch_fpath) 125 | with open(new_sbatch_fpath, 'w+') as new_file: 126 | # Loop through template and write to this new file 127 | for line in template: 128 | wrote = False 129 | # 
Check for various cases 130 | for k, v in fill_ins.items(): 131 | # If the key is found in the line, replace it with its value 132 | if k in line: 133 | new_file.write(line.replace(k, str(v))) 134 | wrote = True 135 | break 136 | # Otherwise, we just write the line from the template directly 137 | if not wrote: 138 | new_file.write(line) 139 | 140 | # Submit the generated file via sbatch (invoking the file directly fails with 141 | # "Permission denied", since the generated file is not marked executable) 142 | os.system("sbatch {}".format(new_sbatch_fpath)) 143 | 144 | 145 | def create_copy_file_cmd(source_file, target_dir): 146 | """ 147 | Helper function to create a bash command (in string format) to copy a source file to a target location. 148 | 149 | Args: 150 | source_file (str): Absolute path to the source file to copy 151 | target_dir (str): Absolute path to the target directory to which the source file will be copied 152 | 153 | Returns: 154 | str: bash command to execute in string format 155 | """ 156 | target_filename = source_file.split("/")[-1] 157 | target_fpath = os.path.join(target_dir, target_filename) 158 | cmd =\ 159 | f'mkdir -p {target_dir}\n'\ 160 | f'if [[ -f "{target_fpath}" ]]; then\n'\ 161 | f' echo "{target_fpath} exists, no copying"\n'\ 162 | f'else\n'\ 163 | f' echo "{target_fpath} does not exist, copying dataset"\n'\ 164 | f' cp {source_file} {target_fpath}\n'\ 165 | f'fi' 166 | 167 | return cmd 168 | -------------------------------------------------------------------------------- /robomimic/scripts/split_train_val.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script for splitting a dataset hdf5 file into training and validation trajectories. 3 | 4 | Args: 5 | dataset (str): path to hdf5 dataset 6 | 7 | filter_key (str): if provided, split the subset of trajectories 8 | in the file that correspond to this filter key into a training 9 | and validation set of trajectories, instead of splitting the 10 | full set of trajectories 11 | 12 | ratio (float): validation ratio, in (0, 1). Defaults to 0.1, which is 10%. 13 | 14 | Example usage: 15 | python split_train_val.py --dataset /path/to/demo.hdf5 --ratio 0.1 16 | """ 17 | 18 | import argparse 19 | import h5py 20 | import numpy as np 21 | 22 | from robomimic.utils.file_utils import create_hdf5_filter_key 23 | 24 | 25 | def split_train_val_from_hdf5(hdf5_path, val_ratio=0.1, filter_key=None): 26 | """ 27 | Splits data into a training set and a validation set from an HDF5 file. 28 | 29 | Args: 30 | hdf5_path (str): path to the hdf5 file 31 | to load the transitions from 32 | 33 | val_ratio (float): ratio of validation demonstrations to all demonstrations 34 | 35 | filter_key (str): if provided, split the subset of demonstration keys stored 36 | under mask/@filter_key instead of the full set of demonstrations 37 | """ 38 | 39 | # retrieve number of demos 40 | f = h5py.File(hdf5_path, "r") 41 | if filter_key is not None: 42 | print("using filter key: {}".format(filter_key)) 43 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])]) 44 | else: 45 | demos = sorted(list(f["data"].keys())) 46 | num_demos = len(demos) 47 | f.close() 48 | 49 | # get random split 50 | 51 | num_val = int(val_ratio * num_demos) 52 | mask = np.zeros(num_demos) 53 | mask[:num_val] = 1.
54 | np.random.shuffle(mask) 55 | mask = mask.astype(int) 56 | train_inds = (1 - mask).nonzero()[0] 57 | valid_inds = mask.nonzero()[0] 58 | train_keys = [demos[i] for i in train_inds] 59 | valid_keys = [demos[i] for i in valid_inds] 60 | print("{} validation demonstrations out of {} total demonstrations.".format(num_val, num_demos)) 61 | 62 | # pass mask to generate split 63 | name_1 = "train" 64 | name_2 = "valid" 65 | if filter_key is not None: 66 | name_1 = "{}_{}".format(filter_key, name_1) 67 | name_2 = "{}_{}".format(filter_key, name_2) 68 | 69 | train_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=train_keys, key_name=name_1) 70 | valid_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=valid_keys, key_name=name_2) 71 | 72 | print("Total number of train samples: {}".format(np.sum(train_lengths))) 73 | print("Average number of train samples: {}".format(np.mean(train_lengths))) 74 | 75 | print("Total number of valid samples: {}".format(np.sum(valid_lengths))) 76 | print("Average number of valid samples: {}".format(np.mean(valid_lengths))) 77 | 78 | 79 | if __name__ == "__main__": 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument( 82 | "--dataset", 83 | type=str, 84 | help="path to hdf5 dataset", 85 | ) 86 | parser.add_argument( 87 | "--filter_key", 88 | type=str, 89 | default=None, 90 | help="if provided, split the subset of trajectories in the file that correspond to\ 91 | this filter key into a training and validation set of trajectories, instead of\ 92 | splitting the full set of trajectories", 93 | ) 94 | parser.add_argument( 95 | "--ratio", 96 | type=float, 97 | default=0.1, 98 | help="validation ratio, in (0, 1)" 99 | ) 100 | args = parser.parse_args() 101 | 102 | # seed to make sure results are consistent 103 | np.random.seed(0) 104 | 105 | split_train_val_from_hdf5(args.dataset, val_ratio=args.ratio, filter_key=args.filter_key) -------------------------------------------------------------------------------- /robomimic/scripts/vis/vis_preintv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from robomimic.scripts.vis.vis_utils import get_argparser, playback_dataset 5 | from robomimic.scripts.vis.image_utils import apply_filter 6 | 7 | import matplotlib 8 | matplotlib.use('Agg') 9 | import matplotlib.pyplot as plt 10 | 11 | def get_intv_and_preintv_inds(ep_info): # note: relies on the module-level `args` parsed in __main__ 12 | if args.model == 'Q': 13 | vals = ep_info['q_vals'] 14 | elif args.model == 'V': 15 | vals = ep_info['v_vals'] 16 | else: 17 | raise ValueError 18 | 19 | ac_mods = ep_info["action_modes"] 20 | intv_inds = np.reshape(np.argwhere(ac_mods == 1), -1) 21 | 22 | preintv_inds = [] 23 | intv_start_inds = [i for i in intv_inds if i > 0 and ac_mods[i-1] != 1] # a segment starts where the previous step was not an intervention 24 | for i_start in intv_start_inds: 25 | for j in range(i_start-1, 0, -1): 26 | if j in intv_inds or vals[j] > args.th: 27 | break 28 | 29 | preintv_inds.append(j) 30 | 31 | return intv_inds, preintv_inds 32 | 33 | 34 | def plot_helper(ep_num, ep_info): 35 | fig, ax1 = plt.subplots() 36 | 37 | if args.model == 'Q': 38 | y_vals = ep_info['q_vals'] 39 | y_label = 'Q' 40 | elif args.model == 'V': 41 | y_vals = ep_info['v_vals'] 42 | y_label = 'V' 43 | else: 44 | raise ValueError 45 | 46 | color = 'tab:blue' 47 | ax1.set_xlabel('Timestep') 48 | 49 | ax1.set_ylabel(y_label) 50 | ax1.plot(y_vals, color=color) 51 | ax1.tick_params(axis='y') 52 | 53 | ax1.axhline(y=0.0, color='black') 54 | 55 | ax1.set_ylim(-1.2, 0.2) 56 | 57 | intv_inds, 
preintv_inds = get_intv_and_preintv_inds(ep_info) 58 | for i in intv_inds: 59 | ax1.axvline(x=i, color='green', linewidth=5, alpha=0.10) 60 | 61 | for i in preintv_inds: 62 | ax1.axvline(x=i, color='red', linewidth=5, alpha=0.10) 63 | 64 | plt.savefig(os.path.join( 65 | args.vis_path, 66 | 'plot_{}.png'.format(ep_num) 67 | )) 68 | plt.close() 69 | 70 | 71 | def video_helper(ep_num, ep_info): 72 | intv_inds, preintv_inds = get_intv_and_preintv_inds(ep_info) 73 | 74 | if len(intv_inds) == 0: 75 | return [] 76 | 77 | video_frames = ep_info['video_frames'] 78 | for (i, img) in video_frames: 79 | if i in intv_inds: 80 | img[::] = apply_filter(img, color=(0, 255, 0)) 81 | 82 | if i in preintv_inds: 83 | img[::] = apply_filter(img, color=(255, 0, 0)) 84 | 85 | return video_frames 86 | 87 | 88 | if __name__ == "__main__": 89 | parser = get_argparser() 90 | 91 | parser.add_argument( 92 | "--th", 93 | type=float, 94 | default=-0.35, 95 | help="threshold for pre-intervention", 96 | ) 97 | 98 | parser.add_argument( 99 | "--model", 100 | type=str, 101 | default='Q', 102 | choices=['Q', 'V'], 103 | help="Model to use for determining pre-intv", 104 | ) 105 | 106 | args = parser.parse_args() 107 | playback_dataset(args, plot_helper=plot_helper, video_helper=video_helper) 108 | -------------------------------------------------------------------------------- /robomimic/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__init__.py -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/env_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/env_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/file_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/file_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/log_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/log_utils.cpython-38.pyc -------------------------------------------------------------------------------- 
/robomimic/utils/__pycache__/loss_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/loss_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/macros.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/macros.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/obs_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/obs_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/python_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/python_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/tensor_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/tensor_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/torch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/torch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/train_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/train_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/__pycache__/vis_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__pycache__/vis_utils.cpython-38.pyc -------------------------------------------------------------------------------- /robomimic/utils/log_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains utility classes and functions for logging to stdout, stderr, 3 | and to tensorboard. 4 | """ 5 | import os 6 | import sys 7 | import numpy as np 8 | from datetime import datetime 9 | from contextlib import contextmanager 10 | from tqdm import tqdm 11 | import time 12 | 13 | 14 | class PrintLogger(object): 15 | """ 16 | This class redirects print statements to both console and a file. 
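Typical usage (editor's sketch; the log path is hypothetical):

    import sys
    sys.stdout = PrintLogger("/tmp/train_log.txt")
    print("hello")  # now written to both the console and /tmp/train_log.txt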
17 | """ 18 | def __init__(self, log_file): 19 | self.terminal = sys.stdout 20 | print('STDOUT will be forked to %s' % log_file) 21 | self.log_file = open(log_file, "a") 22 | 23 | def write(self, message): 24 | self.terminal.write(message) 25 | self.log_file.write(message) 26 | self.log_file.flush() 27 | 28 | def flush(self): 29 | # this flush method is needed for python 3 compatibility. 30 | # this handles the flush command by doing nothing. 31 | # you might want to specify some extra behavior here. 32 | pass 33 | 34 | 35 | class DataLogger(object): 36 | """ 37 | Logging class to log metrics to tensorboard and/or retrieve running statistics about logged data. 38 | """ 39 | def __init__(self, log_dir, config, log_tb=True, log_wandb=False): 40 | """ 41 | Args: 42 | log_dir (str): base path to store logs 43 | log_tb (bool): whether to use tensorboard logging 44 | """ 45 | self._tb_logger = None 46 | self._wandb_logger = None 47 | self._data = dict() # store all the scalar data logged so far 48 | 49 | if log_tb: 50 | from tensorboardX import SummaryWriter 51 | self._tb_logger = SummaryWriter(os.path.join(log_dir, 'tb')) 52 | 53 | if log_wandb: 54 | import wandb 55 | 56 | num_attempts = 10 57 | for attempt in range(num_attempts): 58 | try: 59 | # set up wandb 60 | self._wandb_logger = wandb 61 | self._wandb_logger.init( 62 | entity="sirius", 63 | project=config['tags']['wandb_proj_name'], 64 | name=config.experiment.name, 65 | dir=log_dir, 66 | mode=("offline" if attempt == num_attempts - 1 else "online"), 67 | ) 68 | 69 | # set up tags for identifying experiment 70 | tags = config['tags'] 71 | wandb_config = {k: v for (k, v) in tags.items() if k not in ['hp_keys', 'hp_values']} 72 | for (k, v) in zip(tags['hp_keys'], tags['hp_values']): 73 | wandb_config[k] = v 74 | self._wandb_logger.config.update(wandb_config) 75 | 76 | break 77 | except: 78 | print("wandb initialization, attempt #{}".format(attempt + 1)) 79 | self._wandb_logger = None 80 | time.sleep(30) 81 | 82 | def record(self, k, v, epoch, data_type='scalar', log_stats=False): 83 | """ 84 | Record data with logger. 85 | 86 | Args: 87 | k (str): key string 88 | v (float or image): value to store 89 | epoch: current epoch number 90 | data_type (str): the type of data. 
either 'scalar' or 'image' 91 | log_stats (bool): whether to store the mean/max/min/std for all data logged so far with key k 92 | """ 93 | 94 | assert data_type in ['scalar', 'image'] 95 | 96 | if data_type == 'scalar': 97 | # maybe update internal cache if logging stats for this key 98 | if log_stats or k in self._data: # any key that we're logging or previously logged 99 | if k not in self._data: 100 | self._data[k] = [] 101 | self._data[k].append(v) 102 | 103 | # maybe log to tensorboard 104 | if self._tb_logger is not None: 105 | if data_type == 'scalar': 106 | self._tb_logger.add_scalar(k, v, epoch) 107 | if log_stats: 108 | stats = self.get_stats(k) 109 | for (stat_k, stat_v) in stats.items(): 110 | stat_k_name = '{}-{}'.format(k, stat_k) 111 | self._tb_logger.add_scalar(stat_k_name, stat_v, epoch) 112 | elif data_type == 'image': 113 | self._tb_logger.add_images(k, img_tensor=v, global_step=epoch, dataformats="NHWC") 114 | 115 | if self._wandb_logger is not None: 116 | if data_type == 'scalar': 117 | self._wandb_logger.log({k: v}, step=epoch) 118 | if log_stats: 119 | stats = self.get_stats(k) 120 | for (stat_k, stat_v) in stats.items(): 121 | self._wandb_logger.log({stat_k: stat_v}, step=epoch) 122 | elif data_type == 'image': 123 | pass # Not Implemented 124 | 125 | def get_stats(self, k): 126 | """ 127 | Computes running statistics for a particular key. 128 | 129 | Args: 130 | k (str): key string 131 | Returns: 132 | stats (dict): dictionary of statistics 133 | """ 134 | stats = dict() 135 | stats['mean'] = np.mean(self._data[k]) 136 | stats['std'] = np.std(self._data[k]) 137 | stats['min'] = np.min(self._data[k]) 138 | stats['max'] = np.max(self._data[k]) 139 | return stats 140 | 141 | def close(self): 142 | """ 143 | Run before terminating to make sure all logs are flushed 144 | """ 145 | if self._tb_logger is not None: 146 | self._tb_logger.close() 147 | 148 | if self._wandb_logger is not None: 149 | self._wandb_logger.finish() 150 | 151 | 152 | class custom_tqdm(tqdm): 153 | """ 154 | Small extension to tqdm to make a few changes from default behavior. 155 | By default tqdm writes to stderr. Instead, we change it to write 156 | to stdout. 157 | """ 158 | def __init__(self, *args, **kwargs): 159 | assert "file" not in kwargs 160 | super(custom_tqdm, self).__init__(*args, file=sys.stdout, **kwargs) 161 | 162 | 163 | @contextmanager 164 | def silence_stdout(): 165 | """ 166 | This contextmanager will redirect stdout so that nothing is printed 167 | to the terminal. Taken from the link below: 168 | 169 | https://stackoverflow.com/questions/6735917/redirecting-stdout-to-nothing-in-python 170 | """ 171 | old_target = sys.stdout 172 | try: 173 | with open(os.devnull, "w") as new_target: 174 | sys.stdout = new_target 175 | yield new_target 176 | finally: 177 | sys.stdout = old_target 178 | -------------------------------------------------------------------------------- /robomimic/utils/macros.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set of global variables shared across robomimic 3 | """ 4 | # Sets debugging mode. 
Should be set at the top-level script so that internal 5 | # debugging functionality is activated 6 | DEBUG = False 7 | -------------------------------------------------------------------------------- /robomimic/utils/python_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set of general purpose utility functions for easier interfacing with the Python API 3 | """ 4 | import inspect 5 | from copy import deepcopy 6 | import robomimic.utils.macros as Macros 7 | 8 | 9 | def get_class_init_kwargs(cls): 10 | """ 11 | Helper function to return a list of all valid keyword arguments (excluding "self") for the given @cls class. 12 | 13 | Args: 14 | cls (object): Class from which to grab __init__ kwargs 15 | 16 | Returns: 17 | list: All keyword arguments (excluding "self") specified by @cls __init__ constructor method 18 | """ 19 | return list(inspect.signature(cls.__init__).parameters.keys())[1:] 20 | 21 | 22 | def extract_subset_dict(dic, keys, copy=False): 23 | """ 24 | Helper function to extract a subset of dictionary key-values from a current dictionary. Optionally (deep)copies 25 | the values extracted from the original @dic if @copy is True. 26 | 27 | Args: 28 | dic (dict): Dictionary containing multiple key-values 29 | keys (Iterable): Specific keys to extract from @dic. If a key doesn't exist in @dic, it is skipped 30 | copy (bool): If True, will deepcopy all values corresponding to the specified @keys 31 | 32 | Returns: 33 | dict: Extracted subset dictionary containing only the specified @keys and their corresponding values 34 | """ 35 | subset = {k: dic[k] for k in keys if k in dic} 36 | return deepcopy(subset) if copy else subset 37 | 38 | 39 | def extract_class_init_kwargs_from_dict(cls, dic, copy=False, verbose=False): 40 | """ 41 | Helper function to return a dictionary of key-values that specifically correspond to @cls class's __init__ 42 | constructor method, from @dic which may or may not contain additional, irrelevant kwargs. 43 | 44 | Note that @dic may be missing certain kwargs as specified by cls.__init__. No error will be raised. 
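For example (editor's sketch, using a hypothetical class):

    class Net:
        def __init__(self, hidden_dim, activation="relu"):
            ...

    extract_class_init_kwargs_from_dict(Net, {"hidden_dim": 64, "unused": 0})
    # -> {"hidden_dim": 64}; "unused" is filtered out, and the missing
    #    "activation" kwarg is simply absent from the result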
45 | 46 | Args: 47 | cls (object): Class from which to grab __init__ kwargs that will be used as filtering keys for @dic 48 | dic (dict): Dictionary containing multiple key-values 49 | copy (bool): If True, will deepcopy all values corresponding to the specified @keys 50 | verbose (bool): If True (or if macro DEBUG is True), then will print out mismatched keys 51 | 52 | Returns: 53 | dict: Extracted subset dictionary containing only those keys from cls.__init__ that were present in @dic, 54 | and their corresponding values 55 | """ 56 | # extract only relevant kwargs for this specific backbone 57 | cls_keys = get_class_init_kwargs(cls) 58 | subdic = extract_subset_dict( 59 | dic=dic, 60 | keys=cls_keys, 61 | copy=copy, 62 | ) 63 | 64 | # Run sanity check if verbose or debugging 65 | if verbose or Macros.DEBUG: 66 | keys_not_in_cls = [k for k in dic if k not in cls_keys] 67 | keys_not_in_dic = [k for k in cls_keys if k not in list(dic.keys())] 68 | if len(keys_not_in_cls) > 0: 69 | print(f"Warning: For class {cls.__name__}, got unknown keys: {keys_not_in_cls} ") 70 | if len(keys_not_in_dic) > 0: 71 | print(f"Warning: For class {cls.__name__}, got missing keys: {keys_not_in_dic} ") 72 | 73 | return subdic -------------------------------------------------------------------------------- /robomimic/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains some PyTorch utilities. 3 | """ 4 | import numpy as np 5 | import torch 6 | import torch.optim as optim 7 | 8 | 9 | def soft_update(source, target, tau): 10 | """ 11 | Soft update from the parameters of a @source torch module to a @target torch module 12 | with strength @tau. The update follows target = target * (1 - tau) + source * tau. 13 | 14 | Args: 15 | source (torch.nn.Module): source network to push target network parameters towards 16 | target (torch.nn.Module): target network to update 17 | """ 18 | for target_param, param in zip(target.parameters(), source.parameters()): 19 | target_param.copy_( 20 | target_param * (1.0 - tau) + param * tau 21 | ) 22 | 23 | 24 | def hard_update(source, target): 25 | """ 26 | Hard update @target parameters to match @source. 27 | 28 | Args: 29 | source (torch.nn.Module): source network to provide parameters 30 | target (torch.nn.Module): target network to update parameters for 31 | """ 32 | for target_param, param in zip(target.parameters(), source.parameters()): 33 | target_param.copy_(param) 34 | 35 | 36 | def get_torch_device(try_to_use_cuda): 37 | """ 38 | Return torch device. If using cuda (GPU), will also set cudnn.benchmark to True 39 | to optimize CNNs. 40 | 41 | Args: 42 | try_to_use_cuda (bool): if True and cuda is available, will use GPU 43 | 44 | Returns: 45 | device (torch.Device): device to use for models 46 | """ 47 | if try_to_use_cuda and torch.cuda.is_available(): 48 | torch.backends.cudnn.benchmark = True 49 | device = torch.device("cuda:0") 50 | else: 51 | device = torch.device("cpu") 52 | return device 53 | 54 | 55 | def reparameterize(mu, logvar): 56 | """ 57 | Reparameterization trick: sample z in a way that allows backpropagation. 58 | This makes it so that we can backpropagate through the sampling of z from 59 | our encoder when feeding the sampled variable to the decoder. 
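Concretely (editor's note), with std = exp(0.5 * logvar) and eps sampled from N(0, I), the code below computes z = mu + std * eps, which is differentiable with respect to both mu and logvar.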
60 | 61 | (See "The reparameterization trick" section of https://arxiv.org/abs/1312.6114) 62 | 63 | Args: 64 | mu (torch.Tensor): batch of means from the encoder distribution 65 | logvar (torch.Tensor): batch of log variances from the encoder distribution 66 | 67 | Returns: 68 | z (torch.Tensor): batch of sampled latents from the encoder distribution that 69 | support backpropagation 70 | """ 71 | # logvar = \log(\sigma^2) = 2 * \log(\sigma) 72 | # \sigma = \exp(0.5 * logvar) 73 | 74 | # clamped for numerical stability 75 | logstd = (0.5 * logvar).clamp(-4, 15) 76 | std = torch.exp(logstd) 77 | 78 | # Sample \epsilon from normal distribution 79 | # use std to create a new tensor, so we don't have to care 80 | # about running on GPU or not 81 | eps = std.new(std.size()).normal_() 82 | 83 | # Then multiply with the standard deviation and add the mean 84 | z = eps.mul(std).add_(mu) 85 | 86 | return z 87 | 88 | 89 | def optimizer_from_optim_params(net_optim_params, net): 90 | """ 91 | Helper function to return a torch Optimizer from the optim_params 92 | section of the config for a particular network. 93 | 94 | Args: 95 | net_optim_params (Config): optim_params part of algo_config corresponding 96 | to @net. This determines the optimizer that is created. 97 | 98 | net (torch.nn.Module): module whose parameters this optimizer will be 99 | responsible for 100 | 101 | Returns: 102 | optimizer (torch.optim.Optimizer): optimizer 103 | """ 104 | return optim.Adam( 105 | params=net.parameters(), 106 | lr=net_optim_params["learning_rate"]["initial"], 107 | weight_decay=net_optim_params["regularization"]["L2"], 108 | ) 109 | 110 | 111 | def lr_scheduler_from_optim_params(net_optim_params, net, optimizer): 112 | """ 113 | Helper function to return a LRScheduler from the optim_params 114 | section of the config for a particular network. Returns None 115 | if a scheduler is not needed. 116 | 117 | Args: 118 | net_optim_params (Config): optim_params part of algo_config corresponding 119 | to @net. This determines whether a learning rate scheduler is created. 120 | 121 | net (torch.nn.Module): module whose parameters this optimizer will be 122 | responsible for 123 | 124 | optimizer (torch.optim.Optimizer): optimizer for this net 125 | 126 | Returns: 127 | lr_scheduler (torch.optim.lr_scheduler or None): learning rate scheduler 128 | """ 129 | lr_scheduler = None 130 | if len(net_optim_params["learning_rate"]["epoch_schedule"]) > 0: 131 | # decay LR according to the epoch schedule 132 | lr_scheduler = optim.lr_scheduler.MultiStepLR( 133 | optimizer=optimizer, 134 | milestones=net_optim_params["learning_rate"]["epoch_schedule"], 135 | gamma=net_optim_params["learning_rate"]["decay_factor"], 136 | ) 137 | return lr_scheduler 138 | 139 | 140 | def backprop_for_loss(net, optim, loss, max_grad_norm=None, retain_graph=False, dont_step=False): 141 | """ 142 | Backpropagate loss and update parameters for the given 143 | network @net. 
144 | 145 | Args: 146 | net (torch.nn.Module): network to update 147 | 148 | optim (torch.optim.Optimizer): optimizer to use 149 | 150 | loss (torch.Tensor): loss to use for backpropagation 151 | 152 | max_grad_norm (float): if provided, used to clip gradients 153 | 154 | retain_graph (bool): if True, graph is not freed after backward call 155 | dont_step (bool): if True, compute gradients but skip the optimizer step 156 | Returns: 157 | grad_norms (float): sum of squared gradient norms over all parameters 158 | """ 159 | 160 | # backprop 161 | optim.zero_grad() 162 | loss.backward(retain_graph=retain_graph) 163 | 164 | # gradient clipping 165 | if max_grad_norm is not None: 166 | torch.nn.utils.clip_grad_norm_(net.parameters(), max_grad_norm) 167 | 168 | # compute grad norms 169 | grad_norms = 0. 170 | for p in net.parameters(): 171 | # accumulate norms only for parameters that received gradients 172 | if p.grad is not None: 173 | grad_norms += p.grad.data.norm(2).pow(2).item() 174 | 175 | if not dont_step: 176 | # step 177 | optim.step() 178 | 179 | return grad_norms 180 | 181 | 182 | class dummy_context_mgr(): 183 | """ 184 | A dummy context manager - useful for having conditional scopes (such 185 | as @maybe_no_grad). Nothing happens in this scope. 186 | """ 187 | def __enter__(self): 188 | return None 189 | def __exit__(self, exc_type, exc_value, traceback): 190 | return False 191 | 192 | 193 | def maybe_no_grad(no_grad): 194 | """ 195 | Args: 196 | no_grad (bool): if True, the returned context will be torch.no_grad(), otherwise 197 | it will be a dummy context 198 | """ 199 | return torch.no_grad() if no_grad else dummy_context_mgr() 200 | -------------------------------------------------------------------------------- /robomimic/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains utility functions for visualizing image observations in the training pipeline. 3 | These functions can be a useful debugging tool. 4 | """ 5 | import numpy as np 6 | 7 | import robomimic.utils.tensor_utils as TensorUtils 8 | import robomimic.utils.obs_utils as ObsUtils 9 | 10 | from PIL import Image, ImageFont, ImageDraw 11 | 12 | 13 | def image_tensor_to_numpy(image): 14 | """ 15 | Converts processed image tensors to numpy so that they can be saved to disk or video. 16 | A useful utility function for visualizing images in the middle of training. 17 | 18 | Args: 19 | image (torch.Tensor): images of shape [..., C, H, W] 20 | 21 | Returns: 22 | image (np.array): converted images of shape [..., H, W, C] and type uint8 23 | """ 24 | return TensorUtils.to_numpy( 25 | ObsUtils.unprocess_image(image) 26 | ).astype(np.uint8) 27 | 28 | 29 | def image_to_disk(image, fname): 30 | """ 31 | Writes an image to disk. 32 | 33 | Args: 34 | image (np.array): image of shape [H, W, 3] 35 | fname (str): path to save image to 36 | """ 37 | image = Image.fromarray(image) 38 | image.save(fname) 39 | 40 | 41 | def image_tensor_to_disk(image, fname): 42 | """ 43 | Writes an image tensor to disk. Any leading batch dimensions are indexed out 44 | with the first element. 45 | 46 | Args: 47 | image (torch.Tensor): image of shape [..., C, H, W]. 
All leading dimensions 48 | will be indexed out with the first element 49 | fname (str): path to save image to 50 | """ 51 | # index out all leading dimensions before [C, H, W] 52 | num_leading_dims = len(image.shape[:-3]) 53 | for _ in range(num_leading_dims): 54 | image = image[0] 55 | image = image_tensor_to_numpy(image) 56 | image_to_disk(image, fname) 57 | 58 | def write_text_on_image(image_arr, text, def_color=None, font=30, pos=(15, 15)): # overlay @text on @image_arr and return the result as a new array 59 | img = Image.fromarray(image_arr) 60 | image_edit = ImageDraw.Draw(img) 61 | pil_font = ImageFont.truetype("FreeMono.ttf", font) # renamed to avoid shadowing the @font size argument 62 | color = (0, 255, 0) if def_color is None else def_color 63 | image_edit.text(pos, text, color, font=pil_font) 64 | return np.array(img) 65 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # read the contents of your README file 4 | from os import path 5 | this_directory = path.abspath(path.dirname(__file__)) 6 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: 7 | lines = f.readlines() 8 | 9 | # remove images from README 10 | lines = [x for x in lines if (('.png' not in x) and ('.gif' not in x))] 11 | long_description = ''.join(lines) 12 | 13 | setup( 14 | name="robomimic", 15 | packages=[ 16 | package for package in find_packages() if package.startswith("robomimic") 17 | ], 18 | install_requires=[ 19 | "numpy>=1.13.3", 20 | "h5py", 21 | "psutil", 22 | "tqdm", 23 | "termcolor", 24 | "tensorboard", 25 | "tensorboardX", 26 | "imageio", 27 | "imageio-ffmpeg", 28 | "egl_probe>=1.0.1", 29 | "torch", 30 | "torchvision", 31 | ], 32 | eager_resources=['*'], 33 | include_package_data=True, 34 | python_requires='>=3', 35 | description="robomimic: A Modular Framework for Robot Learning from Demonstration", 36 | author="Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang", 37 | url="https://github.com/ARISE-Initiative/robomimic", 38 | author_email="amandlek@cs.stanford.edu", 39 | version="0.2.0", 40 | long_description=long_description, 41 | long_description_content_type='text/markdown' 42 | ) 43 | --------------------------------------------------------------------------------