├── LICENSE
├── MANIFEST.in
├── README.md
├── images
│   └── sirius.png
├── requirements-docs.txt
├── requirements.txt
├── robomimic
│   ├── __init__.py
│   ├── algo
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── algo.cpython-38.pyc
│   │   │   ├── awac.cpython-38.pyc
│   │   │   ├── bc.cpython-38.pyc
│   │   │   ├── bcq.cpython-38.pyc
│   │   │   ├── cql.cpython-38.pyc
│   │   │   ├── gl.cpython-38.pyc
│   │   │   ├── hbc.cpython-38.pyc
│   │   │   ├── iql.cpython-38.pyc
│   │   │   ├── iris.cpython-38.pyc
│   │   │   └── td3_bc.cpython-38.pyc
│   │   ├── algo.py
│   │   ├── awac.py
│   │   ├── bc.py
│   │   ├── bcq.py
│   │   ├── cql.py
│   │   ├── gl.py
│   │   ├── hbc.py
│   │   ├── iql.py
│   │   ├── iris.py
│   │   └── td3_bc.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── awac_config.cpython-38.pyc
│   │   │   ├── base_config.cpython-38.pyc
│   │   │   ├── bc_config.cpython-38.pyc
│   │   │   ├── bcq_config.cpython-38.pyc
│   │   │   ├── config.cpython-38.pyc
│   │   │   ├── cql_config.cpython-38.pyc
│   │   │   ├── gl_config.cpython-38.pyc
│   │   │   ├── hbc_config.cpython-38.pyc
│   │   │   ├── iql_config.cpython-38.pyc
│   │   │   ├── iris_config.cpython-38.pyc
│   │   │   ├── td3_bc_config.cpython-38.pyc
│   │   │   └── vae_config.cpython-38.pyc
│   │   ├── awac_config.py
│   │   ├── base_config.py
│   │   ├── bc_config.py
│   │   ├── bcq_config.py
│   │   ├── config.py
│   │   ├── cql_config.py
│   │   ├── gl_config.py
│   │   ├── hbc_config.py
│   │   ├── iql_config.py
│   │   ├── iris_config.py
│   │   ├── td3_bc_config.py
│   │   └── vae_config.py
│   ├── envs
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── env_base.cpython-38.pyc
│   │   ├── env_base.py
│   │   ├── env_gym.py
│   │   ├── env_ig_momart.py
│   │   └── env_robosuite.py
│   ├── exps
│   │   ├── sirius
│   │   │   ├── bc.json
│   │   │   ├── bc_iwr.json
│   │   │   └── sirius.json
│   │   ├── sirius_template
│   │   │   ├── awac
│   │   │   │   ├── awac_im.json
│   │   │   │   └── awac_ld.json
│   │   │   ├── bc
│   │   │   │   ├── bc_im.json
│   │   │   │   ├── bc_ld.json
│   │   │   │   ├── bc_real.json
│   │   │   │   ├── bc_sim_v0_im.json
│   │   │   │   └── bc_sim_v0_ld.json
│   │   │   └── iql
│   │   │       └── iql_ld.json
│   │   └── templates
│   │       ├── bc.json
│   │       ├── bcq.json
│   │       ├── cql.json
│   │       ├── gl.json
│   │       ├── hbc.json
│   │       ├── iris.json
│   │       └── td3_bc.json
│   ├── models
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── base_nets.cpython-38.pyc
│   │   │   ├── distributions.cpython-38.pyc
│   │   │   ├── obs_nets.cpython-38.pyc
│   │   │   ├── policy_nets.cpython-38.pyc
│   │   │   ├── vae_nets.cpython-38.pyc
│   │   │   └── value_nets.cpython-38.pyc
│   │   ├── base_nets.py
│   │   ├── distributions.py
│   │   ├── obs_nets.py
│   │   ├── policy_nets.py
│   │   ├── vae_nets.py
│   │   └── value_nets.py
│   ├── scripts
│   │   ├── check_same_initial_configs.py
│   │   ├── conversion
│   │   │   ├── convert_d4rl.py
│   │   │   ├── convert_robosuite.py
│   │   │   └── convert_roboturk_pilot.py
│   │   ├── dataset_states_to_obs.py
│   │   ├── download_datasets.py
│   │   ├── download_momart_datasets.py
│   │   ├── extract_obs_from_raw_datasets.sh
│   │   ├── generate_config_templates.py
│   │   ├── generate_paper_configs.py
│   │   ├── get_dataset_info.py
│   │   ├── hitl
│   │   │   ├── collect_hitl_demos.py
│   │   │   └── collect_playback_utils.py
│   │   ├── hyperparam_helper.py
│   │   ├── playback_dataset.py
│   │   ├── run_trained_agent.py
│   │   ├── slurm
│   │   │   ├── auto_append.txt
│   │   │   ├── auto_overwrite.txt
│   │   │   ├── base_args.py
│   │   │   ├── base_template.sbatch
│   │   │   ├── batchrl_args.py
│   │   │   ├── run_hp_sweep.py
│   │   │   ├── sbatch_args.py
│   │   │   └── sbatch_utils.py
│   │   ├── split_train_val.py
│   │   ├── train.py
│   │   └── vis
│   │       ├── image_utils.py
│   │       ├── vis_preintv.py
│   │       └── vis_utils.py
│   └── utils
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-38.pyc
│       │   ├── dataset.cpython-38.pyc
│       │   ├── env_utils.cpython-38.pyc
│       │   ├── file_utils.cpython-38.pyc
│       │   ├── log_utils.cpython-38.pyc
│       │   ├── loss_utils.cpython-38.pyc
│       │   ├── macros.cpython-38.pyc
│       │   ├── obs_utils.cpython-38.pyc
│       │   ├── python_utils.cpython-38.pyc
│       │   ├── tensor_utils.cpython-38.pyc
│       │   ├── torch_utils.cpython-38.pyc
│       │   ├── train_utils.cpython-38.pyc
│       │   └── vis_utils.cpython-38.pyc
│       ├── dataset.py
│       ├── env_utils.py
│       ├── file_utils.py
│       ├── hyperparam_utils.py
│       ├── log_utils.py
│       ├── loss_utils.py
│       ├── macros.py
│       ├── obs_utils.py
│       ├── python_utils.py
│       ├── tensor_utils.py
│       ├── test_utils.py
│       ├── torch_utils.py
│       ├── train_utils.py
│       └── vis_utils.py
├── setup.py
└── sirius.yml
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 UT Robot Perception and Learning Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include robomimic/exps/templates/*.json
2 | include robomimic/scripts/*.py
3 | include robomimic/scripts/*.sh
4 | include robomimic/scripts/conversion/*.py
5 | include robomimic/scripts/conversion/*.sh
6 | recursive-include examples/ *.py
7 | recursive-include tests/ *.py
8 | recursive-include tests/ *.sh
9 | recursive-include tests/assets/ *
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sirius 🌟: Robot Learning on the Job
2 |
3 |
4 |
5 | This is the official codebase for the [**Sirius**](https://ut-austin-rpl.github.io/sirius/) paper:
6 |
7 | **Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment**
8 | [Huihan Liu](https://huihanl.github.io/), [Soroush Nasiriany](http://snasiriany.me/), [Lance Zhang](https://github.com/Lantian-Lance-Zhang), [Zhiyao Bao](https://www.linkedin.com/in/zhiyao-bao/), [Yuke Zhu](https://www.cs.utexas.edu/~yukez/)
9 | [UT Austin Robot Perception and Learning Lab](https://rpl.cs.utexas.edu/)
10 | Robotics: Science and Systems (RSS), 2023
11 | **[[Paper]](https://arxiv.org/abs/2211.08416)** **[[Project Website]](https://ut-austin-rpl.github.io/sirius/)** **[[Real Robot Control]](https://ut-austin-rpl.github.io/deoxys-docs/html/getting_started/overview.html)**
12 |
13 |
14 |
15 |
16 |
17 | ## Quickstart
18 |
19 | Sirius builds upon [robomimic](https://github.com/ARISE-Initiative/robomimic), a framework for robot learning from demonstration. Sirius also uses the robotics simulator [robosuite](https://github.com/ARISE-Initiative/robosuite) powered by the MuJoCo physics engine.
20 |
21 | ### Setup Sirius codebase
22 |
23 | #### Installing Sirius
24 |
25 | ```
26 | git clone https://github.com/UT-Austin-RPL/sirius
27 | cd sirius
28 | conda env create -f sirius.yml
29 | conda activate sirius
30 | pip install -e .
31 | ```
32 |
33 | #### Installing ```robosuite```
34 |
35 | For additional reference on installing robosuite, the guides [here](https://robomimic.github.io/docs/introduction/installation.html) and [here](https://robosuite.ai/docs/installation.html#install-from-source) may be helpful.
36 |
37 | ```
38 | git clone https://github.com/ARISE-Initiative/robosuite.git
39 | cd robosuite
40 | pip install -r requirements.txt
41 | pip install -e .
42 | ```
43 |
44 | ## Usage
45 |
46 | ### Running Sirius
47 |
48 |
49 | Running Sirius intervention-guided policy learning:
50 |
51 | ```
52 | python robomimic/scripts/train.py --config robomimic/exps/sirius/sirius.json
53 | ```
54 |
55 | IWR baseline:
56 |
57 | ```
58 | python robomimic/scripts/train.py --config robomimic/exps/sirius/bc_iwr.json
59 | ```
60 |
61 | BC baseline:
62 |
63 | ```
64 | python robomimic/scripts/train.py --config robomimic/exps/sirius/bc.json
65 | ```
66 |
67 |
68 |
69 | ### Sirius Data Collection Pipeline
70 |
71 | We include the scripts for collecting demonstrations and performing human interventions during robot policy execution below. We use a SpaceMouse to provide both demonstrations and interventions. More details on setting up the SpaceMouse can be found [here](https://ut-austin-rpl.github.io/deoxys-docs/html/tutorials/using_teleoperation_devices.html).
72 |
73 | #### Performing Human Demonstration
74 |
75 | Perform human demonstrations with the flag ```--all-demos```:
76 |
77 | ```
78 | python robomimic/scripts/hitl/collect_hitl_demos.py --all-demos --num-traj 50
79 | ```
80 |
81 | #### Policy Execution with Intervention
82 |
83 | Perform human intervention with the policy checkpoint ```${checkpoint}```:
84 |
85 | ```
86 | python robomimic/scripts/hitl/collect_hitl_demos.py --num-traj 50 --checkpoint ${checkpoint}
87 | ```
88 |
89 |
90 |
91 | ### Processing data
92 |
93 | #### Adding modalities
94 |
95 | By default, datasets are generated in a minimal format with only low-level state information, to save space. To add image observations and other modalities for training, run the following post-processing script. It processes the original data ```${data.hdf5}``` into ```${data_processed.hdf5}```, with image size ```${image_size}```. By default, the two camera views used are agentview and robot0_eye_in_hand; you can modify them in the script ```template_process_sim_dataset.sh```.
96 |
97 | ```
98 | cd robomimic/scripts/hitl
99 |
100 | source template_process_sim_dataset.sh ${data.hdf5} ${data_processed.hdf5} ${image_size}
101 | ```
102 |
103 |
104 |
105 | ## Acknowledgements
106 |
107 | This codebase is largely built on [robomimic](https://github.com/ARISE-Initiative/robomimic) and [robosuite](https://github.com/ARISE-Initiative/robosuite). We also thank [Ajay Mandlekar](https://ai.stanford.edu/~amandlek/) for sharing well-designed simulation task environments beyond the robomimic codebase, such as the ```Coffee``` and ```Threading``` tasks, during project development.
108 |
109 | For real-robot experiments, we used [Deoxys](https://ut-austin-rpl.github.io/deoxys-docs/html/getting_started/overview.html), a controller library for Franka Emika Panda developed by [Yifeng Zhu](https://zhuyifengzju.github.io/).
110 |
111 |
112 |
113 | ## Citation
114 | ```bibtex
115 | @inproceedings{liu2022robot,
116 | title = {Robot Learning on the Job: Human-in-the-Loop Autonomy and Learning During Deployment},
117 | author = {Huihan Liu and Soroush Nasiriany and Lance Zhang and Zhiyao Bao and Yuke Zhu},
118 | booktitle = {Robotics: Science and Systems (RSS)},
119 | year = {2023}
120 | }
121 | ```
122 |
--------------------------------------------------------------------------------
/images/sirius.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/images/sirius.png
--------------------------------------------------------------------------------
/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | # requirements for building sphinx docs
2 | pygments==2.4.1
3 | sphinx
4 | sphinx_rtd_theme
5 | sphinx_markdown_tables
6 | recommonmark
7 | nbsphinx
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.13.3
2 | h5py
3 | psutil
4 | tqdm
5 | termcolor
6 | tensorboard
7 | tensorboardX
8 | imageio
9 | imageio-ffmpeg
10 | egl_probe>=1.0.1
11 | torch
12 | torchvision
13 |
--------------------------------------------------------------------------------
/robomimic/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.0"
2 |
3 |
4 | # stores released dataset links and rollout horizons in global dictionary.
5 | # Structure is given below for each type of dataset:
6 |
7 | # robosuite / real
8 | # {
9 | # task:
10 | # dataset_type:
11 | # hdf5_type:
12 | # url: link
13 | # horizon: value
14 | # ...
15 | # ...
16 | # ...
17 | # }
18 | DATASET_REGISTRY = {}
19 |
20 | # momart
21 | # {
22 | # task:
23 | # dataset_type:
24 | # url: link
25 | # size: value
26 | # ...
27 | # ...
28 | # }
29 | MOMART_DATASET_REGISTRY = {}
30 |
31 |
32 | def register_dataset_link(task, dataset_type, hdf5_type, link, horizon):
33 | """
34 | Helper function to register dataset link in global dictionary.
35 | Also takes a @horizon parameter - this corresponds to the evaluation
36 | rollout horizon that should be used during training.
37 |
38 | Args:
39 | task (str): name of task for this dataset
40 | dataset_type (str): type of dataset (usually identifies the dataset source)
41 | hdf5_type (str): type of hdf5 - usually one of "raw", "low_dim", or "image",
42 | to identify the kind of observations in the dataset
43 | link (str): download link for the dataset
44 | horizon (int): evaluation rollout horizon that should be used with this dataset
45 | """
46 | if task not in DATASET_REGISTRY:
47 | DATASET_REGISTRY[task] = {}
48 | if dataset_type not in DATASET_REGISTRY[task]:
49 | DATASET_REGISTRY[task][dataset_type] = {}
50 | DATASET_REGISTRY[task][dataset_type][hdf5_type] = dict(url=link, horizon=horizon)
51 |
52 |
53 | def register_all_links():
54 | """
55 | Record all dataset links in this function.
56 | """
57 |
58 | # all proficient human datasets
59 | ph_tasks = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"]
60 | ph_horizons = [400, 400, 400, 700, 700, 1000, 1000, 1000]
61 | for task, horizon in zip(ph_tasks, ph_horizons):
62 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="raw", horizon=horizon,
63 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/demo.hdf5".format(task))
64 | # real world datasets only have demo.hdf5 files which already contain all observation modalities
65 | # while sim datasets store raw low-dim mujoco states in the demo.hdf5
66 | if "real" not in task:
67 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="low_dim", horizon=horizon,
68 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/low_dim.hdf5".format(task))
69 | register_dataset_link(task=task, dataset_type="ph", hdf5_type="image", horizon=horizon,
70 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/ph/image.hdf5".format(task))
71 |
72 | # all multi human datasets
73 | mh_tasks = ["lift", "can", "square", "transport"]
74 | mh_horizons = [500, 500, 500, 1100]
75 | for task, horizon in zip(mh_tasks, mh_horizons):
76 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="raw", horizon=horizon,
77 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/demo.hdf5".format(task))
78 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="low_dim", horizon=horizon,
79 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/low_dim.hdf5".format(task))
80 | register_dataset_link(task=task, dataset_type="mh", hdf5_type="image", horizon=horizon,
81 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mh/image.hdf5".format(task))
82 |
83 | # all machine generated datasets
84 | for task, horizon in zip(["lift", "can"], [400, 400]):
85 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="raw", horizon=horizon,
86 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/demo.hdf5".format(task))
87 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_sparse", horizon=horizon,
88 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_sparse.hdf5".format(task))
89 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_sparse", horizon=horizon,
90 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_sparse.hdf5".format(task))
91 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="low_dim_dense", horizon=horizon,
92 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/low_dim_dense.hdf5".format(task))
93 | register_dataset_link(task=task, dataset_type="mg", hdf5_type="image_dense", horizon=horizon,
94 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/{}/mg/image_dense.hdf5".format(task))
95 |
96 | # can-paired dataset
97 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="raw", horizon=400,
98 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/demo.hdf5")
99 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="low_dim", horizon=400,
100 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/low_dim.hdf5")
101 | register_dataset_link(task="can", dataset_type="paired", hdf5_type="image", horizon=400,
102 | link="http://downloads.cs.stanford.edu/downloads/rt_benchmark/can/paired/image.hdf5")
103 |
104 |
105 | def register_momart_dataset_link(task, dataset_type, link, dataset_size):
106 | """
107 | Helper function to register dataset link in global dictionary.
108 | Also takes a @horizon parameter - this corresponds to the evaluation
109 | rollout horizon that should be used during training.
110 |
111 | Args:
112 | task (str): name of task for this dataset
113 | dataset_type (str): type of dataset (usually identifies the dataset source)
114 | link (str): download link for the dataset
115 | dataset_size (float): size of the dataset, in GB
116 | """
117 | if task not in MOMART_DATASET_REGISTRY:
118 | MOMART_DATASET_REGISTRY[task] = {}
119 | if dataset_type not in MOMART_DATASET_REGISTRY[task]:
120 | MOMART_DATASET_REGISTRY[task][dataset_type] = {}
121 | MOMART_DATASET_REGISTRY[task][dataset_type] = dict(url=link, size=dataset_size)
122 |
123 |
124 | def register_all_momart_links():
125 | """
126 | Record all dataset links in this function.
127 | """
128 | # all tasks, mapped to their [exp, sub, gen, sam] sizes
129 | momart_tasks = {
130 | "table_setup_from_dishwasher": [14, 14, 3.3, 0.6],
131 | "table_setup_from_dresser": [16, 17, 3.1, 0.7],
132 | "table_cleanup_to_dishwasher": [23, 36, 5.3, 1.1],
133 | "table_cleanup_to_sink": [17, 28, 2.9, 0.8],
134 | "unload_dishwasher": [21, 27, 5.4, 1.0],
135 | }
136 |
137 | momart_dataset_types = [
138 | "expert",
139 | "suboptimal",
140 | "generalize",
141 | "sample",
142 | ]
143 |
144 | # Iterate over all combos and register the link
145 | for task, dataset_sizes in momart_tasks.items():
146 | for dataset_type, dataset_size in zip(momart_dataset_types, dataset_sizes):
147 | register_momart_dataset_link(
148 | task=task,
149 | dataset_type=dataset_type,
150 | link=f"http://downloads.cs.stanford.edu/downloads/rt_mm/{dataset_type}/{task}_{dataset_type}.hdf5",
151 | dataset_size=dataset_size,
152 | )
153 |
154 |
155 | register_all_links()
156 | register_all_momart_links()
157 |
--------------------------------------------------------------------------------
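A minimal sketch of how the registry above is laid out and queried; the `my_task` entry is a hypothetical example, not a released dataset:

```
import robomimic

# register_dataset_link fills DATASET_REGISTRY[task][dataset_type][hdf5_type]
# with a dict holding the download url and evaluation rollout horizon
robomimic.register_dataset_link(
    task="my_task", dataset_type="ph", hdf5_type="low_dim",
    link="http://example.com/my_task/low_dim.hdf5", horizon=400,
)
entry = robomimic.DATASET_REGISTRY["my_task"]["ph"]["low_dim"]
print(entry["url"], entry["horizon"])
```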
/robomimic/algo/__init__.py:
--------------------------------------------------------------------------------
1 | from robomimic.algo.algo import register_algo_factory_func, res_mlp_args_from_config, algo_name_to_factory_func, algo_factory, Algo, PolicyAlgo, ValueAlgo, PlannerAlgo, HierarchicalAlgo, RolloutPolicy
2 |
3 | # note: these imports are needed to register these classes in the global algo registry
4 | from robomimic.algo.bc import BC, BC_Gaussian, BC_GMM, BC_VAE, BC_RNN, BC_RNN_GMM
5 | from robomimic.algo.bcq import BCQ, BCQ_GMM, BCQ_Distributional
6 | from robomimic.algo.cql import CQL
7 | from robomimic.algo.awac import AWAC
8 | from robomimic.algo.iql import IQL
9 | from robomimic.algo.gl import GL, GL_VAE, ValuePlanner
10 | from robomimic.algo.hbc import HBC
11 | from robomimic.algo.iris import IRIS
12 | from robomimic.algo.td3_bc import TD3_BC
13 |
--------------------------------------------------------------------------------
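The imports above populate the global algo registry as a side effect. A minimal sketch of resolving an algo class by name, mirroring how `iris.py` below calls `algo_name_to_factory_func` (assumes the default BC config from `config_factory`):

```
from robomimic.algo import algo_name_to_factory_func
from robomimic.config import config_factory

config = config_factory(algo_name="bc")
# a factory func maps an algo config to (algo class, extra constructor kwargs)
algo_cls, algo_kwargs = algo_name_to_factory_func("bc")(config.algo)
print(algo_cls.__name__)  # "BC" for the default (non-GMM, non-RNN) config
```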
/robomimic/algo/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/algo.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/algo.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/awac.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/awac.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/bc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/bc.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/bcq.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/bcq.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/cql.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/cql.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/gl.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/gl.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/hbc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/hbc.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/iql.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/iql.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/iris.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/iris.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/__pycache__/td3_bc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/algo/__pycache__/td3_bc.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/algo/iris.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of IRIS (https://arxiv.org/abs/1911.05321).
3 | """
4 | import numpy as np
5 | from collections import OrderedDict
6 | from copy import deepcopy
7 |
8 | import torch
9 |
10 | import robomimic.utils.tensor_utils as TensorUtils
11 | import robomimic.utils.obs_utils as ObsUtils
12 | from robomimic.config.config import Config
13 | from robomimic.algo import register_algo_factory_func, algo_name_to_factory_func, HBC, ValuePlanner, ValueAlgo, GL_VAE
14 |
15 |
16 | @register_algo_factory_func("iris")
17 | def algo_config_to_class(algo_config):
18 | """
19 | Maps algo config to the IRIS algo class to instantiate, along with additional algo kwargs.
20 |
21 | Args:
22 | algo_config (Config instance): algo config
23 |
24 | Returns:
25 | algo_class: subclass of Algo
26 | algo_kwargs (dict): dictionary of additional kwargs to pass to algorithm
27 | """
28 | pol_cls, _ = algo_name_to_factory_func("bc")(algo_config.actor)
29 | plan_cls, _ = algo_name_to_factory_func("gl")(algo_config.value_planner.planner)
30 | value_cls, _ = algo_name_to_factory_func("bcq")(algo_config.value_planner.value)
31 | return IRIS, dict(policy_algo_class=pol_cls, planner_algo_class=plan_cls, value_algo_class=value_cls)
32 |
33 |
34 | class IRIS(HBC, ValueAlgo):
35 | """
36 | Implementation of IRIS (https://arxiv.org/abs/1911.05321).
37 | """
38 | def __init__(
39 | self,
40 | planner_algo_class,
41 | value_algo_class,
42 | policy_algo_class,
43 | algo_config,
44 | obs_config,
45 | global_config,
46 | obs_key_shapes,
47 | ac_dim,
48 | device,
49 | ):
50 | """
51 | Args:
52 | planner_algo_class (Algo class): algo class for the planner
53 |
54 | policy_algo_class (Algo class): algo class for the policy
55 |
56 | algo_config (Config object): instance of Config corresponding to the algo section
57 | of the config
58 |
59 | obs_config (Config object): instance of Config corresponding to the observation
60 | section of the config
61 |
62 | global_config (Config object): global training config
63 |
64 | obs_key_shapes (OrderedDict): dictionary that maps input/output observation keys to shapes
65 |
66 | ac_dim (int): action dimension
67 |
68 | device: torch device
69 | """
70 | self.algo_config = algo_config
71 | self.obs_config = obs_config
72 | self.global_config = global_config
73 |
74 | self.ac_dim = ac_dim
75 | self.device = device
76 |
77 | self._subgoal_step_count = 0 # current step count for deciding when to update subgoal
78 | self._current_subgoal = None # latest subgoal
79 | self._subgoal_update_interval = self.algo_config.subgoal_update_interval # subgoal update frequency
80 | self._subgoal_horizon = self.algo_config.value_planner.planner.subgoal_horizon
81 | self._actor_horizon = self.algo_config.actor.rnn.horizon
82 |
83 | self._algo_mode = self.algo_config.mode
84 | assert self._algo_mode in ["separate", "cascade"]
85 |
86 | self.planner = ValuePlanner(
87 | planner_algo_class=planner_algo_class,
88 | value_algo_class=value_algo_class,
89 | algo_config=algo_config.value_planner,
90 | obs_config=obs_config.value_planner,
91 | global_config=global_config,
92 | obs_key_shapes=obs_key_shapes,
93 | ac_dim=ac_dim,
94 | device=device
95 | )
96 |
97 | self.actor_goal_shapes = self.planner.subgoal_shapes
98 | assert not algo_config.latent_subgoal.enabled, "IRIS does not support latent subgoals"
99 |
100 | # only for the actor: override goal modalities and shapes to match the subgoal set by the planner
101 | actor_obs_key_shapes = deepcopy(obs_key_shapes)
102 | # make sure we are not modifying existing observation key shapes
103 | for k in self.actor_goal_shapes:
104 | if k in actor_obs_key_shapes:
105 | assert actor_obs_key_shapes[k] == self.actor_goal_shapes[k]
106 | actor_obs_key_shapes.update(self.actor_goal_shapes)
107 |
108 | goal_modalities = {obs_modality: [] for obs_modality in ObsUtils.OBS_MODALITY_CLASSES.keys()}
109 | for k in self.actor_goal_shapes.keys():
110 | goal_modalities[ObsUtils.OBS_KEYS_TO_MODALITIES[k]].append(k)
111 |
112 | actor_obs_config = deepcopy(obs_config.actor)
113 | with actor_obs_config.unlocked():
114 | actor_obs_config["goal"] = Config(**goal_modalities)
115 |
116 | self.actor = policy_algo_class(
117 | algo_config=algo_config.actor,
118 | obs_config=actor_obs_config,
119 | global_config=global_config,
120 | obs_key_shapes=actor_obs_key_shapes,
121 | ac_dim=ac_dim,
122 | device=device
123 | )
124 |
125 | def process_batch_for_training(self, batch):
126 | """
127 | Processes input batch from a data loader to filter out
128 | relevant information and prepare the batch for training.
129 |
130 | Args:
131 | batch (dict): dictionary with torch.Tensors sampled
132 | from a data loader
133 |
134 | Returns:
135 | input_batch (dict): processed and filtered batch that
136 | will be used for training
137 | """
138 | input_batch = dict()
139 |
140 | input_batch["planner"] = self.planner.process_batch_for_training(batch)
141 | input_batch["actor"] = self.actor.process_batch_for_training(batch)
142 |
143 | if self.algo_config.actor_use_random_subgoals:
144 | # optionally use randomly sampled step between [1, seq_length] as policy goal
145 | policy_subgoal_indices = torch.randint(
146 | low=0, high=self.global_config.train.seq_length, size=(batch["actions"].shape[0],))
147 | goal_obs = TensorUtils.gather_sequence(batch["next_obs"], policy_subgoal_indices)
148 | goal_obs = TensorUtils.to_device(TensorUtils.to_float(goal_obs), self.device)
149 | input_batch["actor"]["goal_obs"] = goal_obs
150 | else:
151 | # otherwise, use planner subgoal target as goal for the policy
152 | input_batch["actor"]["goal_obs"] = input_batch["planner"]["planner"]["target_subgoals"]
153 |
154 | return TensorUtils.to_device(TensorUtils.to_float(input_batch), self.device)
155 |
156 | def get_state_value(self, obs_dict, goal_dict=None):
157 | """
158 | Get state value outputs.
159 |
160 | Args:
161 | obs_dict (dict): current observation
162 | goal_dict (dict): (optional) goal
163 |
164 | Returns:
165 | value (torch.Tensor): value tensor
166 | """
167 | return self.planner.get_state_value(obs_dict=obs_dict, goal_dict=goal_dict)
168 |
169 | def get_state_action_value(self, obs_dict, actions, goal_dict=None):
170 | """
171 | Get state-action value outputs.
172 |
173 | Args:
174 | obs_dict (dict): current observation
175 | actions (torch.Tensor): action
176 | goal_dict (dict): (optional) goal
177 |
178 | Returns:
179 | value (torch.Tensor): value tensor
180 | """
181 | return self.planner.get_state_action_value(obs_dict=obs_dict, actions=actions, goal_dict=goal_dict)
182 |
--------------------------------------------------------------------------------
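A short sketch of the `TensorUtils.gather_sequence` step used in `process_batch_for_training` above: for each batch element it picks one timestep out of a `(batch, seq, ...)` tensor, which IRIS uses to sample a random future observation as the actor's goal (shapes below are made up):

```
import torch
import robomimic.utils.tensor_utils as TensorUtils

next_obs = {"object": torch.randn(4, 10, 3)}        # (batch=4, seq=10, dim=3)
indices = torch.randint(low=0, high=10, size=(4,))  # one timestep per batch element
goal_obs = TensorUtils.gather_sequence(next_obs, indices)
print(goal_obs["object"].shape)                     # torch.Size([4, 3])
```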
/robomimic/config/__init__.py:
--------------------------------------------------------------------------------
1 | from robomimic.config.config import Config
2 | from robomimic.config.base_config import config_factory, get_all_registered_configs
3 |
4 | # note: these imports are needed to register these classes in the global config registry
5 | from robomimic.config.bc_config import BCConfig
6 | from robomimic.config.bcq_config import BCQConfig
7 | from robomimic.config.cql_config import CQLConfig
8 | from robomimic.config.awac_config import AWACConfig
9 | from robomimic.config.iql_config import IQLConfig
10 | from robomimic.config.gl_config import GLConfig
11 | from robomimic.config.hbc_config import HBCConfig
12 | from robomimic.config.iris_config import IRISConfig
13 | from robomimic.config.td3_bc_config import TD3_BCConfig
14 | from robomimic.config.vae_config import VAEConfig
--------------------------------------------------------------------------------
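As with the algo registry, these imports register each config class under its `ALGO_NAME`. A minimal sketch of building a default config by name (the printed values are assumptions based on the defaults shown below):

```
from robomimic.config import config_factory, get_all_registered_configs

print(sorted(get_all_registered_configs().keys()))  # "awac", "bc", "bcq", "cql", ...
config = config_factory(algo_name="awac")
print(config.train.batch_size)  # 1024, set in AWACConfig.train_config below
```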
/robomimic/config/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/awac_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/awac_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/base_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/base_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/bc_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/bc_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/bcq_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/bcq_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/cql_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/cql_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/gl_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/gl_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/hbc_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/hbc_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/iql_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/iql_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/iris_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/iris_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/td3_bc_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/td3_bc_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/__pycache__/vae_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/config/__pycache__/vae_config.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/config/awac_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for AWAC algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 |
7 |
8 | class AWACConfig(BaseConfig):
9 | ALGO_NAME = "awac"
10 |
11 | def train_config(self):
12 | """
13 | Update from superclass to change default batch size.
14 | """
15 | super(AWACConfig, self).train_config()
16 |
17 | # increase batch size to 1024 (found to work better for most manipulation experiments)
18 | self.train.batch_size = 1024
19 |
20 | def algo_config(self):
21 | """
22 | This function populates the `config.algo` attribute of the config, and is given to the
23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
24 | argument to the constructor. Any parameter that an algorithm needs to determine its
25 | training and test-time behavior should be populated here.
26 | """
27 | super(AWACConfig, self).algo_config()
28 |
29 | # optimization parameters
30 | self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate
31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength
34 |
35 | self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate
36 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
37 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
38 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength
39 |
40 | # target network related parameters
41 | self.algo.discount = 0.99 # discount factor to use
42 | self.algo.target_tau = 0.01 # update rate for target networks
43 | self.algo.ignore_dones = False
44 | self.algo.use_negative_rewards = False
45 | self.algo.use_hardcoded_weights = False
46 | self.algo.hc_weights_key = "final_success"
47 | self.algo.relabel_dones_mode = None
48 | self.algo.relabel_rewards_mode = None
49 |
50 | # Actor network settings
51 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet)
52 |
53 | # Actor network settings - shared
54 | self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net
55 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage
56 | self.algo.actor.net.common.use_tanh = False
57 |
58 | # Actor network settings - gaussian
59 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value
60 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net
61 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not
62 |
63 | self.algo.actor.net.gmm.num_modes = 5
64 | self.algo.actor.net.gmm.min_std = 0.0001
65 |
66 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions
67 |
68 | self.algo.actor.max_gradient_norm = None
69 |
70 | # actor residual MLP settings
71 | self.algo.actor.res_mlp.enabled = False
72 | self.algo.actor.res_mlp.num_blocks = 4
73 | self.algo.actor.res_mlp.hidden_dim = 1024
74 | self.algo.actor.res_mlp.use_layer_norm = True
75 |
76 | # ================== Critic Network Config ===================
77 | # critic ensemble parameters (TD3 trick)
78 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble
79 | self.algo.critic.ensemble_method = "min"
80 | self.algo.critic.target_ensemble_method = "mean"
81 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions
82 | self.algo.critic.use_huber = False
83 |
84 | # critic residual MLP settings
85 | self.algo.critic.res_mlp.enabled = False
86 | self.algo.critic.res_mlp.num_blocks = 4
87 | self.algo.critic.res_mlp.hidden_dim = 1024
88 | self.algo.critic.res_mlp.use_layer_norm = True
89 |
90 | # distributional critic
91 | self.algo.critic.distributional.enabled = False # train distributional critic
92 | self.algo.critic.distributional.num_atoms = 51 # number of values in categorical distribution
93 | self.algo.critic.value_bounds = None
94 |
95 | self.algo.adv.use_mle_for_vf = False
96 | self.algo.adv.vf_K = 4
97 | self.algo.adv.value_method = "mean"
98 | self.algo.adv.filter_type = "softmax"
99 | self.algo.adv.use_final_clip = False
100 | self.algo.adv.clip_adv_value = None
101 | self.algo.adv.beta = 1.0
102 | self.algo.adv.multi_weight = None
103 |
104 | self.algo.critic.max_gradient_norm = None
105 |
106 | self.algo.hc_weights.use_adv_score = False
107 |
108 | # RNN policy settings
109 | self.algo.actor.rnn.enabled = False # whether to train RNN policy
110 | self.algo.actor.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length
111 | self.algo.actor.rnn.hidden_dim = 400 # hidden dimension size
112 | self.algo.actor.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU"
113 | self.algo.actor.rnn.num_layers = 2 # number of RNN layers that are stacked
114 | self.algo.actor.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence)
115 | self.algo.actor.rnn.kwargs.bidirectional = False # rnn kwargs
116 | self.algo.actor.rnn.use_res_mlp = False
117 | self.algo.actor.rnn.res_mlp_kwargs = None
118 | self.algo.actor.rnn.kwargs.do_not_lock_keys()
119 |
120 | self.algo.hc_weights.use_hardcode_weight = False
--------------------------------------------------------------------------------
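Configs returned by `config_factory` come back locked; a minimal sketch of overriding a couple of the AWAC defaults above before training, assuming the `values_unlocked()` context manager from upstream robomimic:

```
from robomimic.config import config_factory

config = config_factory(algo_name="awac")
with config.values_unlocked():
    config.algo.adv.beta = 0.5                # advantage weighting temperature
    config.algo.actor.res_mlp.enabled = True  # switch on the residual-MLP actor
```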
/robomimic/config/bc_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for BC algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 |
7 |
8 | class BCConfig(BaseConfig):
9 | ALGO_NAME = "bc"
10 |
11 | def algo_config(self):
12 | """
13 | This function populates the `config.algo` attribute of the config, and is given to the
14 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
15 | argument to the constructor. Any parameter that an algorithm needs to determine its
16 | training and test-time behavior should be populated here.
17 | """
18 | super(BCConfig, self).algo_config()
19 |
20 | # optimization parameters
21 | self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate
22 | self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
23 | self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
24 | self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength
25 |
26 | # loss weights
27 | self.algo.loss.l2_weight = 1.0 # L2 loss weight
28 | self.algo.loss.l1_weight = 0.0 # L1 loss weight
29 | self.algo.loss.cos_weight = 0.0 # cosine loss weight
30 |
31 | # MLP network architecture (layers after observation encoder and RNN, if present)
32 | self.algo.actor_layer_dims = (1024, 1024)
33 | self.algo.max_gradient_norm = None
34 |
35 | # residual MLP settings
36 | self.algo.res_mlp.enabled = False
37 | self.algo.res_mlp.num_blocks = 4
38 | self.algo.res_mlp.hidden_dim = 1024
39 | self.algo.res_mlp.use_layer_norm = True
40 |
41 | # stochastic Gaussian policy settings
42 | self.algo.gaussian.enabled = False # whether to train a Gaussian policy
43 | self.algo.gaussian.fixed_std = False # whether to train std output or keep it constant
44 | self.algo.gaussian.init_std = 0.1 # initial standard deviation (or constant)
45 | self.algo.gaussian.min_std = 0.01 # minimum std output from network
46 | self.algo.gaussian.std_activation = "softplus" # activation to use for std output from policy net
47 | self.algo.gaussian.low_noise_eval = True # low-std at test-time
48 |
49 | # stochastic GMM policy settings
50 | self.algo.gmm.enabled = False # whether to train a GMM policy
51 | self.algo.gmm.num_modes = 5 # number of GMM modes
52 | self.algo.gmm.min_std = 0.0001 # minimum std output from network
53 | self.algo.gmm.std_activation = "softplus" # activation to use for std output from policy net
54 | self.algo.gmm.low_noise_eval = True # low-std at test-time
55 |
56 | # stochastic VAE policy settings
57 | self.algo.vae.enabled = False # whether to train a VAE policy
58 | self.algo.vae.latent_dim = 14 # VAE latent dimension - set to twice the dimensionality of action space
59 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable)
60 | self.algo.vae.kl_weight = 1. # beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO
61 |
62 | # VAE decoder settings
63 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation
64 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss
65 |
66 | # VAE prior settings
67 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1)
68 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations
69 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior
70 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes
71 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights
72 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior
73 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension
74 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass
75 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp
76 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate
77 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp
78 |
79 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions
80 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions
81 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior)
82 |
83 | # RNN policy settings
84 | self.algo.rnn.enabled = False # whether to train RNN policy
85 | self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length
86 | self.algo.rnn.hidden_dim = 400 # hidden dimension size
87 | self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU"
88 | self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked
89 | self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence)
90 | self.algo.rnn.kwargs.bidirectional = False # rnn kwargs
91 | self.algo.rnn.kwargs.do_not_lock_keys()
92 |
93 | self.algo.hc_weights.traj_label_type = "last"
94 |
95 | self.algo.hc_weights.batch_normalize = True
--------------------------------------------------------------------------------
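The JSON files under `robomimic/exps/` consumed by `train.py --config` are dumps of these config objects. A minimal sketch of producing one from the BC defaults above, assuming `Config` subclasses `dict` (as in upstream robomimic) so it serializes directly; the output path is arbitrary:

```
import json
from robomimic.config import config_factory

config = config_factory(algo_name="bc")
with open("/tmp/bc_default.json", "w") as f:
    json.dump(config, f, indent=4)  # same shape as robomimic/exps/templates/bc.json
```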
/robomimic/config/bcq_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for BCQ algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 | from robomimic.config.bc_config import BCConfig
7 |
8 |
9 | class BCQConfig(BaseConfig):
10 | ALGO_NAME = "bcq"
11 |
12 | def algo_config(self):
13 | """
14 | This function populates the `config.algo` attribute of the config, and is given to the
15 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
16 | argument to the constructor. Any parameter that an algorithm needs to determine its
17 | training and test-time behavior should be populated here.
18 | """
19 | super(BCQConfig, self).algo_config()
20 |
21 | # optimization parameters
22 | self.algo.optim_params.critic.learning_rate.initial = 1e-3 # critic learning rate
23 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
24 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
25 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength
26 | self.algo.optim_params.critic.start_epoch = -1 # number of epochs before starting critic training (-1 means start right away)
27 | self.algo.optim_params.critic.end_epoch = -1 # number of epochs before ending critic training (-1 means never stop)
28 |
29 | self.algo.optim_params.action_sampler.learning_rate.initial = 1e-3 # action sampler learning rate
30 | self.algo.optim_params.action_sampler.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
31 | self.algo.optim_params.action_sampler.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
32 | self.algo.optim_params.action_sampler.regularization.L2 = 0.00 # L2 regularization strength
33 | self.algo.optim_params.action_sampler.start_epoch = -1 # number of epochs before starting action sampler training (-1 means start right away)
34 | self.algo.optim_params.action_sampler.end_epoch = -1 # number of epochs before ending action sampler training (-1 means never stop)
35 |
36 | self.algo.optim_params.actor.learning_rate.initial = 1e-3 # actor learning rate
37 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
38 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
39 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength
40 | self.algo.optim_params.actor.start_epoch = -1 # number of epochs before starting actor training (-1 means start right away)
41 | self.algo.optim_params.actor.end_epoch = -1 # number of epochs before ending actor training (-1 means never stop)
42 |
43 | # target network related parameters
44 | self.algo.discount = 0.99 # discount factor to use
45 | self.algo.n_step = 1 # for using n-step returns in TD-updates
46 | self.algo.target_tau = 0.005 # update rate for target networks
47 | self.algo.infinite_horizon = False # if True, scale terminal rewards by 1 / (1 - discount) to treat as infinite horizon
48 |
49 | # ================== Critic Network Config ===================
50 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic
51 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping)
52 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates
53 | self.algo.critic.num_action_samples = 10 # number of actions to sample per training batch to get target critic value
54 | self.algo.critic.num_action_samples_rollout = 100 # number of actions to sample per environment step
55 |
56 | # critic ensemble parameters (TD3 trick)
57 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble
58 | self.algo.critic.ensemble.weight = 0.75 # weighting for mixing min and max for target Q value
59 |
60 | # distributional critic
61 | self.algo.critic.distributional.enabled = False # train distributional critic (C51)
62 | self.algo.critic.distributional.num_atoms = 51 # number of values in categorical distribution
63 |
64 | self.algo.critic.layer_dims = (300, 400) # size of critic MLP
65 |
66 | # ================== Action Sampler Config ===================
67 | self.algo.action_sampler = BCConfig().algo
68 | # use VAE by default
69 | self.algo.action_sampler.vae.enabled = True
70 | # remove unused parts of BCConfig algo config
71 | del self.algo.action_sampler.optim_params # since action sampler optim params specified at top-level
72 | del self.algo.action_sampler.loss
73 | del self.algo.action_sampler.gaussian
74 | del self.algo.action_sampler.rnn
75 |
76 | # Number of epochs before freezing encoder (-1 for no freezing). Only applies to cVAE-based action samplers.
77 | with self.algo.action_sampler.unlocked():
78 | self.algo.action_sampler.freeze_encoder_epoch = -1
79 |
80 | # ================== Actor Network Config ===================
81 | self.algo.actor.enabled = False # whether to use the actor perturbation network
82 | self.algo.actor.perturbation_scale = 0.05 # size of learned action perturbations
83 | self.algo.actor.layer_dims = (300, 400) # size of actor MLP
84 |
--------------------------------------------------------------------------------
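Note that BCQ embeds an entire `BCConfig.algo` subtree as its action sampler, so sampler settings are addressed under `algo.action_sampler`. A minimal sketch:

```
from robomimic.config import config_factory

config = config_factory(algo_name="bcq")
print(config.algo.action_sampler.vae.enabled)           # True - cVAE sampler by default
print(config.algo.action_sampler.freeze_encoder_epoch)  # -1 - encoder never frozen
```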
/robomimic/config/cql_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for CQL algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 |
7 |
8 | class CQLConfig(BaseConfig):
9 | ALGO_NAME = "cql"
10 |
11 | def train_config(self):
12 | """
13 | Update from superclass to change default batch size.
14 | """
15 | super(CQLConfig, self).train_config()
16 |
17 | # increase batch size to 1024 (found to work better for most manipulation experiments)
18 | self.train.batch_size = 1024
19 |
20 | def algo_config(self):
21 | """
22 | This function populates the `config.algo` attribute of the config, and is given to the
23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
24 | argument to the constructor. Any parameter that an algorithm needs to determine its
25 | training and test-time behavior should be populated here.
26 | """
27 | super(CQLConfig, self).algo_config()
28 |
29 | # optimization parameters
30 | self.algo.optim_params.critic.learning_rate.initial = 1e-3 # critic learning rate
31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength
34 |
35 | self.algo.optim_params.actor.learning_rate.initial = 3e-4 # actor learning rate
36 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
37 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
38 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength
39 |
40 | # target network related parameters
41 | self.algo.discount = 0.99 # discount factor to use
42 | self.algo.n_step = 1 # for using n-step returns in TD-updates
43 | self.algo.target_tau = 0.005 # update rate for target networks
44 |
45 | # ================== Actor Network Config ===================
46 | self.algo.actor.bc_start_steps = 0 # uses BC policy loss for first n-training steps
47 | self.algo.actor.target_entropy = "default" # None is fixed entropy, otherwise is automatically tuned to match target. Can specify "default" as well for default tuning target
48 | self.algo.actor.max_gradient_norm = None # L2 gradient clipping for actor
49 |
50 | # Actor network settings
51 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet)
52 |
53 | # Actor network settings - shared
54 | self.algo.actor.net.common.std_activation = "exp" # Activation to use for std output from policy net
55 | self.algo.actor.net.common.use_tanh = True # Whether to use tanh at output of actor network
56 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage
57 |
58 | # Actor network settings - gaussian
59 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value
60 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net
61 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not
62 |
63 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions
64 |
65 | # ================== Critic Network Config ===================
66 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic
67 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping)
68 |
69 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates
70 |
71 | self.algo.critic.num_action_samples = 1 # number of actions to sample per training batch when computing the target critic value; the maximum Q-value over the sampled actions is used in the TD backup
72 |
73 | # cql settings for critic
74 | self.algo.critic.cql_weight = 1.0 # weighting for cql component of critic loss (only used if target_q_gap is < 0 or None)
75 | self.algo.critic.deterministic_backup = True # if not set, subtract weighted logprob of action when doing backup
76 | self.algo.critic.min_q_weight = 1.0 # min q weight (scaling factor) to apply
77 | self.algo.critic.target_q_gap = 5.0 # if set, threshold on the Q-value gap above which Q-values are penalized more, with the penalty weight auto-tuned (note: this overrides cql_weight above!). Use None or a negative value to disable
78 | self.algo.critic.num_random_actions = 10 # Number of random actions to sample when calculating CQL loss
79 |
80 | # critic ensemble parameters (TD3 trick)
81 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble
82 |
83 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions
84 |
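A minimal sketch of exercising the config above, instantiating it and reading back two of the defaults set in this file (assuming robomimic is importable; the attribute paths are exactly the keys populated above):

    from robomimic.config.cql_config import CQLConfig

    cfg = CQLConfig()
    print(cfg.train.batch_size)        # 1024, set in train_config above
    print(cfg.algo.critic.cql_weight)  # 1.0, set in algo_config above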
--------------------------------------------------------------------------------
/robomimic/config/gl_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for Goal Learning (sub-algorithm used by hierarchical models like HBC and IRIS).
3 | This class of model predicts (or samples) subgoal observations given a current observation.
4 | """
5 |
6 | from robomimic.config.base_config import BaseConfig
7 |
8 |
9 | class GLConfig(BaseConfig):
10 | ALGO_NAME = "gl"
11 |
12 | def algo_config(self):
13 | """
14 | This function populates the `config.algo` attribute of the config, and is given to the
15 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
16 | argument to the constructor. Any parameter that an algorithm needs to determine its
17 | training and test-time behavior should be populated here.
18 | """
19 | super(GLConfig, self).algo_config()
20 |
21 | # optimization parameters
22 | self.algo.optim_params.goal_network.learning_rate.initial = 1e-4 # goal network learning rate
23 | self.algo.optim_params.goal_network.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
24 | self.algo.optim_params.goal_network.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
25 | self.algo.optim_params.goal_network.regularization.L2 = 0.00 # L2 regularization strength
26 |
27 | # subgoal definition: observation that is @subgoal_horizon number of timesteps in future from current observation
28 | self.algo.subgoal_horizon = 10
29 |
30 | # MLP size for deterministic goal network (unused if VAE is enabled)
31 | self.algo.ae.planner_layer_dims = (300, 400)
32 |
33 | # ================== VAE config ==================
34 | self.algo.vae.enabled = True # set to true to use VAE network
35 | self.algo.vae.latent_dim = 16 # VAE latent dimension
36 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable)
37 | self.algo.vae.kl_weight = 1. # beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO
38 |
39 | # VAE decoder settings
40 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation
41 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss
42 |
43 | # VAE prior settings
44 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1)
45 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations
46 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior
47 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes
48 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights
49 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior
50 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension
51 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass
52 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp
53 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate
54 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp
55 |
56 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions
57 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions
58 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior)
59 |
60 | def observation_config(self):
61 | """
62 | Update from superclass to specify subgoal modalities.
63 | """
64 | super(GLConfig, self).observation_config()
65 | self.observation.modalities.subgoal.low_dim = [ # specify low-dim subgoal observations for agent to predict
66 | "robot0_eef_pos",
67 | "robot0_eef_quat",
68 | "robot0_gripper_qpos",
69 | "object",
70 | ]
71 | self.observation.modalities.subgoal.rgb = [] # specify rgb image subgoal observations for agent to predict
72 | self.observation.modalities.subgoal.depth = []
73 | self.observation.modalities.subgoal.scan = []
74 | self.observation.modalities.subgoal.do_not_lock_keys()
75 |
76 | @property
77 | def all_obs_keys(self):
78 | """
79 | Update from superclass to include subgoals.
80 | """
81 | # pool all modalities
82 | return sorted(tuple(set([
83 | obs_key for group in [
84 | self.observation.modalities.obs.values(),
85 | self.observation.modalities.goal.values(),
86 | self.observation.modalities.subgoal.values(),
87 | ]
88 | for modality in group
89 | for obs_key in modality
90 | ])))
91 |
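The `all_obs_keys` property above de-duplicates observation keys across modality groups with a nested comprehension; a self-contained sketch of the same pattern on toy data:

    groups = [["eef_pos", "object"], ["object", "eef_quat"]]
    all_keys = sorted(tuple(set(k for g in groups for k in g)))
    # -> ['eef_pos', 'eef_quat', 'object']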
--------------------------------------------------------------------------------
/robomimic/config/hbc_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for HBC algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 | from robomimic.config.gl_config import GLConfig
7 | from robomimic.config.bc_config import BCConfig
8 |
9 |
10 | class HBCConfig(BaseConfig):
11 | ALGO_NAME = "hbc"
12 |
13 | def train_config(self):
14 | """
15 | Update from superclass to change default sequence length to load from dataset.
16 | """
17 | super(HBCConfig, self).train_config()
18 | self.train.seq_length = 10 # length of experience sequence to fetch from the buffer
19 |
20 | def algo_config(self):
21 | """
22 | This function populates the `config.algo` attribute of the config, and is given to the
23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
24 | argument to the constructor. Any parameter that an algorithm needs to determine its
25 | training and test-time behavior should be populated here.
26 | """
27 | super(HBCConfig, self).algo_config()
28 |
29 | # One of ["separate", "cascade", "actor_only", "planner_only"]. In "separate" mode (default),
30 | # the planner and actor are trained independently and then the planner subgoal predictions are
31 | # used to condition the actor at test-time. In "cascade" mode, the actor is trained directly
32 | # on planner subgoal predictions. In "actor_only" mode, only the actor is trained, and in
33 | # "planner_only" mode, only the planner is trained.
34 | self.algo.mode = "separate"
35 | self.algo.actor_use_random_subgoals = False # whether to sample subgoal index from [1, subgoal_horizon]
36 | self.algo.subgoal_update_interval = 10 # how frequently the subgoal should be updated at test-time
37 |
38 |
39 | # ================== Latent Subgoal Config ==================
40 | self.algo.latent_subgoal.enabled = False # if True, use VAE latent space as subgoals for actor, instead of reconstructions
41 |
42 | # prior correction trick for actor and value training: instead of using encoder for
43 | # transforming subgoals to latent subgoals, generate prior samples and choose
44 | # the closest one to the encoder output
45 | self.algo.latent_subgoal.prior_correction.enabled = False
46 | self.algo.latent_subgoal.prior_correction.num_samples = 100
47 |
48 | # ================== Planner Config ==================
49 | self.algo.planner = GLConfig().algo # config for goal learning
50 | # set subgoal horizon explicitly
51 | self.algo.planner.subgoal_horizon = 10
52 | # ensure VAE is used
53 | self.algo.planner.vae.enabled = True
54 |
55 | # ================== Actor Config ===================
56 | self.algo.actor = BCConfig().algo
57 | # use RNN
58 | self.algo.actor.rnn.enabled = True
59 | self.algo.actor.rnn.horizon = 10
60 | # remove unused parts of BCConfig algo config
61 | del self.algo.actor.gaussian
62 | del self.algo.actor.gmm
63 | del self.algo.actor.vae
64 |
65 | def observation_config(self):
66 | """
67 | Update from superclass so that planner and actor each get their own observation config.
68 | """
69 | self.observation.planner = GLConfig().observation
70 | self.observation.actor = BCConfig().observation
71 |
72 | @property
73 | def use_goals(self):
74 | """
75 | Update from superclass - planner goal modalities determine goal-conditioning
76 | """
77 | return len(
78 | self.observation.planner.modalities.goal.low_dim +
79 | self.observation.planner.modalities.goal.rgb) > 0
80 |
81 | @property
82 | def all_obs_keys(self):
83 | """
84 | Update from superclass to include modalities from planner and actor.
85 | """
86 | # pool all modalities
87 | return sorted(tuple(set([
88 | obs_key for group in [
89 | self.observation.planner.modalities.obs.values(),
90 | self.observation.planner.modalities.goal.values(),
91 | self.observation.planner.modalities.subgoal.values(),
92 | self.observation.actor.modalities.obs.values(),
93 | self.observation.actor.modalities.goal.values(),
94 | ]
95 | for modality in group
96 | for obs_key in modality
97 | ])))
98 |
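Because `HBCConfig` embeds `GLConfig().algo` and `BCConfig().algo` as sub-configs, planner and actor settings live under nested keys; a minimal sketch of reading them back:

    from robomimic.config.hbc_config import HBCConfig

    cfg = HBCConfig()
    print(cfg.algo.planner.vae.enabled)  # True, forced in algo_config above
    print(cfg.algo.actor.rnn.horizon)    # 10, set in algo_config above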
--------------------------------------------------------------------------------
/robomimic/config/iql_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for IQL algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 |
7 |
8 | class IQLConfig(BaseConfig):
9 | ALGO_NAME = "iql"
10 |
11 | def train_config(self):
12 | """
13 | Update from superclass to change default batch size.
14 | """
15 | super(IQLConfig, self).train_config()
16 |
17 | # increase batch size to 1024 (found to work better for most manipulation experiments)
18 | self.train.batch_size = 1024
19 |
20 | def algo_config(self):
21 | """
22 | This function populates the `config.algo` attribute of the config, and is given to the
23 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
24 | argument to the constructor. Any parameter that an algorithm needs to determine its
25 | training and test-time behavior should be populated here.
26 | """
27 | super(IQLConfig, self).algo_config()
28 |
29 | # optimization parameters
30 | self.algo.optim_params.critic.learning_rate.initial = 1e-4 # critic learning rate
31 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
32 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
33 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength
34 |
35 | self.algo.optim_params.vf.learning_rate.initial = 1e-4 # value function learning rate
36 | self.algo.optim_params.vf.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
37 | self.algo.optim_params.vf.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
38 | self.algo.optim_params.vf.regularization.L2 = 0.00 # L2 regularization strength
39 |
40 | self.algo.optim_params.actor.learning_rate.initial = 1e-4 # actor learning rate
41 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.0 # factor to decay LR by (if epoch schedule non-empty)
42 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
43 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength
44 |
45 | # target network related parameters
46 | self.algo.discount = 0.99 # discount factor to use
47 | self.algo.target_tau = 0.01 # update rate for target networks
48 | self.algo.ignore_dones = False # if True, ignore done signals from the dataset
49 | self.algo.use_negative_rewards = False # if True, use a negative sparse reward convention (e.g. rewards in {-1, 0} instead of {0, 1})
50 | self.algo.use_shaped_rewards = False # if True, use shaped rewards instead of sparse task completion rewards
51 | self.algo.relabel_dones_mode = None # optional strategy for relabeling dones in the dataset (None to disable)
52 | self.algo.relabel_rewards_mode = None # optional strategy for relabeling rewards in the dataset (None to disable)
53 |
54 | # Actor network settings
55 | self.algo.actor.net.type = "gaussian" # Options are currently only "gaussian" (no support for GMM yet)
56 |
57 | # Actor network settings - shared
58 | self.algo.actor.net.common.std_activation = "softplus" # Activation to use for std output from policy net
59 | self.algo.actor.net.common.low_noise_eval = True # Whether to use deterministic action sampling at eval stage
60 | self.algo.actor.net.common.use_tanh = False # Whether to use tanh at output of actor network
61 |
62 | # Actor network settings - gaussian
63 | self.algo.actor.net.gaussian.init_last_fc_weight = 0.001 # If set, will override the initialization of the final fc layer to be uniformly sampled limited by this value
64 | self.algo.actor.net.gaussian.init_std = 0.3 # Relative scaling factor for std from policy net
65 | self.algo.actor.net.gaussian.fixed_std = False # Whether to learn std dev or not
66 |
67 | self.algo.actor.net.gmm.num_modes = 5 # number of GMM modes
68 | self.algo.actor.net.gmm.min_std = 0.0001 # minimum std output from network
69 |
70 | self.algo.actor.layer_dims = (300, 400) # actor MLP layer dimensions
71 |
72 | self.algo.actor.max_gradient_norm = None # L2 gradient clipping for actor (None to use no clipping)
73 |
74 | # actor residual MLP settings
75 | self.algo.actor.res_mlp.enabled = False
76 | self.algo.actor.res_mlp.num_blocks = 4
77 | self.algo.actor.res_mlp.hidden_dim = 1024
78 | self.algo.actor.res_mlp.use_layer_norm = True
79 |
80 | # ================== Critic Network Config ===================
81 | # critic ensemble parameters (TD3 trick)
82 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble
83 | self.algo.critic.layer_dims = (300, 400) # critic MLP layer dimensions
84 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic
85 |
86 | # critic residual MLP settings
87 | self.algo.critic.res_mlp.enabled = False
88 | self.algo.critic.res_mlp.num_blocks = 4
89 | self.algo.critic.res_mlp.hidden_dim = 1024
90 | self.algo.critic.res_mlp.use_layer_norm = True
91 |
92 | self.algo.adv.filter_type = "softmax" # how to turn advantage estimates into actor weights ("softmax" exponentiates advantages scaled by beta)
93 | self.algo.adv.use_final_clip = True # whether to clip the final advantage-based weights
94 | self.algo.adv.clip_adv_value = None # optional value to clip raw advantage estimates at (None to disable)
95 | self.algo.adv.beta = 1.0 # temperature for advantage weighting
96 |
97 | self.algo.vf_quantile = 0.9 # expectile used for value function regression
98 |
99 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping)
100 |
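`vf_quantile` above is the expectile used in IQL-style value regression. A self-contained sketch of that asymmetric loss (illustrative names, not this repo's API):

    import torch

    def expectile_loss(vf_pred, q_target, quantile=0.9):
        # asymmetric L2 loss |quantile - 1(u < 0)| * u^2, averaged over the batch:
        # value under-estimates are weighted by quantile, over-estimates by (1 - quantile)
        u = q_target - vf_pred
        weight = torch.abs(quantile - (u < 0).float())
        return (weight * u ** 2).mean()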
--------------------------------------------------------------------------------
/robomimic/config/iris_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for IRIS algorithm.
3 | """
4 |
5 | from robomimic.config.bcq_config import BCQConfig
6 | from robomimic.config.gl_config import GLConfig
7 | from robomimic.config.bc_config import BCConfig
8 | from robomimic.config.hbc_config import HBCConfig
9 |
10 |
11 | class IRISConfig(HBCConfig):
12 | ALGO_NAME = "iris"
13 |
14 | def algo_config(self):
15 | """
16 | This function populates the `config.algo` attribute of the config, and is given to the
17 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
18 | argument to the constructor. Any parameter that an algorithm needs to determine its
19 | training and test-time behavior should be populated here.
20 | """
21 | super(IRISConfig, self).algo_config()
22 |
23 | # One of ["separate", "cascade", "actor_only", "planner_only"]. In "separate" mode (default),
24 | # the planner and actor are trained independently and then the planner subgoal predictions are
25 | # used to condition the actor at test-time. In "cascade" mode, the actor is trained directly
26 | # on planner subgoal predictions. In "actor_only" mode, only the actor is trained, and in
27 | # "planner_only" mode, only the planner is trained.
28 | self.algo.mode = "separate"
29 |
30 | self.algo.actor_use_random_subgoals = False # whether to sample subgoal index from [1, subgoal_horizon]
31 | self.algo.subgoal_update_interval = 10 # how frequently the subgoal should be updated at test-time (usually matches train.seq_length)
32 |
33 | # ================== Latent Subgoal Config ==================
34 |
35 | # NOTE: latent subgoals are not supported by IRIS, but superclass expects this config
36 | self.algo.latent_subgoal.enabled = False
37 | self.algo.latent_subgoal.prior_correction.enabled = False
38 | self.algo.latent_subgoal.prior_correction.num_samples = 100
39 |
40 | # ================== Planner Config ==================
41 |
42 | # The ValuePlanner planner component is a Goal Learning VAE model
43 | self.algo.value_planner.planner = GLConfig().algo # config for goal learning
44 | # set subgoal horizon explicitly
45 | self.algo.value_planner.planner.subgoal_horizon = 10
46 | # ensure VAE is used
47 | self.algo.value_planner.planner.vae.enabled = True
48 |
49 | # The ValuePlanner value component is a BCQ model
50 | self.algo.value_planner.value = BCQConfig().algo
51 | self.algo.value_planner.value.actor.enabled = False # ensure no BCQ actor
52 | # number of subgoal samples to use for value planner
53 | self.algo.value_planner.num_samples = 100
54 |
55 | # ================== Actor Config ===================
56 | self.algo.actor = BCConfig().algo
57 | # use RNN
58 | self.algo.actor.rnn.enabled = True
59 | self.algo.actor.rnn.horizon = 10
60 | # remove unused parts of BCConfig algo config
61 | del self.algo.actor.gaussian
62 | del self.algo.actor.gmm
63 | del self.algo.actor.vae
64 |
65 | def observation_config(self):
66 | """
67 | Update from superclass so that value planner and actor each get their own obs config.
68 | """
69 | self.observation.value_planner.planner = GLConfig().observation
70 | self.observation.value_planner.value = BCQConfig().observation
71 | self.observation.actor = BCConfig().observation
72 |
73 | @property
74 | def use_goals(self):
75 | """
76 | Update from superclass - value planner goal modalities determine goal-conditioning.
77 | """
78 | return len(
79 | self.observation.value_planner.planner.modalities.goal.low_dim +
80 | self.observation.value_planner.planner.modalities.goal.rgb) > 0
81 |
82 | @property
83 | def all_obs_keys(self):
84 | """
85 | Update from superclass to include modalities from value planner and actor.
86 | """
87 | # pool all modalities
88 | return sorted(tuple(set([
89 | obs_key for group in [
90 | self.observation.value_planner.planner.modalities.obs.values(),
91 | self.observation.value_planner.planner.modalities.goal.values(),
92 | self.observation.value_planner.planner.modalities.subgoal.values(),
93 | self.observation.value_planner.value.modalities.obs.values(),
94 | self.observation.value_planner.value.modalities.goal.values(),
95 | self.observation.actor.modalities.obs.values(),
96 | self.observation.actor.modalities.goal.values(),
97 | ]
98 | for modality in group
99 | for obs_key in modality
100 | ])))
101 |
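The same nested-key pattern applies here, with the value planner adding one more level; a minimal sketch:

    from robomimic.config.iris_config import IRISConfig

    cfg = IRISConfig()
    print(cfg.algo.value_planner.num_samples)          # 100, set above
    print(cfg.algo.value_planner.value.actor.enabled)  # False, BCQ actor disabled above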
--------------------------------------------------------------------------------
/robomimic/config/td3_bc_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for TD3_BC.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 |
7 |
8 | class TD3_BCConfig(BaseConfig):
9 | ALGO_NAME = "td3_bc"
10 |
11 | def experiment_config(self):
12 | """
13 | Update from subclass to set paper defaults for gym envs.
14 | """
15 | super(TD3_BCConfig, self).experiment_config()
16 |
17 | # no validation and no video rendering
18 | self.experiment.validate = False
19 | self.experiment.render_video = False
20 |
21 | # save 10 checkpoints throughout training
22 | self.experiment.save.every_n_epochs = 20
23 |
24 | # save models that achieve best rollout return instead of best success rate
25 | self.experiment.save.on_best_rollout_return = True
26 | self.experiment.save.on_best_rollout_success_rate = False
27 |
28 | # epoch definition - 5000 gradient steps per epoch, so 200 epochs = 1M gradient steps, with evaluation every epoch
29 | self.experiment.epoch_every_n_steps = 5000
30 |
31 | # evaluate with normal environment rollouts
32 | self.experiment.rollout.enabled = True
33 | self.experiment.rollout.n = 50 # paper uses 10, but we can afford to do 50
34 | self.experiment.rollout.horizon = 1000
35 | self.experiment.rollout.rate = 1 # rollout every epoch to match paper
36 |
37 | def train_config(self):
38 | """
39 | Update from superclass to set paper defaults for gym envs.
40 | """
41 | super(TD3_BCConfig, self).train_config()
42 |
43 | # update to normalize observations
44 | self.train.hdf5_normalize_obs = True
45 |
46 | # increase batch size to 256
47 | self.train.batch_size = 256
48 |
49 | # 200 epochs, with each epoch lasting 5000 gradient steps, for 1M total steps
50 | self.train.num_epochs = 200
51 |
52 | def algo_config(self):
53 | """
54 | This function populates the `config.algo` attribute of the config, and is given to the
55 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
56 | argument to the constructor. Any parameter that an algorithm needs to determine its
57 | training and test-time behavior should be populated here.
58 | """
59 | super(TD3_BCConfig, self).algo_config()
60 |
61 | # optimization parameters
62 | self.algo.optim_params.critic.learning_rate.initial = 3e-4 # critic learning rate
63 | self.algo.optim_params.critic.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
64 | self.algo.optim_params.critic.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
65 | self.algo.optim_params.critic.regularization.L2 = 0.00 # L2 regularization strength
66 | self.algo.optim_params.critic.start_epoch = -1 # number of epochs before starting critic training (-1 means start right away)
67 | self.algo.optim_params.critic.end_epoch = -1 # epoch after which critic training ends (-1 means never end)
68 |
69 | self.algo.optim_params.actor.learning_rate.initial = 3e-4 # actor learning rate
70 | self.algo.optim_params.actor.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
71 | self.algo.optim_params.actor.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
72 | self.algo.optim_params.actor.regularization.L2 = 0.00 # L2 regularization strength
73 | self.algo.optim_params.actor.start_epoch = -1 # number of epochs before starting actor training (-1 means start right away)
74 | self.algo.optim_params.actor.end_epoch = -1 # epoch after which actor training ends (-1 means never end)
75 |
76 | # alpha value - for weighting critic loss vs. BC loss
77 | self.algo.alpha = 2.5
78 |
79 | # target network related parameters
80 | self.algo.discount = 0.99 # discount factor to use
81 | self.algo.n_step = 1 # for using n-step returns in TD-updates
82 | self.algo.target_tau = 0.005 # update rate for target networks
83 | self.algo.infinite_horizon = False # if True, scale terminal rewards by 1 / (1 - discount) to treat as infinite horizon
84 |
85 | # ================== Critic Network Config ===================
86 | self.algo.critic.use_huber = False # Huber Loss instead of L2 for critic
87 | self.algo.critic.max_gradient_norm = None # L2 gradient clipping for critic (None to use no clipping)
88 | self.algo.critic.value_bounds = None # optional 2-tuple to ensure lower and upper bound on value estimates
89 |
90 | # critic ensemble parameters (TD3 trick)
91 | self.algo.critic.ensemble.n = 2 # number of Q networks in the ensemble
92 | self.algo.critic.ensemble.weight = 1.0 # weighting for mixing min and max for target Q value
93 |
94 | self.algo.critic.layer_dims = (256, 256) # size of critic MLP
95 |
96 | # ================== Actor Network Config ===================
97 |
98 | # update actor and target networks once every n critic gradient steps
99 | self.algo.actor.update_freq = 2
100 |
101 | # noise used to form target action for Q-update (TD3 target policy smoothing) - clipped Gaussian noise
102 | self.algo.actor.noise_std = 0.2 # zero-mean gaussian noise with this std is applied to actions
103 | self.algo.actor.noise_clip = 0.5 # noise is clipped in each dimension to (-noise_clip, noise_clip)
104 |
105 | self.algo.actor.layer_dims = (256, 256) # size of actor MLP
106 |
107 | def observation_config(self):
108 | """
109 | Update from superclass to use flat observations from gym envs.
110 | """
111 | super(TD3_BCConfig, self).observation_config()
112 | self.observation.modalities.obs.low_dim = ["flat"]
113 |
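`noise_std` and `noise_clip` above control TD3's target policy smoothing. A self-contained sketch of the clipped-Gaussian construction they parameterize (illustrative, not this repo's implementation):

    import torch

    def smoothed_target_action(target_action, noise_std=0.2, noise_clip=0.5):
        # zero-mean Gaussian noise, clipped per-dimension, then actions re-clipped to [-1, 1]
        noise = (torch.randn_like(target_action) * noise_std).clamp(-noise_clip, noise_clip)
        return (target_action + noise).clamp(-1.0, 1.0)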
--------------------------------------------------------------------------------
/robomimic/config/vae_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Config for VAE algorithm.
3 | """
4 |
5 | from robomimic.config.base_config import BaseConfig
6 |
7 |
8 | class VAEConfig(BaseConfig):
9 | ALGO_NAME = "vae"
10 |
11 | def algo_config(self):
12 | """
13 | This function populates the `config.algo` attribute of the config, and is given to the
14 | `Algo` subclass (see `algo/algo.py`) for each algorithm through the `algo_config`
15 | argument to the constructor. Any parameter that an algorithm needs to determine its
16 | training and test-time behavior should be populated here.
17 | """
18 | super(VAEConfig, self).algo_config()
19 |
20 | # optimization parameters
21 | self.algo.optim_params.policy.learning_rate.initial = 1e-4 # policy learning rate
22 | self.algo.optim_params.policy.learning_rate.decay_factor = 0.1 # factor to decay LR by (if epoch schedule non-empty)
23 | self.algo.optim_params.policy.learning_rate.epoch_schedule = [] # epochs where LR decay occurs
24 | self.algo.optim_params.policy.regularization.L2 = 0.00 # L2 regularization strength
25 |
26 | # loss weights
27 | self.algo.loss.l2_weight = 1.0 # L2 loss weight
28 | self.algo.loss.l1_weight = 0.0 # L1 loss weight
29 | self.algo.loss.cos_weight = 0.0 # cosine loss weight
30 |
31 | # MLP network architecture (layers after observation encoder and RNN, if present)
32 | self.algo.actor_layer_dims = (1024, 1024)
33 | self.algo.max_gradient_norm = None
34 |
35 | # residual MLP settings
36 | self.algo.res_mlp.enabled = False
37 | self.algo.res_mlp.num_blocks = 4
38 | self.algo.res_mlp.hidden_dim = 1024
39 | self.algo.res_mlp.use_layer_norm = True
40 |
41 | # stochastic Gaussian policy settings
42 | self.algo.gaussian.enabled = False # whether to train a Gaussian policy
43 | self.algo.gaussian.fixed_std = False # whether to train std output or keep it constant
44 | self.algo.gaussian.init_std = 0.1 # initial standard deviation (or constant)
45 | self.algo.gaussian.min_std = 0.01 # minimum std output from network
46 | self.algo.gaussian.std_activation = "softplus" # activation to use for std output from policy net
47 | self.algo.gaussian.low_noise_eval = True # low-std at test-time
48 |
49 | # stochastic GMM policy settings
50 | self.algo.gmm.enabled = False # whether to train a GMM policy
51 | self.algo.gmm.num_modes = 5 # number of GMM modes
52 | self.algo.gmm.min_std = 0.0001 # minimum std output from network
53 | self.algo.gmm.std_activation = "softplus" # activation to use for std output from policy net
54 | self.algo.gmm.low_noise_eval = True # low-std at test-time
55 |
56 | # stochastic VAE policy settings
57 | self.algo.vae.enabled = False # whether to train a VAE policy (unused)
58 | self.algo.vae.method = "" # to be specified in json file
59 | self.algo.vae.latent_dim = 14 # VAE latent dimension - set to twice the dimensionality of action space
60 | self.algo.vae.latent_clip = None # clip latent space when decoding (set to None to disable)
61 | self.algo.vae.kl_weight = 1. # beta-VAE weight to scale KL loss relative to reconstruction loss in ELBO
62 | self.algo.vae.conditioned_on_obs = True
63 |
64 | # VAE decoder settings
65 | self.algo.vae.decoder.is_conditioned = True # whether decoder should condition on observation
66 | self.algo.vae.decoder.reconstruction_sum_across_elements = False # sum instead of mean for reconstruction loss
67 |
68 | # VAE prior settings
69 | self.algo.vae.prior.learn = False # learn Gaussian / GMM prior instead of N(0, 1)
70 | self.algo.vae.prior.is_conditioned = False # whether to condition prior on observations
71 | self.algo.vae.prior.use_gmm = False # whether to use GMM prior
72 | self.algo.vae.prior.gmm_num_modes = 10 # number of GMM modes
73 | self.algo.vae.prior.gmm_learn_weights = False # whether to learn GMM weights
74 | self.algo.vae.prior.use_categorical = False # whether to use categorical prior
75 | self.algo.vae.prior.categorical_dim = 10 # the number of categorical classes for each latent dimension
76 | self.algo.vae.prior.categorical_gumbel_softmax_hard = False # use hard selection in forward pass
77 | self.algo.vae.prior.categorical_init_temp = 1.0 # initial gumbel-softmax temp
78 | self.algo.vae.prior.categorical_temp_anneal_step = 0.001 # linear temp annealing rate
79 | self.algo.vae.prior.categorical_min_temp = 0.3 # lowest gumbel-softmax temp
80 |
81 | self.algo.vae.encoder_layer_dims = (300, 400) # encoder MLP layer dimensions
82 | self.algo.vae.decoder_layer_dims = (300, 400) # decoder MLP layer dimensions
83 | self.algo.vae.prior_layer_dims = (300, 400) # prior MLP layer dimensions (if learning conditioned prior)
84 |
85 | # RNN policy settings
86 | self.algo.rnn.enabled = False # whether to train RNN policy
87 | self.algo.rnn.horizon = 10 # unroll length for RNN - should usually match train.seq_length
88 | self.algo.rnn.hidden_dim = 400 # hidden dimension size
89 | self.algo.rnn.rnn_type = "LSTM" # rnn type - one of "LSTM" or "GRU"
90 | self.algo.rnn.num_layers = 2 # number of RNN layers that are stacked
91 | self.algo.rnn.open_loop = False # if True, action predictions are only based on a single observation (not sequence)
92 | self.algo.rnn.kwargs.bidirectional = False # rnn kwargs
93 | self.algo.rnn.kwargs.do_not_lock_keys()
94 |
95 | # Hardcoded Weights
96 | self.algo.hc_weights.use_hardcode_weight = False # whether to apply the fixed per-class sample weights below
97 | self.algo.hc_weights.weight_key = "" # dataset key to read sample weights from
98 | self.algo.hc_weights.mixed_weights = False
99 | self.algo.hc_weights.use_adv_score = False # whether to weight samples by advantage scores instead
100 |
101 | self.algo.hc_weights.demos = 1 # relative weight for human demonstration samples
102 | self.algo.hc_weights.rollouts = 1 # relative weight for policy rollout samples
103 | self.algo.hc_weights.intvs = 1 # relative weight for human intervention samples
104 | self.algo.hc_weights.pre_intvs = 0.1 # relative weight for pre-intervention samples
105 |
106 | self.algo.hc_weights.traj_label_type = "last"
107 |
108 | self.algo.hc_weights.batch_normalize = True # normalize sample weights within each batch
109 |
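`kl_weight` above is the beta-VAE coefficient that scales the KL term against reconstruction in the ELBO. A self-contained sketch of that objective (illustrative names, not this repo's API):

    import torch

    def kl_to_standard_normal(mu, logvar):
        # KL( N(mu, diag(exp(logvar))) || N(0, I) ), summed over latent dims, averaged over batch
        return (-0.5 * (1.0 + logvar - mu ** 2 - logvar.exp()).sum(dim=-1)).mean()

    def elbo_loss(recon_loss, mu, logvar, kl_weight=1.0):
        # beta-VAE objective: reconstruction plus weighted KL
        return recon_loss + kl_weight * kl_to_standard_normal(mu, logvar)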
--------------------------------------------------------------------------------
/robomimic/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__init__.py
--------------------------------------------------------------------------------
/robomimic/envs/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/envs/__pycache__/env_base.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/envs/__pycache__/env_base.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/envs/env_base.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains the base class for environment wrappers that are used
3 | to provide a standardized environment API for training policies and interacting
4 | with metadata present in datasets.
5 | """
6 | import abc
7 |
8 |
9 | class EnvType:
10 | """
11 | Holds environment types - one per environment class.
12 | These act as identifiers for different environments.
13 | """
14 | ROBOSUITE_TYPE = 1
15 | GYM_TYPE = 2
16 | IG_MOMART_TYPE = 3
17 |
18 |
19 | class EnvBase(abc.ABC):
20 | """A base class method for environments used by this repo."""
21 | @abc.abstractmethod
22 | def __init__(
23 | self,
24 | env_name,
25 | render=False,
26 | render_offscreen=False,
27 | use_image_obs=False,
28 | postprocess_visual_obs=True,
29 | **kwargs,
30 | ):
31 | """
32 | Args:
33 | env_name (str): name of environment. Only needs to be provided if making a different
34 | environment from the one in @env_meta.
35 |
36 | render (bool): if True, environment supports on-screen rendering
37 |
38 | render_offscreen (bool): if True, environment supports off-screen rendering. This
39 | is forced to be True if @env_meta["use_images"] is True.
40 |
41 | use_image_obs (bool): if True, environment is expected to render rgb image observations
42 | on every env.step call. Set this to False for efficiency reasons, if image
43 | observations are not required.
44 |
45 | postprocess_visual_obs (bool): if True, postprocess image observations
46 | to prepare for learning. This should only be False when extracting observations
47 | for saving to a dataset (to save space on RGB images for example).
48 | """
49 | return
50 |
51 | @abc.abstractmethod
52 | def step(self, action):
53 | """
54 | Step in the environment with an action.
55 |
56 | Args:
57 | action (np.array): action to take
58 |
59 | Returns:
60 | observation (dict): new observation dictionary
61 | reward (float): reward for this step
62 | done (bool): whether the task is done
63 | info (dict): extra information
64 | """
65 | return
66 |
67 | @abc.abstractmethod
68 | def reset(self):
69 | """
70 | Reset environment.
71 |
72 | Returns:
73 | observation (dict): initial observation dictionary.
74 | """
75 | return
76 |
77 | @abc.abstractmethod
78 | def reset_to(self, state):
79 | """
80 | Reset to a specific simulator state.
81 |
82 | Args:
83 | state (dict): current simulator state
84 |
85 | Returns:
86 | observation (dict): observation dictionary after setting the simulator state
87 | """
88 | return
89 |
90 | @abc.abstractmethod
91 | def render(self, mode="human", height=None, width=None, camera_name=None):
92 | """Render"""
93 | return
94 |
95 | @abc.abstractmethod
96 | def get_observation(self):
97 | """Get environment observation"""
98 | return
99 |
100 | @abc.abstractmethod
101 | def get_state(self):
102 | """Get environment simulator state, compatible with @reset_to"""
103 | return
104 |
105 | @abc.abstractmethod
106 | def get_reward(self):
107 | """
108 | Get current reward.
109 | """
110 | return
111 |
112 | @abc.abstractmethod
113 | def get_goal(self):
114 | """
115 | Get goal observation. Not all environments support this.
116 | """
117 | return
118 |
119 | @abc.abstractmethod
120 | def set_goal(self, **kwargs):
121 | """
122 | Set goal observation with external specification. Not all environments support this.
123 | """
124 | return
125 |
126 | @abc.abstractmethod
127 | def is_done(self):
128 | """
129 | Check if the task is done (not necessarily successful).
130 | """
131 | return
132 |
133 | @abc.abstractmethod
134 | def is_success(self):
135 | """
136 | Check if the task condition(s) are met. Should return a dictionary
137 | { str: bool } with at least a "task" key for the overall task success,
138 | and additional optional keys corresponding to other task criteria.
139 | """
140 | return
141 |
142 | @property
143 | @abc.abstractmethod
144 | def action_dimension(self):
145 | """
146 | Returns dimension of actions (int).
147 | """
148 | return
149 |
150 | @property
151 | @abc.abstractmethod
152 | def name(self):
153 | """
154 | Returns name of environment (str).
155 | """
156 | return
157 |
158 | @property
159 | @abc.abstractmethod
160 | def type(self):
161 | """
162 | Returns environment type (int) for this kind of environment.
163 | This helps identify this env class.
164 | """
165 | return
166 |
167 | @abc.abstractmethod
168 | def serialize(self):
169 | """
170 | Save all information needed to re-instantiate this environment in a dictionary.
171 | This is the same as @env_meta - environment metadata stored in hdf5 datasets,
172 | and used in utils/env_utils.py.
173 | """
174 | return
175 |
176 | @classmethod
177 | @abc.abstractmethod
178 | def create_for_data_processing(cls, camera_names, camera_height, camera_width, reward_shaping, **kwargs):
179 | """
180 | Create environment for processing datasets, which includes extracting
181 | observations, labeling dense / sparse rewards, and annotating dones in
182 | transitions.
183 |
184 | Args:
185 | camera_names ([str]): list of camera names that correspond to image observations
186 | camera_height (int): camera height for all cameras
187 | camera_width (int): camera width for all cameras
188 | reward_shaping (bool): if True, use shaped environment rewards, else use sparse task completion rewards
189 |
190 | Returns:
191 | env (EnvBase instance)
192 | """
193 | return
194 |
195 | @property
196 | @abc.abstractmethod
197 | def rollout_exceptions(self):
198 | """
199 | Return tuple of exceptions to except when doing rollouts. This is useful to ensure
200 | that the entire training run doesn't crash because of a bad policy that causes unstable
201 | simulation computations.
202 | """
203 | return
204 |
205 |
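A minimal sketch of a rollout loop written against the EnvBase API above (`env`, `policy`, and `horizon` are hypothetical stand-ins for a concrete subclass instance, a policy callable, and an episode length):

    obs = env.reset()
    for _ in range(horizon):
        action = policy(obs)  # any callable mapping an observation dict to an np.array
        obs, reward, done, info = env.step(action)
        if done or env.is_success()["task"]:
            break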
--------------------------------------------------------------------------------
/robomimic/exps/templates/td3_bc.json:
--------------------------------------------------------------------------------
1 | {
2 | "algo_name": "td3_bc",
3 | "experiment": {
4 | "name": "test",
5 | "validate": false,
6 | "logging": {
7 | "terminal_output_to_txt": true,
8 | "log_tb": true
9 | },
10 | "save": {
11 | "enabled": true,
12 | "every_n_seconds": null,
13 | "every_n_epochs": 20,
14 | "epochs": [],
15 | "on_best_validation": false,
16 | "on_best_rollout_return": true,
17 | "on_best_rollout_success_rate": false
18 | },
19 | "epoch_every_n_steps": 5000,
20 | "validation_epoch_every_n_steps": 10,
21 | "env": null,
22 | "additional_envs": null,
23 | "render": false,
24 | "render_video": false,
25 | "keep_all_videos": false,
26 | "video_skip": 5,
27 | "rollout": {
28 | "enabled": true,
29 | "n": 50,
30 | "horizon": 1000,
31 | "rate": 1,
32 | "warmstart": 0,
33 | "terminate_on_success": true
34 | }
35 | },
36 | "train": {
37 | "data": null,
38 | "output_dir": "../td3_bc_trained_models",
39 | "num_data_workers": 0,
40 | "hdf5_cache_mode": "all",
41 | "hdf5_use_swmr": true,
42 | "hdf5_normalize_obs": true,
43 | "hdf5_filter_key": null,
44 | "seq_length": 1,
45 | "dataset_keys": [
46 | "actions",
47 | "rewards",
48 | "dones"
49 | ],
50 | "goal_mode": null,
51 | "cuda": true,
52 | "batch_size": 256,
53 | "num_epochs": 200,
54 | "seed": 1
55 | },
56 | "algo": {
57 | "optim_params": {
58 | "critic": {
59 | "learning_rate": {
60 | "initial": 0.0003,
61 | "decay_factor": 0.1,
62 | "epoch_schedule": []
63 | },
64 | "regularization": {
65 | "L2": 0.0
66 | },
67 | "start_epoch": -1,
68 | "end_epoch": -1
69 | },
70 | "actor": {
71 | "learning_rate": {
72 | "initial": 0.0003,
73 | "decay_factor": 0.1,
74 | "epoch_schedule": []
75 | },
76 | "regularization": {
77 | "L2": 0.0
78 | },
79 | "start_epoch": -1,
80 | "end_epoch": -1
81 | }
82 | },
83 | "alpha": 2.5,
84 | "discount": 0.99,
85 | "n_step": 1,
86 | "target_tau": 0.005,
87 | "infinite_horizon": false,
88 | "critic": {
89 | "use_huber": false,
90 | "max_gradient_norm": null,
91 | "value_bounds": null,
92 | "ensemble": {
93 | "n": 2,
94 | "weight": 1.0
95 | },
96 | "layer_dims": [
97 | 256,
98 | 256
99 | ]
100 | },
101 | "actor": {
102 | "update_freq": 2,
103 | "noise_std": 0.2,
104 | "noise_clip": 0.5,
105 | "layer_dims": [
106 | 256,
107 | 256
108 | ]
109 | }
110 | },
111 | "observation": {
112 | "modalities": {
113 | "obs": {
114 | "low_dim": [
115 | "flat"
116 | ],
117 | "rgb": [],
118 | "depth": [],
119 | "scan": []
120 | },
121 | "goal": {
122 | "low_dim": [],
123 | "rgb": [],
124 | "depth": [],
125 | "scan": []
126 | }
127 | },
128 | "encoder": {
129 | "low_dim": {
130 | "feature_dimension": null,
131 | "core_class": null,
132 | "core_kwargs": {},
133 | "obs_randomizer_class": null,
134 | "obs_randomizer_kwargs": {}
135 | },
136 | "rgb": {
137 | "feature_dimension": 64,
138 | "core_class": "VisualCore",
139 | "core_kwargs": {
140 | "backbone_class": "ResNet18Conv",
141 | "backbone_kwargs": {
142 | "pretrained": false,
143 | "input_coord_conv": false
144 | }
145 | },
146 | "obs_randomizer_class": null,
147 | "obs_randomizer_kwargs": {
148 | "crop_height": 76,
149 | "crop_width": 76,
150 | "num_crops": 1,
151 | "pos_enc": false
152 | },
153 | "pool_class": "SpatialSoftmax",
154 | "pool_kwargs": {
155 | "num_kp": 32,
156 | "learnable_temperature": false,
157 | "temperature": 1.0,
158 | "noise_std": 0.0
159 | }
160 | },
161 | "depth": {
162 | "feature_dimension": 64,
163 | "core_class": "VisualCore",
164 | "core_kwargs": {
165 | "backbone_class": "ResNet18Conv",
166 | "backbone_kwargs": {
167 | "pretrained": false,
168 | "input_coord_conv": false
169 | }
170 | },
171 | "obs_randomizer_class": null,
172 | "obs_randomizer_kwargs": {
173 | "crop_height": 76,
174 | "crop_width": 76,
175 | "num_crops": 1,
176 | "pos_enc": false
177 | },
178 | "pool_class": "SpatialSoftmax",
179 | "pool_kwargs": {
180 | "num_kp": 32,
181 | "learnable_temperature": false,
182 | "temperature": 1.0,
183 | "noise_std": 0.0
184 | }
185 | },
186 | "scan": {
187 | "feature_dimension": 64,
188 | "core_class": "ScanCore",
189 | "core_kwargs": {
190 | "backbone_class": "ResNet18Conv",
191 | "backbone_kwargs": {
192 | "pretrained": false,
193 | "input_coord_conv": false
194 | },
195 | "conv_kwargs": {
196 | "out_channels": [
197 | 32,
198 | 64,
199 | 64
200 | ],
201 | "kernel_size": [
202 | 8,
203 | 4,
204 | 2
205 | ],
206 | "stride": [
207 | 4,
208 | 2,
209 | 1
210 | ]
211 | }
212 | },
213 | "obs_randomizer_class": null,
214 | "obs_randomizer_kwargs": {
215 | "crop_height": 76,
216 | "crop_width": 76,
217 | "num_crops": 1,
218 | "pos_enc": false
219 | },
220 | "pool_class": "SpatialSoftmax",
221 | "pool_kwargs": {
222 | "num_kp": 32,
223 | "learnable_temperature": false,
224 | "temperature": 1.0,
225 | "noise_std": 0.0
226 | }
227 | }
228 | }
229 | }
230 | }
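A sketch of the usual template workflow: load this json, fill in the dataset path (left null above), and write out a runnable config (paths are illustrative):

    import json

    with open("robomimic/exps/templates/td3_bc.json", "r") as f:
        cfg = json.load(f)
    cfg["train"]["data"] = "/path/to/dataset.hdf5"
    with open("my_td3_bc.json", "w") as f:
        json.dump(cfg, f, indent=4)

The resulting json can then be passed to robomimic/scripts/train.py.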
--------------------------------------------------------------------------------
/robomimic/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_nets import EncoderCore, Randomizer
2 |
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/base_nets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/base_nets.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/distributions.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/distributions.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/obs_nets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/obs_nets.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/policy_nets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/policy_nets.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/vae_nets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/vae_nets.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/models/__pycache__/value_nets.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/models/__pycache__/value_nets.cpython-38.pyc
--------------------------------------------------------------------------------
/robomimic/scripts/check_same_initial_configs.py:
--------------------------------------------------------------------------------
1 | """
2 | Check whether any demonstrations share the same initial simulator state -
3 | first within one hdf5 dataset, then across a second dataset.
4 | """
5 | import h5py
6 | import sys
7 | import numpy as np
8 |
9 |
10 | def same_state(s1, s2):
11 |     """Return True if two flat simulator states are exactly equal."""
12 |     return np.all(np.equal(s1, s2))
13 |
14 |
15 | # initial states seen so far, across both datasets
16 | initial_state_lst = []
17 |
18 | # check for duplicate initial states within the first dataset
19 | dataset_path = sys.argv[1]
20 | with h5py.File(dataset_path, "r") as f:
21 |     data = f["data"]
22 |     for d in data.keys():
23 |         this_s = data[d]["states"][()][0]  # initial state of this demo
24 |         for s in initial_state_lst:
25 |             if same_state(s, this_s):
26 |                 print("same state")
27 |         initial_state_lst.append(this_s)
28 |
29 | # check the second dataset against all initial states seen so far
30 | sec_dataset_path = sys.argv[2]
31 | with h5py.File(sec_dataset_path, "r") as f:
32 |     sec_data = f["data"]
33 |     for d in sec_data.keys():
34 |         this_s = sec_data[d]["states"][()][0]  # initial state of this demo
35 |         for s in initial_state_lst:
36 |             if same_state(s, this_s):
37 |                 print("same state")
38 |         initial_state_lst.append(this_s)
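Example usage (hypothetical dataset paths):

    python robomimic/scripts/check_same_initial_configs.py demos_a.hdf5 demos_b.hdf5

Each printed `same state` line flags a demonstration whose initial simulator state duplicates one seen earlier.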
--------------------------------------------------------------------------------
/robomimic/scripts/conversion/convert_d4rl.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper script to convert D4RL data into an hdf5 compatible with this repository.
3 | Takes a folder path and a D4RL env name. This script downloads the corresponding
4 | raw D4RL dataset into a "d4rl" subfolder, and then makes a converted dataset
5 | in the "d4rl/converted" subfolder.
6 |
7 | This script has been tested on the following commits:
8 |
9 | https://github.com/rail-berkeley/d4rl/tree/9b68f31bab6a8546edfb28ff0bd9d5916c62fd1f
10 | https://github.com/rail-berkeley/d4rl/tree/26adf732efafdad864b3df2287e7b778ee4f7f63
11 |
12 | Args:
13 | env (str): d4rl env name, which specifies the dataset to download and convert
14 | folder (str): specify folder to download raw d4rl datasets and converted d4rl datasets to.
15 | A `d4rl` subfolder will be created in this folder with the raw d4rl dataset, and
16 | a `d4rl/converted` subfolder will be created in this folder with the converted
17 | datasets (if they do not already exist). Defaults to the datasets folder at
18 | the top-level of the repository.
19 |
20 | Example usage:
21 |
22 | # downloads to default path at robomimic/datasets/d4rl
23 | python convert_d4rl.py --env walker2d-medium-expert-v0
24 |
25 | # download to custom path
26 | python convert_d4rl.py --env walker2d-medium-expert-v0 --folder /path/to/folder
27 | """
28 |
29 | import os
30 | import h5py
31 | import json
32 | import argparse
33 | import numpy as np
34 |
35 | import gym
36 | import d4rl
37 | import robomimic
38 | from robomimic.envs.env_gym import EnvGym
39 | from robomimic.utils.log_utils import custom_tqdm
40 |
41 | if __name__ == "__main__":
42 | parser = argparse.ArgumentParser()
43 | parser.add_argument(
44 | "--env",
45 | type=str,
46 | help="d4rl env name, which specifies the dataset to download and convert",
47 | )
48 | parser.add_argument(
49 | "--folder",
50 | type=str,
51 | default=None,
52 | help="specify folder to download raw d4rl datasets and converted d4rl datasets to.\
53 | A `d4rl` subfolder will be created in this folder with the raw d4rl dataset, and\
54 | a `d4rl/converted` subfolder will be created in this folder with the converted\
55 | datasets (if they do not already exist). Defaults to the datasets folder at\
56 | the top-level of the repository.",
57 | )
58 | args = parser.parse_args()
59 |
60 | base_folder = args.folder
61 | if base_folder is None:
62 | base_folder = os.path.join(robomimic.__path__[0], "../datasets")
63 | base_folder = os.path.join(base_folder, "d4rl")
64 |
65 | # get dataset
66 | d4rl.set_dataset_path(base_folder)
67 | env = gym.make(args.env)
68 | ds = env.env.get_dataset()
69 | env.close()
70 |
71 | # env
72 | env = EnvGym(args.env)
73 |
74 | # output file
75 | write_folder = os.path.join(base_folder, "converted")
76 | if not os.path.exists(write_folder):
77 | os.makedirs(write_folder)
78 | output_path = os.path.join(base_folder, "converted", "{}.hdf5".format(args.env.replace("-", "_")))
79 | f_sars = h5py.File(output_path, "w")
80 | f_sars_grp = f_sars.create_group("data")
81 |
82 | # code to split D4RL data into trajectories
83 | # (modified from https://github.com/aviralkumar2907/d4rl_evaluations/blob/bear_intergrate/bear/examples/bear_hdf5_d4rl.py#L18)
84 | all_obs = ds['observations']
85 | all_act = ds['actions']
86 | N = all_obs.shape[0]
87 |
88 | obs = all_obs[:N-1]
89 | actions = all_act[:N-1]
90 | next_obs = all_obs[1:]
91 | rewards = np.squeeze(ds['rewards'][:N-1])
92 | dones = np.squeeze(ds['terminals'][:N-1]).astype(np.int32)
93 |
94 | assert 'timeouts' in ds
95 | timeouts = ds['timeouts'][:]
96 |
97 | ctr = 0
98 | total_samples = 0
99 | num_traj = 0
100 | traj = dict(obs=[], next_obs=[], actions=[], rewards=[], dones=[])
101 |
102 | print("\nConverting hdf5...")
103 | for idx in custom_tqdm(range(obs.shape[0])):
104 |
105 | # add transition
106 | traj["obs"].append(obs[idx])
107 | traj["actions"].append(actions[idx])
108 | traj["rewards"].append(rewards[idx])
109 | traj["next_obs"].append(next_obs[idx])
110 | traj["dones"].append(dones[idx])
111 | ctr += 1
112 |
113 | # if hit timeout or done is True, end the current trajectory and start a new trajectory
114 | if timeouts[idx] or dones[idx]:
115 |
116 | # replace next obs with copy of current obs for final timestep, and make sure done is true
117 | traj["next_obs"][-1] = np.array(obs[idx])
118 | traj["dones"][-1] = 1
119 |
120 | # store trajectory
121 | ep_data_grp = f_sars_grp.create_group("demo_{}".format(num_traj))
122 | ep_data_grp.create_dataset("obs/flat", data=np.array(traj["obs"]))
123 | ep_data_grp.create_dataset("next_obs/flat", data=np.array(traj["next_obs"]))
124 | ep_data_grp.create_dataset("actions", data=np.array(traj["actions"]))
125 | ep_data_grp.create_dataset("rewards", data=np.array(traj["rewards"]))
126 | ep_data_grp.create_dataset("dones", data=np.array(traj["dones"]))
127 | ep_data_grp.attrs["num_samples"] = len(traj["actions"])
128 | total_samples += len(traj["actions"])
129 | num_traj += 1
130 |
131 | # reset
132 | ctr = 0
133 | traj = dict(obs=[], next_obs=[], actions=[], rewards=[], dones=[])
134 |
135 | print("\nExcluding {} samples at end of file due to no trajectory truncation.".format(len(traj["actions"])))
136 | print("Wrote {} trajectories to new converted hdf5 at {}\n".format(num_traj, output_path))
137 |
138 | # metadata
139 | f_sars_grp.attrs["total"] = total_samples
140 | f_sars_grp.attrs["env_args"] = json.dumps(env.serialize(), indent=4)
141 |
142 | f_sars.close()
143 |
144 |
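A quick sketch of reading back a converted file (the exact filename depends on the env passed above; this path is illustrative):

    import h5py

    with h5py.File("datasets/d4rl/converted/walker2d_medium_expert_v0.hdf5", "r") as f:
        demo = f["data/demo_0"]
        print(demo["obs/flat"].shape, demo["actions"].shape)
        print(demo.attrs["num_samples"], f["data"].attrs["total"])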
--------------------------------------------------------------------------------
/robomimic/scripts/conversion/convert_robosuite.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper script to convert a dataset collected using robosuite into an hdf5 compatible with
3 | this repository. Takes a dataset path corresponding to the demo.hdf5 file containing the
4 | demonstrations. It modifies the dataset in-place. By default, the script also creates a
5 | 90-10 train-validation split.
6 |
7 | For more information on collecting datasets with robosuite, see the code link and documentation
8 | link below.
9 |
10 | Code: https://github.com/ARISE-Initiative/robosuite/blob/offline_study/robosuite/scripts/collect_human_demonstrations.py
11 |
12 | Documentation: https://robosuite.ai/docs/algorithms/demonstrations.html
13 |
14 | Example usage:
15 |
16 | python convert_robosuite.py --dataset /path/to/your/demo.hdf5
17 | """
18 |
19 | import h5py
20 | import json
21 | import argparse
22 | import os
23 |
24 | import robomimic.envs.env_base as EB
25 | from robomimic.scripts.split_train_val import split_train_val_from_hdf5
26 |
27 |
28 | if __name__ == "__main__":
29 | parser = argparse.ArgumentParser()
30 | parser.add_argument(
31 | "--dataset",
32 | type=str,
33 | help="path to input hdf5 dataset",
34 | )
35 | args = parser.parse_args()
36 |
37 | f = h5py.File(os.path.expanduser(args.dataset), "a") # edit mode
38 |
39 | # store env meta
40 | env_name = f["data"].attrs["env"]
41 | env_info = json.loads(f["data"].attrs["env_info"])
42 | env_meta = dict(
43 | type=EB.EnvType.ROBOSUITE_TYPE,
44 | env_name=env_name,
45 | env_kwargs=env_info,
46 | )
47 | if "env_args" in f["data"].attrs:
48 | del f["data"].attrs["env_args"]
49 | f["data"].attrs["env_args"] = json.dumps(env_meta, indent=4)
50 |
51 | print("====== Stored env meta ======")
52 | print(f["data"].attrs["env_args"])
53 |
54 | # store metadata about number of samples
55 | total_samples = 0
56 | for ep in f["data"]:
57 | # ensure model-xml is in per-episode metadata
58 | assert "model_file" in f["data/{}".format(ep)].attrs
59 |
60 | # add "num_samples" into per-episode metadata
61 | if "num_samples" in f["data/{}".format(ep)].attrs:
62 | del f["data/{}".format(ep)].attrs["num_samples"]
63 | n_sample = f["data/{}/actions".format(ep)].shape[0]
64 | f["data/{}".format(ep)].attrs["num_samples"] = n_sample
65 | total_samples += n_sample
66 |
67 | # add total samples to global metadata
68 | if "total" in f["data"].attrs:
69 | del f["data"].attrs["total"]
70 | f["data"].attrs["total"] = total_samples
71 |
72 | f.close()
73 |
74 | # create 90-10 train-validation split in the dataset
75 | split_train_val_from_hdf5(hdf5_path=args.dataset, val_ratio=0.1)
76 |
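A sketch of verifying the stored metadata after conversion (path is illustrative):

    import h5py
    import json

    with h5py.File("/path/to/your/demo.hdf5", "r") as f:
        env_meta = json.loads(f["data"].attrs["env_args"])
        print(env_meta["env_name"], env_meta["type"])  # task name and EnvType id
        print(f["data"].attrs["total"])                # total number of transitions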
--------------------------------------------------------------------------------
/robomimic/scripts/conversion/convert_roboturk_pilot.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper script to convert the RoboTurk Pilot datasets (https://roboturk.stanford.edu/dataset_sim.html)
3 | into a format compatible with this repository. It will also create some useful filter keys
4 | in the file (e.g. training, validation, and fastest n trajectories). Prior work
5 | (https://arxiv.org/abs/1911.05321) has found this useful (for example, training on the
6 | fastest 225 demonstrations for bins-Can).
7 |
8 | Direct download link for dataset: http://cvgl.stanford.edu/projects/roboturk/RoboTurkPilot.zip
9 |
10 | Args:
11 | folder (str): path to a folder containing a demo.hdf5 and a models directory containing
12 | mujoco xml files. For example, RoboTurkPilot/bins-Can.
13 |
14 | n (int): creates a filter key corresponding to the n fastest trajectories. Defaults to 225.
15 |
16 | Example usage:
17 |
18 | python convert_roboturk_pilot.py --folder /path/to/RoboTurkPilot/bins-Can --n 225
19 | """
20 |
21 | import os
22 | import h5py
23 | import json
24 | import argparse
25 | import numpy as np
26 | from tqdm import tqdm
27 |
28 | import robomimic
29 | import robomimic.envs.env_base as EB
30 | from robomimic.utils.file_utils import create_hdf5_filter_key
31 | from robomimic.scripts.split_train_val import split_train_val_from_hdf5
32 |
33 |
34 | def convert_rt_pilot_hdf5(ref_folder):
35 | """
36 | Uses the reference demo hdf5 to write a new converted hdf5 compatible with
37 | the repository.
38 |
39 | Args:
40 | ref_folder (str): path to a folder containing a demo.hdf5 and a models directory containing
41 | mujoco xml files.
42 | """
43 | hdf5_path = os.path.join(ref_folder, "demo.hdf5")
44 | new_path = os.path.join(ref_folder, "demo_new.hdf5")
45 |
46 | f = h5py.File(hdf5_path, "r")
47 | f_new = h5py.File(new_path, "w")
48 | f_new_grp = f_new.create_group("data")
49 |
50 | # sorted list of demonstrations by demo number
51 | demos = list(f["data"].keys())
52 | inds = np.argsort([int(elem[5:]) for elem in demos])
53 | demos = [demos[i] for i in inds]
54 |
55 | # write each demo
56 | num_samples_arr = []
57 | for demo_id in tqdm(range(len(demos))):
58 | ep = demos[demo_id]
59 |
60 | # create group for this demonstration
61 | ep_data_grp = f_new_grp.create_group(ep)
62 |
63 | # copy states over
64 | states = f["data/{}/states".format(ep)][()]
65 | ep_data_grp.create_dataset("states", data=np.array(states))
66 |
67 | # concat jvels and gripper actions to form full actions
68 | jvels = f["data/{}/joint_velocities".format(ep)][()]
69 | gripper_acts = f["data/{}/gripper_actuations".format(ep)][()]
70 | actions = np.concatenate([jvels, gripper_acts], axis=1)
71 |
72 | # IMPORTANT: clip actions to -1, 1, since this is expected by the codebase
73 | actions = np.clip(actions, -1., 1.)
74 | ep_data_grp.create_dataset("actions", data=actions)
75 |
76 | # store model xml directly in the new hdf5 file
77 | model_path = os.path.join(ref_folder, "models", f["data/{}".format(ep)].attrs["model_file"])
78 | f_model = open(model_path, "r")
79 | model_xml = f_model.read()
80 | f_model.close()
81 | ep_data_grp.attrs["model_file"] = model_xml
82 |
83 | # store num samples for this ep
84 | num_samples = actions.shape[0]
85 | ep_data_grp.attrs["num_samples"] = num_samples # number of transitions in this episode
86 | num_samples_arr.append(num_samples)
87 |
88 | # write dataset attributes (metadata)
89 | f_new_grp.attrs["total"] = np.sum(num_samples_arr)
90 |
91 | # construct and save env metadata
92 | env_meta = dict()
93 | env_meta["type"] = EB.EnvType.ROBOSUITE_TYPE
94 | env_meta["env_name"] = (f["data"].attrs["env"] + "Teleop")
95 | # hardcode robosuite v0.3 args
96 | robosuite_args = {
97 | "has_renderer": False,
98 | "has_offscreen_renderer": False,
99 | "ignore_done": True,
100 | "use_object_obs": True,
101 | "use_camera_obs": False,
102 | "camera_depth": False,
103 | "camera_height": 84,
104 | "camera_width": 84,
105 | "camera_name": "agentview",
106 | "gripper_visualization": False,
107 | "reward_shaping": False,
108 | "control_freq": 100,
109 | }
110 | env_meta["env_kwargs"] = robosuite_args
111 | f_new_grp.attrs["env_args"] = json.dumps(env_meta, indent=4) # environment info
112 |
113 | print("\n====== Added env meta ======")
114 | print(f_new_grp.attrs["env_args"])
115 |
116 | f.close()
117 | f_new.close()
118 |
119 | # back up the old dataset, and replace with new dataset
120 | os.rename(hdf5_path, os.path.join(ref_folder, "demo_bak.hdf5"))
121 | os.rename(new_path, hdf5_path)
122 |
123 |
124 | def split_fastest_from_hdf5(hdf5_path, n):
125 | """
126 | Creates filter key for fastest N trajectories, named
127 | "fastest_{}".format(n).
128 |
129 | Args:
130 | hdf5_path (str): path to the hdf5 file
131 |
132 | n (int): fastest n demos to create filter key for
133 | """
134 |
135 | # retrieve fastest n demos
136 | f = h5py.File(hdf5_path, "r")
137 | demos = sorted(list(f["data"].keys()))
138 | traj_lengths = []
139 | for ep in demos:
140 | traj_lengths.append(f["data/{}/actions".format(ep)].shape[0])
141 | inds = np.argsort(traj_lengths)[:n]
142 | filtered_demos = [demos[i] for i in inds]
143 | f.close()
144 |
145 | # create filter key
146 | name = "fastest_{}".format(n)
147 | lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=filtered_demos, key_name=name)
148 |
149 | print("Total number of samples in fastest {} demos: {}".format(n, np.sum(lengths)))
150 | print("Average number of samples in fastest {} demos: {}".format(n, np.mean(lengths)))
151 |
152 |
153 | if __name__ == "__main__":
154 | parser = argparse.ArgumentParser()
155 | parser.add_argument(
156 | "--folder",
157 | type=str,
158 | help="path to a folder containing a demo.hdf5 and a models directory containing \
159 | mujoco xml files. For example, RoboTurkPilot/bins-Can.",
160 | )
161 | parser.add_argument(
162 | "--n",
163 | type=int,
164 | default=225,
165 | help="creates a filter key corresponding to the n fastest trajectories. Defaults to 225.",
166 | )
167 | args = parser.parse_args()
168 |
169 | # convert hdf5
170 | convert_rt_pilot_hdf5(ref_folder=args.folder)
171 |
172 | # create 90-10 train-validation split in the dataset
173 | print("\nCreating 90-10 train-validation split...\n")
174 | hdf5_path = os.path.join(args.folder, "demo.hdf5")
175 | split_train_val_from_hdf5(hdf5_path=hdf5_path, val_ratio=0.1)
176 |
177 | print("\nCreating filter key for fastest {} trajectories...".format(args.n))
178 | split_fastest_from_hdf5(hdf5_path=hdf5_path, n=args.n)
179 |
180 | print("\nCreating 90-10 train-validation split for fastest {} trajectories...".format(args.n))
181 | split_train_val_from_hdf5(hdf5_path=hdf5_path, val_ratio=0.1, filter_key="fastest_{}".format(args.n))
182 |
183 | print(
184 | "\nWARNING: new dataset has replaced old one in demo.hdf5 file. "
185 | "The old dataset file has been moved to demo_bak.hdf5"
186 | )
187 |
188 | print(
189 | "\nNOTE: the new dataset also contains a fastest_{} filter key, for an easy way "
190 | "to train on the fastest trajectories. Just set config.train.hdf5_filter to train on this "
191 | "subset. A common choice is 225 when training on the bins-Can dataset.\n".format(args.n)
192 | )
193 |
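
After conversion, the new filter keys can be read directly from the "mask" group of the hdf5. A short sketch (the fastest_225 key assumes the script above was run with --n 225, and the path is hypothetical):

import h5py
import numpy as np

hdf5_path = "RoboTurkPilot/bins-Can/demo.hdf5"  # hypothetical converted dataset

with h5py.File(hdf5_path, "r") as f:
    # filter keys are stored under "mask" as arrays of demo names
    fastest = [elem.decode("utf-8") for elem in np.array(f["mask/fastest_225"])]
    print("demos under fastest_225:", len(fastest))
    print("first few:", fastest[:5])
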
--------------------------------------------------------------------------------
/robomimic/scripts/download_datasets.py:
--------------------------------------------------------------------------------
1 | """
2 | Script to download datasets packaged with the repository. By default, all
3 | datasets will be stored at robomimic/datasets, unless the @download_dir
4 | argument is supplied. We recommend using the default, as most examples that
5 | use these datasets assume that they can be found there.
6 |
7 | The @tasks, @dataset_types, and @hdf5_types arguments can all be supplied
8 | to choose which datasets to download.
9 |
10 | Args:
11 | download_dir (str): Base download directory. Created if it doesn't exist.
12 | Defaults to datasets folder in repository - only pass in if you would
13 | like to override the location.
14 |
15 | tasks (list): Tasks to download datasets for. Defaults to lift task. Pass 'all' to
16 |         download all tasks (sim + real), 'sim' to download all sim tasks, 'real' to
17 | download all real tasks, or directly specify the list of tasks.
18 |
19 | dataset_types (list): Dataset types to download datasets for (e.g. ph, mh, mg).
20 | Defaults to ph. Pass 'all' to download datasets for all available dataset
21 | types per task, or directly specify the list of dataset types.
22 |
23 | hdf5_types (list): hdf5 types to download datasets for (e.g. raw, low_dim, image).
24 | Defaults to low_dim. Pass 'all' to download datasets for all available hdf5
25 | types per task and dataset, or directly specify the list of hdf5 types.
26 |
27 | Example usage:
28 |
29 | # default behavior - just download lift proficient-human low-dim dataset
30 | python download_datasets.py
31 |
32 | # download low-dim proficient-human datasets for all simulation tasks
33 | # (do a dry run first to see which datasets would be downloaded)
34 | python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim --dry_run
35 | python download_datasets.py --tasks sim --dataset_types ph --hdf5_types low_dim
36 |
37 | # download all low-dim and image multi-human datasets for the can and square tasks
38 | python download_datasets.py --tasks can square --dataset_types mh --hdf5_types low_dim image
39 |
40 | # download the sparse reward machine-generated low-dim datasets
41 | python download_datasets.py --tasks all --dataset_types mg --hdf5_types low_dim_sparse
42 |
43 | # download all real robot datasets
44 | python download_datasets.py --tasks real
45 | """
46 | import os
47 | import argparse
48 |
49 | import robomimic
50 | import robomimic.utils.file_utils as FileUtils
51 | from robomimic import DATASET_REGISTRY
52 |
53 | ALL_TASKS = ["lift", "can", "square", "transport", "tool_hang", "lift_real", "can_real", "tool_hang_real"]
54 | ALL_DATASET_TYPES = ["ph", "mh", "mg", "paired"]
55 | ALL_HDF5_TYPES = ["raw", "low_dim", "image", "low_dim_sparse", "low_dim_dense", "image_sparse", "image_dense"]
56 |
57 |
58 | if __name__ == "__main__":
59 | parser = argparse.ArgumentParser()
60 |
61 | # directory to download datasets to
62 | parser.add_argument(
63 | "--download_dir",
64 | type=str,
65 | default=None,
66 | help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.",
67 | )
68 |
69 | # tasks to download datasets for
70 | parser.add_argument(
71 | "--tasks",
72 | type=str,
73 | nargs='+',
74 | default=["lift"],
75 | help="Tasks to download datasets for. Defaults to lift task. Pass 'all' to download all tasks (sim + real)\
76 | 'sim' to download all sim tasks, 'real' to download all real tasks, or directly specify the list of\
77 | tasks.",
78 | )
79 |
80 | # dataset types to download datasets for
81 | parser.add_argument(
82 | "--dataset_types",
83 | type=str,
84 | nargs='+',
85 | default=["ph"],
86 | help="Dataset types to download datasets for (e.g. ph, mh, mg). Defaults to ph. Pass 'all' to download \
87 | datasets for all available dataset types per task, or directly specify the list of dataset types.",
88 | )
89 |
90 | # hdf5 types to download datasets for
91 | parser.add_argument(
92 | "--hdf5_types",
93 | type=str,
94 | nargs='+',
95 | default=["low_dim"],
96 | help="hdf5 types to download datasets for (e.g. raw, low_dim, image). Defaults to low_dim. Pass 'all' \
97 | to download datasets for all available hdf5 types per task and dataset, or directly specify the list\
98 | of hdf5 types.",
99 | )
100 |
101 | # dry run - don't actually download datasets, but print which datasets would be downloaded
102 | parser.add_argument(
103 | "--dry_run",
104 | action='store_true',
105 | help="set this flag to do a dry run to only print which datasets would be downloaded"
106 | )
107 |
108 | args = parser.parse_args()
109 |
110 | # set default base directory for downloads
111 | default_base_dir = args.download_dir
112 | if default_base_dir is None:
113 | default_base_dir = os.path.join(robomimic.__path__[0], "../datasets")
114 |
115 | # load args
116 | download_tasks = args.tasks
117 | if "all" in download_tasks:
118 |         assert len(download_tasks) == 1, "'all' should be the only tasks argument, but got: {}".format(args.tasks)
119 | download_tasks = ALL_TASKS
120 | elif "sim" in download_tasks:
121 |         assert len(download_tasks) == 1, "'sim' should be the only tasks argument, but got: {}".format(args.tasks)
122 | download_tasks = [task for task in ALL_TASKS if "real" not in task]
123 | elif "real" in download_tasks:
124 |         assert len(download_tasks) == 1, "'real' should be the only tasks argument, but got: {}".format(args.tasks)
125 | download_tasks = [task for task in ALL_TASKS if "real" in task]
126 |
127 | download_dataset_types = args.dataset_types
128 | if "all" in download_dataset_types:
129 |         assert len(download_dataset_types) == 1, "'all' should be the only dataset_types argument, but got: {}".format(args.dataset_types)
130 | download_dataset_types = ALL_DATASET_TYPES
131 |
132 | download_hdf5_types = args.hdf5_types
133 | if "all" in download_hdf5_types:
134 |         assert len(download_hdf5_types) == 1, "'all' should be the only hdf5_types argument, but got: {}".format(args.hdf5_types)
135 | download_hdf5_types = ALL_HDF5_TYPES
136 |
137 | # download requested datasets
138 | for task in DATASET_REGISTRY:
139 | if task in download_tasks:
140 | for dataset_type in DATASET_REGISTRY[task]:
141 | if dataset_type in download_dataset_types:
142 | for hdf5_type in DATASET_REGISTRY[task][dataset_type]:
143 | if hdf5_type in download_hdf5_types:
144 | download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type))
145 | print("\nDownloading dataset:\n task: {}\n dataset type: {}\n hdf5 type: {}\n download path: {}"
146 | .format(task, dataset_type, hdf5_type, download_dir))
147 | if args.dry_run:
148 | print("\ndry run: skip download")
149 | else:
150 | # Make sure path exists and create if it doesn't
151 | os.makedirs(download_dir, exist_ok=True)
152 | FileUtils.download_url(
153 | url=DATASET_REGISTRY[task][dataset_type][hdf5_type]["url"],
154 | download_dir=download_dir,
155 | )
156 | print("")
157 |
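
Since DATASET_REGISTRY is a nested dictionary keyed by task, dataset type, and hdf5 type, the available downloads can be enumerated without going through the command-line interface. A small sketch that mirrors the nested loop above:

from robomimic import DATASET_REGISTRY

# list every (task, dataset_type, hdf5_type) combination and its download URL
for task in DATASET_REGISTRY:
    for dataset_type in DATASET_REGISTRY[task]:
        for hdf5_type in DATASET_REGISTRY[task][dataset_type]:
            url = DATASET_REGISTRY[task][dataset_type][hdf5_type]["url"]
            print("{} / {} / {}: {}".format(task, dataset_type, hdf5_type, url))
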
--------------------------------------------------------------------------------
/robomimic/scripts/download_momart_datasets.py:
--------------------------------------------------------------------------------
1 | """
2 | Script to download datasets used in MoMaRT paper (https://arxiv.org/abs/2112.05251). By default, all
3 | datasets will be stored at robomimic/datasets, unless the @download_dir
4 | argument is supplied. We recommend using the default, as most examples that
5 | use these datasets assume that they can be found there.
6 |
7 | The @tasks and @dataset_types arguments can both be supplied
8 | to choose which datasets to download.
9 |
10 | Args:
11 | download_dir (str): Base download directory. Created if it doesn't exist.
12 | Defaults to datasets folder in repository - only pass in if you would
13 | like to override the location.
14 |
15 | tasks (list): Tasks to download datasets for. Defaults to table_setup_from_dishwasher task. Pass 'all' to
16 | download all tasks - 5 total:
17 | - table_setup_from_dishwasher
18 | - table_setup_from_dresser
19 | - table_cleanup_to_dishwasher
20 | - table_cleanup_to_sink
21 | - unload_dishwasher
22 |
23 | dataset_types (list): Dataset types to download datasets for (expert, suboptimal, generalize, sample).
24 | Defaults to expert. Pass 'all' to download datasets for all available dataset
25 | types per task, or directly specify the list of dataset types.
26 |         NOTE: Because these datasets are huge, we will always print out a warning that the user
27 |         must acknowledge (by responding yes) before downloading, since the size can exceed 100G for all tasks of a single type
28 |
29 | Example usage:
30 |
31 | # default behavior - just download expert table_setup_from_dishwasher dataset
32 | python download_momart_datasets.py
33 |
34 | # download expert datasets for all tasks
35 | # (do a dry run first to see which datasets would be downloaded)
36 | python download_momart_datasets.py --tasks all --dataset_types expert --dry_run
37 |     python download_momart_datasets.py --tasks all --dataset_types expert
38 |
39 | # download all expert and suboptimal datasets for the table_setup_from_dishwasher and table_cleanup_to_dishwasher tasks
40 |     python download_momart_datasets.py --tasks table_setup_from_dishwasher table_cleanup_to_dishwasher --dataset_types expert suboptimal
41 |
42 | # download the sample datasets
43 |     python download_momart_datasets.py --tasks all --dataset_types sample
44 |
45 | # download all datasets
46 |     python download_momart_datasets.py --tasks all --dataset_types all
47 | """
48 | import os
49 | import argparse
50 |
51 | import robomimic
52 | import robomimic.utils.file_utils as FileUtils
53 | from robomimic import MOMART_DATASET_REGISTRY
54 |
55 | ALL_TASKS = [
56 | "table_setup_from_dishwasher",
57 | "table_setup_from_dresser",
58 | "table_cleanup_to_dishwasher",
59 | "table_cleanup_to_sink",
60 | "unload_dishwasher",
61 | ]
62 | ALL_DATASET_TYPES = [
63 | "expert",
64 | "suboptimal",
65 | "generalize",
66 | "sample",
67 | ]
68 |
69 |
70 | if __name__ == "__main__":
71 | parser = argparse.ArgumentParser()
72 |
73 | # directory to download datasets to
74 | parser.add_argument(
75 | "--download_dir",
76 | type=str,
77 | default=None,
78 | help="Base download directory. Created if it doesn't exist. Defaults to datasets folder in repository.",
79 | )
80 |
81 | # tasks to download datasets for
82 | parser.add_argument(
83 | "--tasks",
84 | type=str,
85 | nargs='+',
86 | default=["table_setup_from_dishwasher"],
87 | help="Tasks to download datasets for. Defaults to table_setup_from_dishwasher task. Pass 'all' to download all"
88 | f"5 tasks, or directly specify the list of tasks. Options are any of: {ALL_TASKS}",
89 | )
90 |
91 | # dataset types to download datasets for
92 | parser.add_argument(
93 | "--dataset_types",
94 | type=str,
95 | nargs='+',
96 | default=["expert"],
97 | help="Dataset types to download datasets for (e.g. expert, suboptimal). Defaults to expert. Pass 'all' to "
98 | "download datasets for all available dataset types per task, or directly specify the list of dataset "
99 | f"types. Options are any of: {ALL_DATASET_TYPES}",
100 | )
101 |
102 | # dry run - don't actually download datasets, but print which datasets would be downloaded
103 | parser.add_argument(
104 | "--dry_run",
105 | action='store_true',
106 | help="set this flag to do a dry run to only print which datasets would be downloaded"
107 | )
108 |
109 | args = parser.parse_args()
110 |
111 | # set default base directory for downloads
112 | default_base_dir = args.download_dir
113 | if default_base_dir is None:
114 | default_base_dir = os.path.join(robomimic.__path__[0], "../datasets")
115 |
116 | # load args
117 | download_tasks = args.tasks
118 | if "all" in download_tasks:
119 |         assert len(download_tasks) == 1, "'all' should be the only tasks argument, but got: {}".format(args.tasks)
120 | download_tasks = ALL_TASKS
121 |
122 | download_dataset_types = args.dataset_types
123 | if "all" in download_dataset_types:
124 |         assert len(download_dataset_types) == 1, "'all' should be the only dataset_types argument, but got: {}".format(args.dataset_types)
125 | download_dataset_types = ALL_DATASET_TYPES
126 |
127 | # Run sanity check first to warn user if they're about to download a huge amount of data
128 | total_size = 0
129 | for task in MOMART_DATASET_REGISTRY:
130 | if task in download_tasks:
131 | for dataset_type in MOMART_DATASET_REGISTRY[task]:
132 | if dataset_type in download_dataset_types:
133 | total_size += MOMART_DATASET_REGISTRY[task][dataset_type]["size"]
134 |
135 | # Verify user acknowledgement if we're not doing a dry run
136 | if not args.dry_run:
137 | user_response = input(f"Warning: requested datasets will take a total of {total_size}GB. Proceed? y/n\n")
138 |         assert user_response.lower() in {"yes", "y"}, "Did not receive confirmation. Aborting download."
139 |
140 | # download requested datasets
141 | for task in MOMART_DATASET_REGISTRY:
142 | if task in download_tasks:
143 | for dataset_type in MOMART_DATASET_REGISTRY[task]:
144 | if dataset_type in download_dataset_types:
145 | dataset_info = MOMART_DATASET_REGISTRY[task][dataset_type]
146 | download_dir = os.path.abspath(os.path.join(default_base_dir, task, dataset_type))
147 | print(f"\nDownloading dataset:\n"
148 | f" task: {task}\n"
149 | f" dataset type: {dataset_type}\n"
150 | f" dataset size: {dataset_info['size']}GB\n"
151 | f" download path: {download_dir}")
152 | if args.dry_run:
153 | print("\ndry run: skip download")
154 | else:
155 | # Make sure path exists and create if it doesn't
156 | os.makedirs(download_dir, exist_ok=True)
157 | FileUtils.download_url(
158 | url=dataset_info["url"],
159 | download_dir=download_dir,
160 | )
161 | print("")
162 |
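
The size sanity check above can also be run standalone to budget disk space before launching a download. A sketch using the same "size" metadata (in GB) from the registry; the selected tasks here are just an example:

from robomimic import MOMART_DATASET_REGISTRY

# example selection -- compute its total size the same way the script's check does
tasks = ["table_setup_from_dishwasher", "unload_dishwasher"]
dataset_types = ["expert"]

total_gb = sum(
    MOMART_DATASET_REGISTRY[task][dtype]["size"]
    for task in tasks
    for dtype in dataset_types
)
print("selected datasets would take ~{}GB".format(total_gb))
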
--------------------------------------------------------------------------------
/robomimic/scripts/extract_obs_from_raw_datasets.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script holds the commands that were used to go from raw robosuite demo.hdf5 files
4 | # to our processed low-dim and image hdf5 files.
5 |
6 | BASE_DATASET_DIR="../../datasets"
7 | echo "Using base dataset directory: $BASE_DATASET_DIR"
8 |
9 |
10 | ### NOTE: we use done-mode 0 for MG (dones on task success) ###
11 |
12 |
13 | ### mg ###
14 |
15 |
16 | # lift - mg, sparse
17 | python dataset_states_to_obs.py --done_mode 0 \
18 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \
19 | --output_name low_dim_sparse.hdf5
20 | python dataset_states_to_obs.py --done_mode 0 \
21 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \
22 | --output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
23 |
24 | # lift - mg, dense
25 | python dataset_states_to_obs.py --done_mode 0 --shaped \
26 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \
27 | --output_name low_dim_dense.hdf5
28 | python dataset_states_to_obs.py --done_mode 0 --shaped \
29 | --dataset $BASE_DATASET_DIR/lift/mg/demo.hdf5 \
30 | --output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
31 |
32 | # can - mg, sparse
33 | python dataset_states_to_obs.py --done_mode 0 \
34 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \
35 | --output_name low_dim_sparse.hdf5
36 | python dataset_states_to_obs.py --done_mode 0 \
37 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \
38 | --output_name image_sparse.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
39 |
40 | # can - mg, dense
41 | python dataset_states_to_obs.py --done_mode 0 --shaped \
42 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \
43 | --output_name low_dim_dense.hdf5
44 | python dataset_states_to_obs.py --done_mode 0 --shaped \
45 | --dataset $BASE_DATASET_DIR/can/mg/demo.hdf5 \
46 | --output_name image_dense.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
47 |
48 |
49 | ### NOTE: we use done-mode 2 for PH / MH (dones on task success and end of trajectory) ###
50 |
51 |
52 | ### ph ###
53 |
54 |
55 | # lift - ph
56 | python dataset_states_to_obs.py --done_mode 2 \
57 | --dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \
58 | --output_name low_dim.hdf5
59 | python dataset_states_to_obs.py --done_mode 2 \
60 | --dataset $BASE_DATASET_DIR/lift/ph/demo.hdf5 \
61 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
62 |
63 | # can - ph
64 | python dataset_states_to_obs.py --done_mode 2 \
65 | --dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \
66 | --output_name low_dim.hdf5
67 | python dataset_states_to_obs.py --done_mode 2 \
68 | --dataset $BASE_DATASET_DIR/can/ph/demo.hdf5 \
69 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
70 |
71 | # square - ph
72 | python dataset_states_to_obs.py --done_mode 2 \
73 | --dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \
74 | --output_name low_dim.hdf5
75 | python dataset_states_to_obs.py --done_mode 2 \
76 | --dataset $BASE_DATASET_DIR/square/ph/demo.hdf5 \
77 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
78 |
79 | # transport - ph
80 | python dataset_states_to_obs.py --done_mode 2 \
81 | --dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \
82 | --output_name low_dim.hdf5
83 | python dataset_states_to_obs.py --done_mode 2 \
84 | --dataset $BASE_DATASET_DIR/transport/ph/demo.hdf5 \
85 | --output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84
86 |
87 | # tool hang - ph
88 | python dataset_states_to_obs.py --done_mode 2 \
89 | --dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \
90 | --output_name low_dim.hdf5
91 | python dataset_states_to_obs.py --done_mode 2 \
92 | --dataset $BASE_DATASET_DIR/tool_hang/ph/demo.hdf5 \
93 | --output_name image.hdf5 --camera_names sideview robot0_eye_in_hand --camera_height 240 --camera_width 240
94 |
95 |
96 | ### mh ###
97 |
98 |
99 | # lift - mh
100 | python dataset_states_to_obs.py --done_mode 2 \
101 | --dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \
102 | --output_name low_dim.hdf5
103 | python dataset_states_to_obs.py --done_mode 2 \
104 | --dataset $BASE_DATASET_DIR/lift/mh/demo.hdf5 \
105 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
106 |
107 | # can - mh
108 | python dataset_states_to_obs.py --done_mode 2 \
109 | --dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \
110 | --output_name low_dim.hdf5
111 | python dataset_states_to_obs.py --done_mode 2 \
112 | --dataset $BASE_DATASET_DIR/can/mh/demo.hdf5 \
113 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
114 |
115 | # square - mh
116 | python dataset_states_to_obs.py --done_mode 2 \
117 | --dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \
118 | --output_name low_dim.hdf5
119 | python dataset_states_to_obs.py --done_mode 2 \
120 | --dataset $BASE_DATASET_DIR/square/mh/demo.hdf5 \
121 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
122 |
123 | # transport - mh
124 | python dataset_states_to_obs.py --done_mode 2 \
125 | --dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \
126 | --output_name low_dim.hdf5
127 | python dataset_states_to_obs.py --done_mode 2 \
128 | --dataset $BASE_DATASET_DIR/transport/mh/demo.hdf5 \
129 | --output_name image.hdf5 --camera_names shouldercamera0 shouldercamera1 robot0_eye_in_hand robot1_eye_in_hand --camera_height 84 --camera_width 84
130 |
131 |
132 | ### can-paired ###
133 |
134 |
135 | python dataset_states_to_obs.py --done_mode 2 \
136 | --dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \
137 | --output_name low_dim.hdf5
138 | python dataset_states_to_obs.py --done_mode 2 \
139 | --dataset $BASE_DATASET_DIR/can/paired/demo.hdf5 \
140 | --output_name image.hdf5 --camera_names agentview robot0_eye_in_hand --camera_height 84 --camera_width 84
141 |
--------------------------------------------------------------------------------
/robomimic/scripts/generate_config_templates.py:
--------------------------------------------------------------------------------
1 | """
2 | Helpful script to generate example config files for each algorithm. These should be re-generated
3 | when new config options are added, or when default settings in the config classes are modified.
4 | """
5 | import os
6 | import json
7 |
8 | import robomimic
9 | from robomimic.config import get_all_registered_configs
10 |
11 |
12 | def main():
13 | # store template config jsons in this directory
14 | target_dir = os.path.join(robomimic.__path__[0], "exps/templates/")
15 |
16 | # iterate through registered algorithm config classes
17 | all_configs = get_all_registered_configs()
18 | for algo_name in all_configs:
19 | # make config class for this algorithm
20 | c = all_configs[algo_name]()
21 | assert algo_name == c.algo_name
22 | # dump to json
23 | json_path = os.path.join(target_dir, "{}.json".format(algo_name))
24 | c.dump(filename=json_path)
25 |
26 |
27 | if __name__ == '__main__':
28 | main()
29 |
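
A single template can also be regenerated in isolation. The sketch below uses robomimic's config_factory helper to build one registered config by name (shown here for "bc"; any registered algo name should work the same way):

import os

import robomimic
from robomimic.config import config_factory

# build the registered config for one algorithm and dump it to its template path
c = config_factory(algo_name="bc")
json_path = os.path.join(robomimic.__path__[0], "exps/templates/bc.json")
c.dump(filename=json_path)
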
--------------------------------------------------------------------------------
/robomimic/scripts/get_dataset_info.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper script to report dataset information. By default, will print trajectory length statistics,
3 | the maximum and minimum action element in the dataset, filter keys present, environment
4 | metadata, and the structure of the first demonstration. If --verbose is passed, it will
5 | report the exact demo keys under each filter key, and the structure of all demonstrations
6 | (not just the first one).
7 |
8 | Args:
9 | dataset (str): path to hdf5 dataset
10 |
11 | filter_key (str): if provided, report statistics on the subset of trajectories
12 | in the file that correspond to this filter key
13 |
14 | verbose (bool): if flag is provided, print more details, like the structure of all
15 | demonstrations (not just the first one)
16 |
17 | Example usage:
18 |
19 | # run script on example hdf5 packaged with repository
20 | python get_dataset_info.py --dataset ../../tests/assets/test.hdf5
21 |
22 | # run script only on validation data
23 | python get_dataset_info.py --dataset ../../tests/assets/test.hdf5 --filter_key valid
24 | """
25 | import h5py
26 | import json
27 | import argparse
28 | import numpy as np
29 |
30 | if __name__ == "__main__":
31 | parser = argparse.ArgumentParser()
32 | parser.add_argument(
33 | "--dataset",
34 | type=str,
35 | help="path to hdf5 dataset",
36 | )
37 | parser.add_argument(
38 | "--filter_key",
39 | type=str,
40 | default=None,
41 | help="(optional) if provided, report statistics on the subset of trajectories \
42 | in the file that correspond to this filter key",
43 | )
44 | parser.add_argument(
45 | "--verbose",
46 | action='store_true',
47 | help="verbose output",
48 | )
49 | args = parser.parse_args()
50 |
51 | # extract demonstration list from file
52 | filter_key = args.filter_key
53 | all_filter_keys = None
54 | f = h5py.File(args.dataset, "r")
55 | if filter_key is not None:
56 | # use the demonstrations from the filter key instead
57 | print("NOTE: using filter key {}".format(filter_key))
58 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])])
59 | else:
60 | # use all demonstrations
61 | demos = sorted(list(f["data"].keys()))
62 |
63 | # extract filter key information
64 | if "mask" in f:
65 | all_filter_keys = {}
66 | for fk in f["mask"]:
67 | fk_demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(fk)])])
68 | all_filter_keys[fk] = fk_demos
69 |
70 | # put demonstration list in increasing episode order
71 | inds = np.argsort([int(elem[5:]) for elem in demos])
72 | demos = [demos[i] for i in inds]
73 |
74 | # extract length of each trajectory in the file
75 | traj_lengths = []
76 | action_min = np.inf
77 | action_max = -np.inf
78 | for ep in demos:
79 | traj_lengths.append(f["data/{}/actions".format(ep)].shape[0])
80 | action_min = min(action_min, np.min(f["data/{}/actions".format(ep)][()]))
81 | action_max = max(action_max, np.max(f["data/{}/actions".format(ep)][()]))
82 | traj_lengths = np.array(traj_lengths)
83 |
84 | # report statistics on the data
85 | print("")
86 | print("total transitions: {}".format(np.sum(traj_lengths)))
87 | print("total trajectories: {}".format(traj_lengths.shape[0]))
88 | print("traj length mean: {}".format(np.mean(traj_lengths)))
89 | print("traj length std: {}".format(np.std(traj_lengths)))
90 | print("traj length min: {}".format(np.min(traj_lengths)))
91 | print("traj length max: {}".format(np.max(traj_lengths)))
92 | print("action min: {}".format(action_min))
93 | print("action max: {}".format(action_max))
94 | print("")
95 | print("==== Filter Keys ====")
96 | if all_filter_keys is not None:
97 | for fk in all_filter_keys:
98 | print("filter key {} with {} demos".format(fk, len(all_filter_keys[fk])))
99 | else:
100 | print("no filter keys")
101 | print("")
102 | if args.verbose:
103 | if all_filter_keys is not None:
104 | print("==== Filter Key Contents ====")
105 | for fk in all_filter_keys:
106 | print("filter_key {} with {} demos: {}".format(fk, len(all_filter_keys[fk]), all_filter_keys[fk]))
107 | print("")
108 | env_meta = json.loads(f["data"].attrs["env_args"])
109 | print("==== Env Meta ====")
110 | print(json.dumps(env_meta, indent=4))
111 | print("")
112 |
113 | print("==== Dataset Structure ====")
114 | for ep in demos:
115 | print("episode {} with {} transitions".format(ep, f["data/{}".format(ep)].attrs["num_samples"]))
116 | for k in f["data/{}".format(ep)]:
117 | if k in ["obs", "next_obs"]:
118 | print(" key: {}".format(k))
119 | for obs_k in f["data/{}/{}".format(ep, k)]:
120 | shape = f["data/{}/{}/{}".format(ep, k, obs_k)].shape
121 | print(" observation key {} with shape {}".format(obs_k, shape))
122 | elif isinstance(f["data/{}/{}".format(ep, k)], h5py.Dataset):
123 | key_shape = f["data/{}/{}".format(ep, k)].shape
124 | print(" key: {} with shape {}".format(k, key_shape))
125 |
126 | if not args.verbose:
127 | break
128 |
129 | f.close()
130 |
131 |     # raise an error if any action is out of bounds
132 | print("")
133 | if (action_min < -1.) or (action_max > 1.):
134 | raise Exception("Dataset should have actions in [-1., 1.] but got bounds [{}, {}]".format(action_min, action_max))
135 |
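
The same statistics are easy to compute programmatically when the printed report is not enough. A minimal sketch that restricts trajectory-length statistics to a filter key, exactly as the --filter_key flag does above (the path and key are examples):

import h5py
import numpy as np

dataset_path = "../../tests/assets/test.hdf5"  # example file packaged with the repo
filter_key = "valid"

with h5py.File(dataset_path, "r") as f:
    demos = sorted(elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)]))
    lengths = np.array([f["data/{}/actions".format(ep)].shape[0] for ep in demos])

print("{} demos under '{}', {} transitions, mean length {:.1f}".format(
    len(demos), filter_key, lengths.sum(), lengths.mean()))
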
--------------------------------------------------------------------------------
/robomimic/scripts/hitl/collect_playback_utils.py:
--------------------------------------------------------------------------------
1 | """Teleoperate robot with keyboard or SpaceMouse. """
2 |
3 | import argparse
4 | import numpy as np
5 | import os
6 | import robosuite as suite
7 | from robosuite import load_controller_config
8 | from robosuite.utils.input_utils import input2action
9 | from robosuite.wrappers import DataCollectionWrapper
10 | import time
11 |
12 | import json
13 | from robosuite.scripts.collect_human_demonstrations import gather_demonstrations_as_hdf5
14 | import robomimic
15 | import cv2
16 | import robomimic.utils.obs_utils as ObsUtils
17 | import copy
18 | import h5py
19 |
20 | import robosuite
21 | is_v1 = (robosuite.__version__.split(".")[0] == "1")
22 |
23 | # Episode-length constants; expected to be set by the calling script before use
24 | GOOD_EPISODE_LENGTH = None
25 | MAX_EPISODE_LENGTH = None
26 | SUCCESS_HOLD = None
27 |
28 | class RandomPolicy:
29 | def __init__(self, env):
30 | self.env = env
31 | self.low, self.high = env.action_spec
32 |
33 | def get_action(self, obs):
34 | return np.random.uniform(self.low, self.high) / 2
35 |
36 | class TrainedPolicy:
37 | def __init__(self, checkpoint):
38 | from robomimic.utils.file_utils import policy_from_checkpoint
39 | self.policy = policy_from_checkpoint(ckpt_path=checkpoint)[0]
40 |
41 | def get_action(self, obs):
42 | obs = copy.deepcopy(obs)
43 | di = obs
44 | postprocess_visual_obs = True
45 |
46 | ret = {}
47 |         # visual post-processing is currently disabled; the original logic is
48 |         # kept here for reference (it flipped and processed image observations):
49 |         # for k in di:
50 |         #     if ObsUtils.key_is_image(k):
51 |         #         ret[k] = di[k][::-1]
52 |         #         if postprocess_visual_obs:
53 |         #             ret[k] = ObsUtils.process_image(ret[k])
54 |
55 | obs.update(ret)
56 |
57 | return self.policy(obs)
58 |
59 | def is_empty_input_spacemouse(action):
60 | # empty_input1 = np.array([0.000, 0.000, 0.000, 0.000, 0.000, 0.000, -1.000])
61 | empty_input = np.array([0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 1.000])
62 |     # the gripper dimension rests at +/-1, so compare against the absolute action
63 |     return np.array_equal(np.abs(action), empty_input)
64 |
65 |
66 | def terminate_condition_met(time_success, timestep_count, term_cond):
67 | assert term_cond in ["fixed_length", "success_count", "stop"]
68 | if term_cond == "fixed_length":
69 | return timestep_count >= GOOD_EPISODE_LENGTH and time_success > 0
70 | elif term_cond == "success_count":
71 | return time_success == SUCCESS_HOLD
72 | elif term_cond == "stop":
73 | return timestep_count >= MAX_EPISODE_LENGTH
74 |
75 | def post_process_spacemouse_action(action, grasp, last_grasp):
76 | """ Fixing Spacemouse Action """
77 | # If the current grasp is active (1) and last grasp is not (-1) (i.e.: grasping input just pressed),
78 | # toggle arm control and / or camera viewing angle if requested
79 | if last_grasp < 0 < grasp:
80 | if args.switch_on_grasp:
81 | args.arm = "left" if args.arm == "right" else "right"
82 | if args.toggle_camera_on_grasp:
83 | cam_id = (cam_id + 1) % num_cam
84 | env.viewer.set_camera(camera_id=cam_id)
85 | # Update last grasp
86 | last_grasp = grasp
87 |
88 | if is_v1:
89 | env_action_dim = env.action_dim
90 | else:
91 | env_action_dim = 7
92 |
93 | # Fill out the rest of the action space if necessary
94 | rem_action_dim = env_action_dim - action.size
95 | if rem_action_dim > 0:
96 | # Initialize remaining action space
97 | rem_action = np.zeros(rem_action_dim)
98 | # This is a multi-arm setting, choose which arm to control and fill the rest with zeros
99 | if args.arm == "right":
100 | action = np.concatenate([action, rem_action])
101 | elif args.arm == "left":
102 | action = np.concatenate([rem_action, action])
103 | else:
104 | # Only right and left arms supported
105 | print("Error: Unsupported arm specified -- "
106 | "must be either 'right' or 'left'! Got: {}".format(args.arm))
107 | elif rem_action_dim < 0:
108 | # We're in an environment with no gripper action space, so trim the action space to be the action dim
109 | action = action[:env_action_dim]
110 |
111 | """ End Fixing Spacemouse Action """
112 | return action, last_grasp
113 |
114 | def reset_to(env, state):
115 | """
116 | Reset to a specific simulator state.
117 |
118 | Args:
119 | state (dict): current simulator state that contains one or more of:
120 | - states (np.ndarray): initial state of the mujoco environment
121 | - model (str): mujoco scene xml
122 |
123 | Returns:
124 | observation (dict): observation dictionary after setting the simulator state (only
125 | if "states" is in @state)
126 | """
127 | should_ret = False
128 | if "model" in state:
129 | env.reset()
130 | xml = env.postprocess_model_xml(state["model"])
131 | env.reset_from_xml_string(xml)
132 | env.sim.reset()
133 | if not is_v1:
134 | # hide teleop visualization after restoring from model
135 |             env.sim.model.site_rgba[env.eef_site_id] = np.array([0., 0., 0., 0.])
136 |             env.sim.model.site_rgba[env.eef_cylinder_id] = np.array([0., 0., 0., 0.])
137 | if "states" in state:
138 | env.sim.set_state_from_flattened(state["states"])
139 | env.sim.forward()
140 | should_ret = True
141 |
142 | if "goal" in state:
143 | env.set_goal(**state["goal"])
144 |
145 | return env._get_observations(force_update=True)
146 |
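
reset_to expects a dict with optional "model" (scene xml) and "states" (flattened simulator state) entries, which is exactly what robomimic-format hdf5 files store per episode. A hedged sketch of restoring the first state of a stored demo; playback_first_state, the path, and the episode name are hypothetical, and env is assumed to be an already-constructed robosuite environment:

import h5py

# importing pulls in the module's robosuite / cv2 dependencies
from robomimic.scripts.hitl.collect_playback_utils import reset_to


def playback_first_state(env, hdf5_path, ep="demo_0"):
    """Restore @env to the first stored state of episode @ep (hypothetical helper)."""
    with h5py.File(hdf5_path, "r") as f:
        initial_state = {
            "states": f["data/{}/states".format(ep)][0],
            "model": f["data/{}".format(ep)].attrs["model_file"],
        }
    return reset_to(env, initial_state)
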
--------------------------------------------------------------------------------
/robomimic/scripts/hyperparam_helper.py:
--------------------------------------------------------------------------------
1 | """
2 | A useful script for generating json files and shell scripts for conducting parameter scans.
3 | The script takes a path to a base json file as an argument and a shell file name.
4 | It generates a set of new json files in the same folder as the base json file, and
5 | a shell file script that contains commands to run for each experiment.
6 |
7 | Instructions:
8 |
9 | (1) Start with a base json that specifies a complete set of parameters for a single
10 | run. This only needs to include parameters you want to sweep over, and parameters
11 | that are different from the defaults. You can set this file path by either
12 | passing it as an argument (e.g. --config /path/to/base.json) or by directly
13 | setting the config file in @make_generator. The new experiment jsons will be put
14 | into the same directory as the base json.
15 |
16 | (2) Decide on what json parameters you would like to sweep over, and fill those in as
17 | keys in @make_generator below, taking note of the hierarchical key
18 | formatting using "/" or ".". Fill in corresponding values for each - these will
19 | be used in creating the experiment names, and for determining the range
20 |         of values to sweep. Parameters that should be swept together should
21 | be assigned the same group number.
22 |
23 | (3) Set the output script name by either passing it as an argument (e.g. --script /path/to/script.sh)
24 | or by directly setting the script file in @make_generator. The script to run all experiments
25 | will be created at the specified path.
26 |
27 | Args:
28 | config (str): path to a base config json file that will be modified to generate config jsons.
29 | The jsons will be generated in the same folder as this file.
30 |
31 | script (str): path to output script that contains commands to run the generated training runs
32 |
33 | Example usage:
34 |
35 | # assumes that /tmp/gen_configs/base.json has already been created (see quickstart section of docs for an example)
36 | python hyperparam_helper.py --config /tmp/gen_configs/base.json --script /tmp/gen_configs/out.sh
37 | """
38 | import argparse
39 |
40 | import robomimic
41 | import robomimic.utils.hyperparam_utils as HyperparamUtils
42 |
43 |
44 | def make_generator(config_file, script_file):
45 | """
46 | Implement this function to setup your own hyperparameter scan!
47 | """
48 | generator = HyperparamUtils.ConfigGenerator(
49 | base_config_file=config_file, script_file=script_file
50 | )
51 |
52 | # use RNN with horizon 10
53 | generator.add_param(
54 | key="algo.rnn.enabled",
55 | name="",
56 | group=0,
57 | values=[True],
58 | )
59 | generator.add_param(
60 | key="train.seq_length",
61 | name="",
62 | group=0,
63 | values=[10],
64 | )
65 | generator.add_param(
66 | key="algo.rnn.horizon",
67 | name="",
68 | group=0,
69 | values=[10],
70 | )
71 |
72 | # LR - 1e-3, 1e-4
73 | generator.add_param(
74 | key="algo.optim_params.policy.learning_rate.initial",
75 | name="plr",
76 | group=1,
77 | values=[1e-3, 1e-4],
78 | )
79 |
80 | # GMM y / n
81 | generator.add_param(
82 | key="algo.gmm.enabled",
83 | name="gmm",
84 | group=2,
85 | values=[True, False],
86 | value_names=["t", "f"],
87 | )
88 |
89 | # RNN dim 400 + MLP dims (1024, 1024) vs. RNN dim 1000 + empty MLP dims ()
90 | generator.add_param(
91 | key="algo.rnn.hidden_dim",
92 | name="rnnd",
93 | group=3,
94 | values=[
95 | 400,
96 | 1000,
97 | ],
98 | )
99 | generator.add_param(
100 | key="algo.actor_layer_dims",
101 | name="mlp",
102 | group=3,
103 | values=[
104 | [1024, 1024],
105 | [],
106 | ],
107 | value_names=["1024", "0"],
108 | )
109 |
110 | return generator
111 |
112 |
113 | def main(args):
114 |
115 | # make config generator
116 | generator = make_generator(config_file=args.config, script_file=args.script)
117 |
118 | # generate jsons and script
119 | generator.generate()
120 |
121 |
122 | if __name__ == "__main__":
123 | parser = argparse.ArgumentParser()
124 |
125 | # Path to base json config - will override any defaults.
126 | parser.add_argument(
127 | "--config",
128 | type=str,
129 | help="path to base config json that will be modified to generate jsons. The jsons will\
130 | be generated in the same folder as this file.",
131 | )
132 |
133 | # Script name to generate - will override any defaults
134 | parser.add_argument(
135 | "--script",
136 | type=str,
137 | help="path to output script that contains commands to run the generated training runs",
138 | )
139 |
140 | args = parser.parse_args()
141 | main(args)
142 |
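
The group semantics above determine how many runs are generated: values that share a group vary together (they are zipped), while distinct groups are crossed. Below is a standalone sketch of the combinatorics for the scan defined in make_generator; it illustrates the counting logic only, not the ConfigGenerator implementation:

import itertools

group_1 = [{"plr": v} for v in [1e-3, 1e-4]]    # learning rate
group_2 = [{"gmm": v} for v in [True, False]]   # GMM on / off
group_3 = [                                     # rnn dim and mlp dims vary together
    {"rnnd": 400, "mlp": [1024, 1024]},
    {"rnnd": 1000, "mlp": []},
]

runs = [{**a, **b, **c} for a, b, c in itertools.product(group_1, group_2, group_3)]
print(len(runs), "experiment configs")  # 2 * 2 * 2 = 8
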
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/auto_append.txt:
--------------------------------------------------------------------------------
1 | no
2 |
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/auto_overwrite.txt:
--------------------------------------------------------------------------------
1 | yes
2 |
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/base_args.py:
--------------------------------------------------------------------------------
1 | """
2 | File holding all command line arguments to use
3 | """
4 |
5 | from argparse import ArgumentParser, Namespace, Action, ArgumentError, SUPPRESS, _UNRECOGNIZED_ARGS_ATTR
6 | import sys as _sys
7 |
8 | BOOL_CHOICES = ['True', 'False', 'true', 'false']
9 | BOOL_MAPPING = {
10 | "false": False,
11 | "true": True
12 | }
13 | BOOL_STR = BOOL_MAPPING.keys()
14 |
15 |
16 | def maybe_array_to_element(inp):
17 | """
18 | Maybe converts an array to a single (numerical) element. If len(inp) == 1, returns the input's first
19 | element. Otherwise, returns the input
20 | """
21 | return inp[0] if type(inp) is list and len(inp) == 1 else inp
22 |
23 |
24 | # Define custom parsing class for nested default parses
25 | class NestedParser(ArgumentParser):
26 | def parse_known_args(self, args=None, namespace=None):
27 | if args is None:
28 | # args default to the system args
29 | args = _sys.argv[1:]
30 | else:
31 | # make sure that args are mutable
32 | args = list(args)
33 |
34 | # default Namespace built from parser defaults
35 | if namespace is None:
36 | namespace = Namespace()
37 |
38 | # add any action defaults that aren't present
39 | for action in self._actions:
40 | if action.dest is not SUPPRESS:
41 | if not hasattr(namespace, action.dest):
42 | if action.default is not SUPPRESS:
43 | # Send attribute to groupspace, not namespace!
44 | groupspace = getattr(namespace, action.const, None) if action.const else namespace
45 | if groupspace is None:
46 | # Create new attribute in main namespace and reference this with groupspace
47 | setattr(namespace, action.const, Namespace())
48 | groupspace = getattr(namespace, action.const)
49 | default = BOOL_MAPPING[action.default.lower()] \
50 | if type(action.default) is str and action.default.lower() in BOOL_STR \
51 | else action.default
52 | setattr(groupspace, action.dest, default)
53 |
54 | # add any parser defaults that aren't present
55 | for dest in self._defaults:
56 | if not hasattr(namespace, dest):
57 | #groupspace = getattr(namespace, dest.const, Namespace()) if dest.const else namespace
58 | setattr(namespace, dest, self._defaults[dest])
59 |
60 | # parse the arguments and exit if there are any errors
61 | try:
62 | namespace, args = self._parse_known_args(args, namespace)
63 | if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR):
64 | args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR))
65 | delattr(namespace, _UNRECOGNIZED_ARGS_ATTR)
66 | return namespace, args
67 | except ArgumentError:
68 | err = _sys.exc_info()[1]
69 | self.error(str(err))
70 |
71 |
72 | # Define class for creating custom nested namespaces
73 | class GroupedAction(Action):
74 |
75 | def __init__(self,
76 | option_strings,
77 | dest,
78 | nargs=None,
79 | const=None,
80 | default=None,
81 | type=None,
82 | choices=None,
83 | required=False,
84 | help=None,
85 | metavar=None,
86 | maybe_array=False,
87 | ):
88 | # Add custom attributes
89 | self.maybe_array = maybe_array
90 |
91 | # Run super init
92 | super().__init__(
93 | option_strings=option_strings,
94 | dest=dest,
95 | nargs=nargs,
96 | const=const,
97 | default=default,
98 | type=type,
99 | choices=choices,
100 | required=required,
101 | help=help,
102 | metavar=metavar,
103 | )
104 |
105 | def __call__(self, parser, namespace, values, option_string=None):
106 | groupspace = getattr(namespace, self.const, Namespace())
107 | if type(values) is str and values.lower() in BOOL_STR:
108 | values = BOOL_MAPPING[values.lower()]
109 | # Possibly convert array if requested
110 | if self.maybe_array:
111 | values = maybe_array_to_element(values)
112 | setattr(groupspace, self.dest, values)
113 | setattr(namespace, self.const, groupspace)
114 |
115 |
116 | # Define global parser
117 | parser = NestedParser(description='Top level arguments')
118 |
119 | # Add seed arg always
120 | parser.add_argument(
121 | '--seed', type=int, default=1, help='random seed (default: 1)')
122 |
123 |
124 | # def parse_arguments():
125 | # """
126 | # Parses all arguments and splits them into their appropriate namespaces, returning separately the robosuite args,
127 | # rllib args, and agent args
128 | # """
129 | # args = parser.parse_args()
130 | # robosuite_args = getattr(args, "robosuite", None)
131 | # rllib_args = getattr(args, "rllib", None)
132 | # model_args = getattr(args, "model", None)
133 | # agent_args = getattr(args, "agent", None)
134 | #
135 | # # Print all args
136 | # print()
137 | # for t, arg in zip(("robosuite", "rllib", "model", "agent"), (robosuite_args, rllib_args, model_args, agent_args)):
138 | # print(' {} Params: '.format(t))
139 | # if arg is not None:
140 | # for key, value in arg.__dict__.items():
141 | # if key.startswith('__') or key.startswith('_'):
142 | # continue
143 | # print(' {}: {}'.format(key, value))
144 | # print()
145 | #
146 | # # Return args
147 | # return robosuite_args, rllib_args, model_args, agent_args
148 |
149 |
150 | if __name__ == '__main__':
151 | # Add arguments
152 | # add_robosuite_arguments()
153 | # add_rllib_arguments()
154 | # add_ppo_arguments()
155 | #
156 | # # Test parsing functionality
157 | # a, b, c = parse_arguments()
158 | # print(a)
159 | # print(b)
160 | # print(c)
161 | pass
162 |
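
To see how GroupedAction routes values into a nested namespace, the sketch below registers a hypothetical --demo_lr flag on the module's global parser, grouped under a sub-namespace named by const:

from robomimic.scripts.slurm.base_args import GroupedAction, parser

parser.add_argument(
    "--demo_lr",
    type=float,
    default=1e-4,
    const="demo",  # name of the sub-namespace this argument is grouped under
    action=GroupedAction,
)

args = parser.parse_args(["--demo_lr", "0.001"])
print(args.demo.demo_lr)  # 0.001 -- stored on the nested "demo" namespace
print(args.seed)          # 1 -- top-level default registered in base_args
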
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/base_template.sbatch:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | #lines that start with SBATCH are directives used by SLURM for scheduling
4 | #################
5 | #partition name
6 | #SBATCH --partition={{PARTITION}}
7 | #specific machines to avoid
8 | #SBATCH --exclude={{EXCLUDE}}
9 | #################
10 | #number of GPUs
11 | #SBATCH --gres=gpu:{{NUM_GPU}}
12 | ##SBATCH --nodes=1
13 | #SBATCH --cpus-per-task=4
14 | #SBATCH --ntasks={{NUM_CPU}}
15 | #################
16 | #set a job name
17 | #SBATCH --job-name="{{JOB_NAME}}"
18 | #################
19 | #a file for job output, you can check job progress, append the job ID with %j to make it unique
20 | #SBATCH --output={{EXECUTABLE_LOG_DIR}}/%j.out
21 | #################
22 | # a file for errors from the job
23 | #SBATCH --error={{EXECUTABLE_LOG_DIR}}/%j.err
24 | #################
25 | #time you think you need; default is 2 hours
26 | #format could be dd-hh:mm:ss, hh:mm:ss, mm:ss, or mm
27 | #SBATCH --time={{HOURS}}:00:00
28 | #################
29 | # Quality of Service (QOS); think of it as sending your job into a special queue; --qos=long allows a max job length of 7 days.
30 | # uncomment ##SBATCH --qos=long if you want your job to run longer than 48 hours, the default limit on the normal partition.
31 | # NOTE - in the hns partition the default max run time is 7 days, so you won't need to include qos; also change to normal partition
32 | # since dev max run time is 2 hours.
33 | #{{QOS_LONG}}
34 | # We are submitting to the dev partition, there are several on sherlock: normal, gpu, bigmem (jobs requiring >64Gigs RAM)
35 | ##SBATCH -p dev
36 | #################
37 | # --mem is memory per node; the default is 4000 MB per CPU. Remember to ask for enough memory to match your CPU request:
38 | # Sherlock automatically allocates 4 Gigs of RAM per CPU, so if you ask for 8 CPUs you will get 32 Gigs of RAM. Either
39 | # leave --mem commented out or request >= the RAM needed for your CPU request. It also accepts memory in units, e.g. "--mem=4G"
40 | #SBATCH --mem={{MEM}}G
41 | # to request multiple threads/CPUs use the -c option; on Sherlock we use 1 thread/CPU, 16 CPUs on each normal compute node, 4 Gigs RAM per CPU. Here we request just 1.
42 | #SBATCH -c 1
43 | #################
44 | # Have SLURM send you an email when the job ends or fails, careful, the email could end up in your clutter folder
45 | # Also, if you submit hundreds of jobs at once you will get hundreds of emails.
46 | #SBATCH --mail-type=END,FAIL # notifications for job done & fail
47 | # Remember to change this to your email
48 | #SBATCH --mail-user={{NOTIFICATION_EMAIL}}
49 | # list out some useful information
50 | echo "SLURM_JOBID="$SLURM_JOBID
51 | echo "SLURM_JOB_NAME="$SLURM_JOB_NAME
52 | echo "SLURM_JOB_NODELIST"=$SLURM_JOB_NODELIST
53 | echo "SLURM_NNODES"=$SLURM_NNODES
54 | echo "SLURMTMPDIR="$SLURMTMPDIR
55 | echo "working directory = "$SLURM_SUBMIT_DIR
56 | #now run normal batch commands
57 | {{SHELL_SOURCE_SCRIPT}}
58 | conda activate {{PYTHON_INTERPRETER}}
59 | export PYTHONPATH=$PYTHONPATH:{{EXTRA_PYTHONPATH}}
60 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{{MUJOCO_DIR}}
61 |
62 | {{COPY_FILE}}
63 | {{CMD}}
64 | {{EXTRA_CMDS}}
65 |
66 | # done
67 | echo "Done"
68 | exit 0
69 |
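
The {{...}} tokens are placeholders substituted at generation time (by sbatch_utils.py). As a stand-in for that machinery, a simple str.replace-based fill is enough to see how a concrete sbatch file is produced; the fill_template helper and the values below are illustrative, and the template is assumed to sit in the working directory:

def fill_template(template_text, values):
    """Replace each {{KEY}} placeholder with its value (illustrative helper)."""
    for key, value in values.items():
        template_text = template_text.replace("{{" + key + "}}", str(value))
    return template_text

with open("base_template.sbatch") as f:
    filled = fill_template(f.read(), {
        "PARTITION": "titans",
        "NUM_GPU": 1,
        "NUM_CPU": 4,
        "JOB_NAME": "bc_sweep",
        "HOURS": 20,
        "MEM": 16,
    })
print(filled)
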
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/run_hp_sweep.py:
--------------------------------------------------------------------------------
1 | """
2 | Script for executing all configs generated from hyperparameter_helper.py (in batchRL)
3 |
4 | Note that this assumes that hyperparameter_helper.py has already been run, and that all the resulting
5 | configurations exist in a single folder
6 | """
7 |
8 | # from slurm.util.arguments import *
9 | from robomimic.scripts.slurm.batchrl_args import *
10 | from robomimic.scripts.slurm.sbatch_args import *
11 |
12 | # from slurm.util.sbatch_utils import create_and_execute_sbatch_script
13 | from robomimic.scripts.slurm.sbatch_utils import create_and_execute_sbatch_script
14 |
15 | import copy
16 |
17 | # Add relevant input arguments
18 | add_sbatch_args()
19 | add_batchrl_hp_args()
20 |
21 |
22 | def parse_configs_from_hp_script(hp_script):
23 | """
24 |     Helper function to parse the executable hyperparameter script generated from hyperparameter_helper.py (in batchRL)
25 | to infer the filepaths to the generated configs.
26 |
27 | Args:
28 | hp_script (str): Absolute fpath to the generated hyperparameter script
29 |
30 | Returns:
31 | list: Absolute paths to the configs to be deployed in the hp sweep
32 | """
33 | # Create list to fill as we parse the script
34 | configs = []
35 | # Open and parse file line by line
36 | with open(hp_script) as f:
37 | for line in f:
38 | # Make sure we only parse the lines where we have a valid python command
39 | if line.startswith("python"):
40 | # Extract only the config path
41 | configs.append(line.split(" ")[-1].split("\n")[0])
42 | # Return configs
43 | return configs
44 |
45 |
46 | def generate_debug_script(hp_script):
47 | """
48 |     Helper function to generate an .sh executable debug hyperparameter script using the hp sweep script generated from
49 | hyperparameter_helper.py (in batchRL)
50 |
51 | Args:
52 | hp_script (str): Absolute fpath to the generated hyperparameter script
53 | """
54 | # Modify the path so that we add "_debug" to the end -- hacky way since we know ".sh" extension is 3 chars long
55 | debug_script = hp_script[:-3] + "_debug.sh"
56 | # Open and parse file line by line
57 | with open(hp_script) as f:
58 | # Open a new file to write the debug script to
59 | with open(debug_script, 'w+') as new_file:
60 | # Loop through hp script and write to this new file
61 | for line in f:
62 | # Make sure we only parse the lines where we have a valid python command
63 | if line.startswith("python"):
64 | # We write the line plus the extra --debug flag
65 | new_file.write(line.split("\n")[0] + " --debug\n")
66 | else:
67 | # Just write line normally
68 | new_file.write(line)
69 |
70 |
71 | if __name__ == '__main__':
72 | # First, parse args
73 | args = parser.parse_args()
74 |
75 | # Extract configs from hp sweep script
76 | configs = parse_configs_from_hp_script(hp_script=args.batchrl_hp.hp_sweep_script)
77 |
78 | # If user requested a debug script to be generated, do that now
79 | if args.batchrl_hp.generate_debug_script:
80 | generate_debug_script(hp_script=args.batchrl_hp.hp_sweep_script)
81 |
82 | n = args.batchrl_hp.n_exps_per_instance
83 |
84 | # Loop through each config to create an sbatch script from
85 | for i in range(0, len(configs), n):
86 | script_args = []
87 | configs_for_batch = configs[i:i+n]
88 | for config in configs_for_batch:
90 |             # Extract name for this sbatch script (the batch ends up named after its last config)
90 | name = config.split("/")[-1].split(".json")[0]
91 |
92 | # Compose script arguments to pass to sbatch script
93 | script_args.append({
94 | "config": config,
95 | })
96 |
97 | # Generate the sbatch file
98 | print(f"Creating {name}...")
99 |
100 |         # Multiply resources by the number of jobs in the batch
101 | sbatch_args = copy.deepcopy(args.sbatch)
102 | sbatch_args.num_cpu *= len(configs_for_batch)
103 | sbatch_args.mem_gb *= len(configs_for_batch)
104 |
105 | create_and_execute_sbatch_script(
106 | filename=name,
107 | job_name=name,
108 | sbatch_args=sbatch_args,
109 | script_args=script_args)
110 |
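
parse_configs_from_hp_script only looks for lines that start with "python" and takes the final whitespace-separated token, so it can be exercised on a synthetic sweep script. A quick sketch (assuming the module imports cleanly; importing it registers the sbatch/batchrl arguments as a side effect):

import tempfile

from robomimic.scripts.slurm.run_hp_sweep import parse_configs_from_hp_script

script_text = (
    "#!/bin/bash\n"
    "python train.py --config /tmp/gen_configs/run_plr_0.001.json\n"
    "python train.py --config /tmp/gen_configs/run_plr_0.0001.json\n"
)
with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f:
    f.write(script_text)
    path = f.name

print(parse_configs_from_hp_script(hp_script=path))
# ['/tmp/gen_configs/run_plr_0.001.json', '/tmp/gen_configs/run_plr_0.0001.json']
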
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/sbatch_args.py:
--------------------------------------------------------------------------------
1 | # from slurm.util.arguments.base_args import *
2 | from robomimic.scripts.slurm.base_args import *
3 |
4 | PARTITIONS = (
5 | "napoli",
6 | "tibet",
7 | "svl",
8 |
9 | "titans",
10 | "dgx",
11 | )
12 |
13 |
14 | def add_sbatch_args():
15 | """
16 | Adds sbatch arguments needed for automatically generating and executing python files
17 | """
18 | # Define namespace for the robosuite args
19 | prefix = 'sbatch'
20 | actions = {
21 | "const": prefix,
22 | "action": GroupedAction
23 | }
24 | # Required args
25 | parser.add_argument(
26 | '--script',
27 | type=str,
28 | required=True,
29 | help='path to the Python script to execute',
30 | **actions
31 | )
32 | parser.add_argument(
33 | '--generated_dir',
34 | type=str,
35 | required=True,
36 | help='Sets the location where generated sbatch scripts will be stored',
37 | **actions
38 | )
39 | parser.add_argument(
40 | '--python_interpreter',
41 | type=str,
42 | required=True,
43 |         help='Python interpreter to use for the executed python script',
44 | **actions
45 | )
46 |
47 | # Additional args
48 | parser.add_argument(
49 | '--partition',
50 | type=str,
51 | default='titans',
52 | choices=PARTITIONS,
53 | help='partition to run on for this process',
54 | **actions
55 | )
56 | parser.add_argument(
57 | '--exclude',
58 | type=str,
59 | default='',
60 | help='any specific machines to avoid, comma separated',
61 | **actions
62 | )
63 | parser.add_argument(
64 | '--gpu_type',
65 | type=str,
66 | default="any",
67 | help='Specific GPU type to use. "any" results in any available GPU being used for this run',
68 | **actions
69 | )
70 | parser.add_argument(
71 | '--num_gpu',
72 | type=int,
73 | default=0,
74 | help='Sets the number of gpus to use for this sbatch script',
75 | **actions
76 | )
77 | parser.add_argument(
78 | '--num_cpu',
79 | type=int,
80 | default=4,
81 | help='Sets the number of cpus to use for this sbatch script',
82 | **actions
83 | )
84 | parser.add_argument(
85 | '--mem_gb',
86 | type=int,
87 | default=0,
88 | help='If nonzero, sets the amount of memory to be this many GB',
89 | **actions
90 | )
91 | parser.add_argument(
92 | '--max_hours',
93 | type=int,
94 | default=20,
95 | help='Sets the maximum number of hours this script will be run for',
96 | **actions
97 | )
98 | parser.add_argument(
99 | '--extra_pythonpath',
100 | type=str,
101 | default="",
102 | help='Extra paths to set to the pythonpath variable',
103 | **actions
104 | )
105 | parser.add_argument(
106 | '--overwrite',
107 | type=str,
108 | default="False",
109 | choices=BOOL_CHOICES,
110 | help='Whether to auto-overwrite existing experiment directories (otherwise responses auto-append)',
111 | **actions
112 | )
113 | parser.add_argument(
114 | '--extra_commands',
115 | nargs="+",
116 | type=str,
117 | default=None,
118 | help='Extra commands to run after main python command',
119 | **actions
120 | )
121 | parser.add_argument(
122 | '--copy_file',
123 | nargs="+",
124 | type=str,
125 | default=None,
126 | help='Copies a file from source to location. Expected format is [source_file, target_dir]. New file will '
127 | 'share the same file name as the original source file. Useful in cases e.g.: copying datasets to local ssd',
128 | **actions
129 | )
130 | parser.add_argument(
131 | '--executable_log_dir',
132 | type=str,
133 | default='/cvgl2/u/jdwong/test_output',
134 | help='Location to dump sbatch log out / err text to',
135 | **actions
136 | )
137 | parser.add_argument(
138 | '--shell_source_script',
139 | type=str,
140 | default=None,
141 | help='If specified, bash script to source at beginning of sbatch execution',
142 | **actions
143 | )
144 | parser.add_argument(
145 | '--notification_email',
146 | type=str,
147 | default='jdwong@stanford.edu',
148 | help='Email address to send slurm notifications to (e.g., when the script finishes running)',
149 | **actions
150 | )
151 | parser.add_argument(
152 | '--mujoco_dir',
153 | type=str,
154 | default='/cvgl2/u/jdwong/.mujoco/mujoco200/bin',
155 | help='Absolute path to mujoco 200 installation bin directory',
156 | **actions
157 | )
158 |
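# Note on the string-boolean convention above (assuming BOOL_CHOICES == ("True", "False"),
# defined in base_args.py): argparse stores --overwrite as a *string*, so the raw value
# must be compared rather than truth-tested -- bool("False") is True because the string
# is non-empty. Consumers (see sbatch_utils.py) should therefore check:
#   overwrite = str(sbatch_args.overwrite).lower() == "true"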
--------------------------------------------------------------------------------
/robomimic/scripts/slurm/sbatch_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Python script for generating and executing sbatch files
3 | """
4 |
5 | import os
6 | # import slurm
7 | import robomimic
8 | from pathlib import Path
9 |
10 | PARTITIONS = (
11 | "napoli",
12 | "tibet",
13 | "svl",
14 |
15 | "titans",
16 | "dgx",
17 | )
18 |
19 | robomimic_base_path = os.path.abspath(os.path.join(os.path.dirname(robomimic.__file__), os.pardir))
20 |
21 | AUTO_OVERWRITE_RESP = os.path.join(robomimic_base_path, "robomimic/scripts/slurm/auto_overwrite.txt")
22 | AUTO_APPEND_RESP = os.path.join(robomimic_base_path, "robomimic/scripts/slurm/auto_append.txt")
23 |
24 | import time
25 |
26 |
27 | def create_and_execute_sbatch_script(filename, job_name, sbatch_args, script_args=None):
28 | """
29 | Function that creates and executes an sbatch script based off of a template
30 |
31 | Args:
32 | @filename (str): Name of the sbatch file that will be generated
33 | @job_name (str): Name of sbatch job to execute
34 | @sbatch_args (Namespace): Input arguments to fill in sbatch script
35 | @script_args (list of dicts, dict or None): If specified, adds additional
36 | input arguments to script execution based on key-value mappings.
37 | If of type list, indicates multiple commands in one sbatch script.
38 | """
39 | # Create a new directory path if it doesn't exist and create a new filename that we will write to
40 | Path(sbatch_args.generated_dir).mkdir(parents=True, exist_ok=True)
41 | ts = time.time()
42 | new_sbatch_fpath = os.path.join(sbatch_args.generated_dir, "{}_{}.sbatch".format(filename, ts))
43 |
44 | # Compose extra commands
45 | if sbatch_args.extra_commands is not None:
46 | sbatch_args.extra_commands = sbatch_args.extra_commands if type(sbatch_args.extra_commands) is list else \
47 | [sbatch_args.extra_commands]
48 | sbatch_args.extra_commands = "\n".join(sbatch_args.extra_commands)
49 | else:
50 | sbatch_args.extra_commands = ""
51 |
52 | # infer number of commands from script args
53 | if script_args is None:
54 | num_commands = 1
55 | elif not isinstance(script_args, list):
56 | script_args = [script_args]
57 | num_commands = 1
58 | else:
59 | num_commands = len(script_args)
60 |
61 | command = ""
62 | for i in range(num_commands):
63 | # Compose main command to be executed in script
64 | command += "python {}".format(sbatch_args.script)
65 |
66 | # Add additional input args if necessary
67 | if script_args is not None:
68 | for k, v in script_args[i].items():
69 | if v is not None:
70 | if type(v) is list or type(v) is tuple:
71 | v = " ".join(str(vi) for vi in v)
72 | command += " --{} {}".format(k, v)
73 |
74 | # Add overwrite if requested
75 | if str(sbatch_args.overwrite).lower() == "true":  # --overwrite is parsed as a string ("True"/"False")
76 | command += f" < {AUTO_OVERWRITE_RESP}"
77 | else:
78 | command += f" < {AUTO_APPEND_RESP}"
79 |
80 | command += " & \n"
81 | command += "wait"
82 |
83 | # Define partition
84 | if sbatch_args.partition == "napoli":
85 | partition = "napoli-gpu" if sbatch_args.num_gpu > 0 else "napoli-cpu\n#SBATCH --exclude=napoli[15-16]"
86 | else:
87 | partition = sbatch_args.partition
88 |
89 | # Define GPU(s) to use
90 | num_gpu = sbatch_args.num_gpu
91 | if sbatch_args.gpu_type != "any":
92 | num_gpu = f"{sbatch_args.gpu_type}:{num_gpu}"
93 |
94 | # Add copy file if requested
95 | copy_file = "" if sbatch_args.copy_file is None else create_copy_file_cmd(*sbatch_args.copy_file)
96 |
97 | # Add shell source script if requested
98 | shell_source_script = "" if sbatch_args.shell_source_script is None else f"source {sbatch_args.shell_source_script}"
99 |
100 | # Define a dict to map expected fill-ins with replacement values
101 | fill_ins = {
102 | "{{PARTITION}}": partition,
103 | "{{EXCLUDE}}": sbatch_args.exclude,
104 | "{{NUM_GPU}}": num_gpu,
105 | "{{NUM_CPU}}": sbatch_args.num_cpu,
106 | "{{JOB_NAME}}": job_name,
107 | "{{EXECUTABLE_LOG_DIR}}": sbatch_args.executable_log_dir,
108 | "{{HOURS}}": sbatch_args.max_hours,
109 | "{{QOS_LONG}}": "#SBATCH --qos=long" if sbatch_args.max_hours > 48 else "",
110 | "{{MEM}}": sbatch_args.mem_gb,
111 | "{{NOTIFICATION_EMAIL}}": sbatch_args.notification_email,
112 | "{{SHELL_SOURCE_SCRIPT}}": shell_source_script,
113 | "{{PYTHON_INTERPRETER}}": sbatch_args.python_interpreter,
114 | "{{EXTRA_PYTHONPATH}}": sbatch_args.extra_pythonpath,
115 | "{{MUJOCO_DIR}}": sbatch_args.mujoco_dir,
116 | "{{COPY_FILE}}": copy_file,
117 | "{{CMD}}": command,
118 | "{{EXTRA_CMDS}}": sbatch_args.extra_commands
119 | }
120 |
121 | # Open the template file
122 | with open(os.path.join(robomimic_base_path, "robomimic/scripts/slurm/base_template.sbatch")) as template:
123 | # Open the new sbatch file
124 | print(new_sbatch_fpath)
125 | with open(new_sbatch_fpath, 'w+') as new_file:
126 | # Loop through template and write to this new file
127 | for line in template:
128 | wrote = False
129 | # Check for various cases
130 | for k, v in fill_ins.items():
131 | # If the key is found in the line, replace it with its value and pop it from the dict
132 | if k in line:
133 | new_file.write(line.replace(k, str(v)))
134 | wrote = True
135 | break
136 | # Otherwise, we just write the line from the template directly
137 | if not wrote:
138 | new_file.write(line)
139 |
140 | # Submit this file via sbatch. Note: executing the generated file directly raises
141 | # "Permission denied" since it is not chmod +x; submitting through sbatch avoids this
142 | os.system("sbatch {}".format(new_sbatch_fpath))
143 |
144 |
145 | def create_copy_file_cmd(source_file, target_dir):
146 | """
147 | Helper function to create a bash command (in string format) to copy a source file to a target location.
148 |
149 | Args:
150 | source_file (str): Absolute path to the source file to copy
151 | target_dir (str): Absolute path to the target directory to which the source file will be copied
152 |
153 | Returns:
154 | str: bash command to execute in string format
155 | """
156 | target_filename = source_file.split("/")[-1]
157 | target_fpath = os.path.join(target_dir, target_filename)
158 | cmd =\
159 | f'mkdir -p {target_dir}\n'\
160 | f'if [[ -f "{target_fpath}" ]]; then\n'\
161 | f' echo "{target_fpath} exists, no copying"\n'\
162 | f'else\n'\
163 | f' echo "{target_fpath} does not exist, copying dataset"\n'\
164 | f' cp {source_file} {target_fpath}\n'\
165 | f'fi'
166 |
167 | return cmd
168 |
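# Usage sketch (hypothetical paths):
#   print(create_copy_file_cmd("/data/demo.hdf5", "/scr-ssd/datasets"))
# emits bash that mkdir -p's /scr-ssd/datasets, then copies demo.hdf5 into it only if
# /scr-ssd/datasets/demo.hdf5 does not already exist -- handy for caching datasets on
# a node-local SSD before training starts.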
--------------------------------------------------------------------------------
/robomimic/scripts/split_train_val.py:
--------------------------------------------------------------------------------
1 | """
2 | Script for splitting a dataset hdf5 file into training and validation trajectories.
3 |
4 | Args:
5 | dataset (str): path to hdf5 dataset
6 |
7 | filter_key (str): if provided, split the subset of trajectories
8 | in the file that correspond to this filter key into a training
9 | and validation set of trajectories, instead of splitting the
10 | full set of trajectories
11 |
12 | ratio (float): validation ratio, in (0, 1). Defaults to 0.1, which is 10%.
13 |
14 | Example usage:
15 | python split_train_val.py --dataset /path/to/demo.hdf5 --ratio 0.1
16 | """
17 |
18 | import argparse
19 | import h5py
20 | import numpy as np
21 |
22 | from robomimic.utils.file_utils import create_hdf5_filter_key
23 |
24 |
25 | def split_train_val_from_hdf5(hdf5_path, val_ratio=0.1, filter_key=None):
26 | """
27 | Splits data into training set and validation set from HDF5 file.
28 |
29 | Args:
30 | hdf5_path (str): path to the hdf5 file
31 | to load the transitions from
32 |
33 | val_ratio (float): ratio of validation demonstrations to all demonstrations
34 |
35 | filter_key (str): if provided, split the subset of demonstration keys stored
36 | under mask/@filter_key instead of the full set of demonstrations
37 | """
38 |
39 | # retrieve number of demos
40 | f = h5py.File(hdf5_path, "r")
41 | if filter_key is not None:
42 | print("using filter key: {}".format(filter_key))
43 | demos = sorted([elem.decode("utf-8") for elem in np.array(f["mask/{}".format(filter_key)])])
44 | else:
45 | demos = sorted(list(f["data"].keys()))
46 | num_demos = len(demos)
47 | f.close()
48 |
49 | assert 0.0 < val_ratio < 1.0, "val_ratio must be in (0, 1)"
50 | # get random split
51 | num_val = int(val_ratio * num_demos)
52 | mask = np.zeros(num_demos)
53 | mask[:num_val] = 1.
54 | np.random.shuffle(mask)
55 | mask = mask.astype(int)
56 | train_inds = (1 - mask).nonzero()[0]
57 | valid_inds = mask.nonzero()[0]
58 | train_keys = [demos[i] for i in train_inds]
59 | valid_keys = [demos[i] for i in valid_inds]
60 | print("{} validation demonstrations out of {} total demonstrations.".format(num_val, num_demos))
61 |
62 | # pass mask to generate split
63 | name_1 = "train"
64 | name_2 = "valid"
65 | if filter_key is not None:
66 | name_1 = "{}_{}".format(filter_key, name_1)
67 | name_2 = "{}_{}".format(filter_key, name_2)
68 |
69 | train_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=train_keys, key_name=name_1)
70 | valid_lengths = create_hdf5_filter_key(hdf5_path=hdf5_path, demo_keys=valid_keys, key_name=name_2)
71 |
72 | print("Total number of train samples: {}".format(np.sum(train_lengths)))
73 | print("Average number of train samples {}".format(np.mean(train_lengths)))
74 |
75 | print("Total number of valid samples: {}".format(np.sum(valid_lengths)))
76 | print("Average number of valid samples {}".format(np.mean(valid_lengths)))
77 |
78 |
79 | if __name__ == "__main__":
80 | parser = argparse.ArgumentParser()
81 | parser.add_argument(
82 | "--dataset",
83 | type=str,
84 | help="path to hdf5 dataset",
85 | )
86 | parser.add_argument(
87 | "--filter_key",
88 | type=str,
89 | default=None,
90 | help="if provided, split the subset of trajectories in the file that correspond to\
91 | this filter key into a training and validation set of trajectories, instead of\
92 | splitting the full set of trajectories",
93 | )
94 | parser.add_argument(
95 | "--ratio",
96 | type=float,
97 | default=0.1,
98 | help="validation ratio, in (0, 1)"
99 | )
100 | args = parser.parse_args()
101 |
102 | # seed to make sure results are consistent
103 | np.random.seed(0)
104 |
105 | split_train_val_from_hdf5(args.dataset, val_ratio=args.ratio, filter_key=args.filter_key)
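# Worked example of the split arithmetic above: with 5 demos and --ratio 0.2,
# num_val = int(0.2 * 5) = 1, so the shuffled 0/1 mask contains exactly one 1;
# mask.nonzero() selects the validation demo and (1 - mask).nonzero() the other 4,
# which are then written back to the hdf5 under the "train" / "valid" filter keys.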
--------------------------------------------------------------------------------
/robomimic/scripts/vis/vis_preintv.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from robomimic.scripts.vis.vis_utils import get_argparser, playback_dataset
5 | from robomimic.scripts.vis.image_utils import apply_filter
6 |
7 | import matplotlib
8 | matplotlib.use('Agg')
9 | import matplotlib.pyplot as plt
10 |
11 | def get_intv_and_preintv_inds(ep_info):
12 | if args.model == 'Q':
13 | vals = ep_info['q_vals']
14 | elif args.model == 'V':
15 | vals = ep_info['v_vals']
16 | else:
17 | raise ValueError
18 |
19 | ac_mods = ep_info["action_modes"]
20 | intv_inds = np.reshape(np.argwhere(ac_mods == 1), -1)
21 |
22 | preintv_inds = []
23 | intv_start_inds = [i for i in intv_inds if i > 0 and ac_mods[i - 1] != 1]
24 | for i_start in intv_start_inds:
25 | for j in range(i_start-1, 0, -1):
26 | if j in intv_inds or vals[j] > args.th:
27 | break
28 |
29 | preintv_inds.append(j)
30 |
31 | return intv_inds, preintv_inds
32 |
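# Worked example (values hypothetical): with action_modes = [0, 0, 1, 1, 0] and all
# vals below args.th, intv_inds = [2, 3]. Only index 2 starts an intervention (its
# predecessor is not an intervention step); walking backward from index 1, nothing
# breaks the scan, so preintv_inds = [1].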
33 |
34 | def plot_helper(ep_num, ep_info):
35 | fig, ax1 = plt.subplots()
36 |
37 | if args.model == 'Q':
38 | y_vals = ep_info['q_vals']
39 | y_label = 'Q'
40 | elif args.model == 'V':
41 | y_vals = ep_info['v_vals']
42 | y_label = 'V'
43 | else:
44 | raise ValueError
45 |
46 | color = 'tab:blue'
47 | ax1.set_xlabel('Timestep')
48 |
49 | ax1.set_ylabel(y_label)
50 | ax1.plot(y_vals, color=color)
51 | ax1.tick_params(axis='y')
52 |
53 | ax1.axhline(y=0.0, color='black')
54 |
55 | ax1.set_ylim(-1.2, 0.2)
56 |
57 | intv_inds, preintv_inds = get_intv_and_preintv_inds(ep_info)
58 | for i in intv_inds:
59 | ax1.axvline(x=i, color='green', linewidth=5, alpha=0.10)
60 |
61 | for i in preintv_inds:
62 | ax1.axvline(x=i, color='red', linewidth=5, alpha=0.10)
63 |
64 | plt.savefig(os.path.join(
65 | args.vis_path,
66 | 'plot_{}.png'.format(ep_num)
67 | ))
68 | plt.close()
69 |
70 |
71 | def video_helper(ep_num, ep_info):
72 | intv_inds, preintv_inds = get_intv_and_preintv_inds(ep_info)
73 |
74 | if len(intv_inds) == 0:
75 | return []
76 |
77 | video_frames = ep_info['video_frames']
78 | for (i, img) in video_frames:
79 | if i in intv_inds:
80 | img[::] = apply_filter(img, color=(0, 255, 0))
81 |
82 | if i in preintv_inds:
83 | img[::] = apply_filter(img, color=(255, 0, 0))
84 |
85 | return video_frames
86 |
87 |
88 | if __name__ == "__main__":
89 | parser = get_argparser()
90 |
91 | parser.add_argument(
92 | "--th",
93 | type=float,
94 | default=-0.35,
95 | help="threshold for pre-intervention",
96 | )
97 |
98 | parser.add_argument(
99 | "--model",
100 | type=str,
101 | default='Q',
102 | choices=['Q', 'V'],
103 | help="Model to use for determining pre-intv",
104 | )
105 |
106 | args = parser.parse_args()
107 | playback_dataset(args, plot_helper=plot_helper, video_helper=video_helper)
108 |
--------------------------------------------------------------------------------
/robomimic/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UT-Austin-RPL/sirius/2af35b48bec553afeabf5a1d5c3c2796a597e6c2/robomimic/utils/__init__.py
--------------------------------------------------------------------------------
/robomimic/utils/log_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains utility classes and functions for logging to stdout, stderr,
3 | and to tensorboard.
4 | """
5 | import os
6 | import sys
7 | import numpy as np
8 | from datetime import datetime
9 | from contextlib import contextmanager
10 | from tqdm import tqdm
11 | import time
12 |
13 |
14 | class PrintLogger(object):
15 | """
16 | This class redirects print statements to both console and a file.
17 | """
18 | def __init__(self, log_file):
19 | self.terminal = sys.stdout
20 | print('STDOUT will be forked to %s' % log_file)
21 | self.log_file = open(log_file, "a")
22 |
23 | def write(self, message):
24 | self.terminal.write(message)
25 | self.log_file.write(message)
26 | self.log_file.flush()
27 |
28 | def flush(self):
29 | # this flush method is needed for python 3 compatibility.
30 | # this handles the flush command by doing nothing.
31 | # you might want to specify some extra behavior here.
32 | pass
33 |
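# Typical usage (path hypothetical): fork all subsequent prints to a log file.
#   sys.stdout = PrintLogger("/tmp/experiment/log.txt")
#   print("hello")  # appears on the console and in /tmp/experiment/log.txt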
34 |
35 | class DataLogger(object):
36 | """
37 | Logging class to log metrics to tensorboard and/or retrieve running statistics about logged data.
38 | """
39 | def __init__(self, log_dir, config, log_tb=True, log_wandb=False):
40 | """
41 | Args:
42 | log_dir (str): base path to store logs
43 | log_tb (bool) / log_wandb (bool): whether to log to tensorboard / to wandb (wandb setup reads experiment tags from @config)
44 | """
45 | self._tb_logger = None
46 | self._wandb_logger = None
47 | self._data = dict() # store all the scalar data logged so far
48 |
49 | if log_tb:
50 | from tensorboardX import SummaryWriter
51 | self._tb_logger = SummaryWriter(os.path.join(log_dir, 'tb'))
52 |
53 | if log_wandb:
54 | import wandb
55 |
56 | num_attempts = 10
57 | for attempt in range(num_attempts):
58 | try:
59 | # set up wandb
60 | self._wandb_logger = wandb
61 | self._wandb_logger.init(
62 | entity="sirius",
63 | project=config['tags']['wandb_proj_name'],
64 | name=config.experiment.name,
65 | dir=log_dir,
66 | mode=("offline" if attempt == num_attempts - 1 else "online"),
67 | )
68 |
69 | # set up tags for identifying experiment
70 | tags = config['tags']
71 | wandb_config = {k: v for (k, v) in tags.items() if k not in ['hp_keys', 'hp_values']}
72 | for (k, v) in zip(tags['hp_keys'], tags['hp_values']):
73 | wandb_config[k] = v
74 | self._wandb_logger.config.update(wandb_config)
75 |
76 | break
77 | except Exception:
78 | print("wandb initialization failed, attempt #{}".format(attempt + 1))
79 | self._wandb_logger = None
80 | time.sleep(30)
81 |
82 | def record(self, k, v, epoch, data_type='scalar', log_stats=False):
83 | """
84 | Record data with logger.
85 |
86 | Args:
87 | k (str): key string
88 | v (float or image): value to store
89 | epoch: current epoch number
90 | data_type (str): the type of data. either 'scalar' or 'image'
91 | log_stats (bool): whether to store the mean/max/min/std for all data logged so far with key k
92 | """
93 |
94 | assert data_type in ['scalar', 'image']
95 |
96 | if data_type == 'scalar':
97 | # maybe update internal cache if logging stats for this key
98 | if log_stats or k in self._data: # any key that we're logging or previously logged
99 | if k not in self._data:
100 | self._data[k] = []
101 | self._data[k].append(v)
102 |
103 | # maybe log to tensorboard
104 | if self._tb_logger is not None:
105 | if data_type == 'scalar':
106 | self._tb_logger.add_scalar(k, v, epoch)
107 | if log_stats:
108 | stats = self.get_stats(k)
109 | for (stat_k, stat_v) in stats.items():
110 | stat_k_name = '{}-{}'.format(k, stat_k)
111 | self._tb_logger.add_scalar(stat_k_name, stat_v, epoch)
112 | elif data_type == 'image':
113 | self._tb_logger.add_images(k, img_tensor=v, global_step=epoch, dataformats="NHWC")
114 |
115 | if self._wandb_logger is not None:
116 | if data_type == 'scalar':
117 | self._wandb_logger.log({k: v}, step=epoch)
118 | if log_stats:
119 | stats = self.get_stats(k)
120 | for (stat_k, stat_v) in stats.items():
121 | self._wandb_logger.log({"{}-{}".format(k, stat_k): stat_v}, step=epoch)
122 | elif data_type == 'image':
123 | pass # Not Implemented
124 |
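# Example (key illustrative): record a scalar along with its running statistics.
#   logger.record("Train/loss", 0.42, epoch=1, log_stats=True)
# logs Train/loss at step 1 and, since log_stats is True, also logs the
# mean/std/min/max over everything recorded under "Train/loss" so far.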
125 | def get_stats(self, k):
126 | """
127 | Computes running statistics for a particular key.
128 |
129 | Args:
130 | k (str): key string
131 | Returns:
132 | stats (dict): dictionary of statistics
133 | """
134 | stats = dict()
135 | stats['mean'] = np.mean(self._data[k])
136 | stats['std'] = np.std(self._data[k])
137 | stats['min'] = np.min(self._data[k])
138 | stats['max'] = np.max(self._data[k])
139 | return stats
140 |
141 | def close(self):
142 | """
143 | Run before terminating to make sure all logs are flushed
144 | """
145 | if self._tb_logger is not None:
146 | self._tb_logger.close()
147 |
148 | if self._wandb_logger is not None:
149 | self._wandb_logger.finish()
150 |
151 |
152 | class custom_tqdm(tqdm):
153 | """
154 | Small extension to tqdm to make a few changes from default behavior.
155 | By default tqdm writes to stderr. Instead, we change it to write
156 | to stdout.
157 | """
158 | def __init__(self, *args, **kwargs):
159 | assert "file" not in kwargs
160 | super(custom_tqdm, self).__init__(*args, file=sys.stdout, **kwargs)
161 |
162 |
163 | @contextmanager
164 | def silence_stdout():
165 | """
166 | This contextmanager will redirect stdout so that nothing is printed
167 | to the terminal. Taken from the link below:
168 |
169 | https://stackoverflow.com/questions/6735917/redirecting-stdout-to-nothing-in-python
170 | """
171 | old_target = sys.stdout
172 | try:
173 | with open(os.devnull, "w") as new_target:
174 | sys.stdout = new_target
175 | yield new_target
176 | finally:
177 | sys.stdout = old_target
178 |
--------------------------------------------------------------------------------
/robomimic/utils/macros.py:
--------------------------------------------------------------------------------
1 | """
2 | Set of global variables shared across robomimic
3 | """
4 | # Sets debugging mode. Should be set at top-level script so that internal
5 | # debugging functionalities are made active
6 | DEBUG = False
7 |
--------------------------------------------------------------------------------
/robomimic/utils/python_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Set of general purpose utility functions for easier interfacing with Python API
3 | """
4 | import inspect
5 | from copy import deepcopy
6 | import robomimic.utils.macros as Macros
7 |
8 |
9 | def get_class_init_kwargs(cls):
10 | """
11 | Helper function to return a list of all valid keyword arguments (excluding "self") for the given @cls class.
12 |
13 | Args:
14 | cls (object): Class from which to grab __init__ kwargs
15 |
16 | Returns:
17 | list: All keyword arguments (excluding "self") specified by @cls __init__ constructor method
18 | """
19 | return list(inspect.signature(cls.__init__).parameters.keys())[1:]
20 |
21 |
22 | def extract_subset_dict(dic, keys, copy=False):
23 | """
24 | Helper function to extract a subset of dictionary key-values from a current dictionary. Optionally (deep)copies
25 | the values extracted from the original @dic if @copy is True.
26 |
27 | Args:
28 | dic (dict): Dictionary containing multiple key-values
29 | keys (Iterable): Specific keys to extract from @dic. If the key doesn't exist in @dic, then the key is skipped
30 | copy (bool): If True, will deepcopy all values corresponding to the specified @keys
31 |
32 | Returns:
33 | dict: Extracted subset dictionary containing only the specified @keys and their corresponding values
34 | """
35 | subset = {k: dic[k] for k in keys if k in dic}
36 | return deepcopy(subset) if copy else subset
37 |
38 |
39 | def extract_class_init_kwargs_from_dict(cls, dic, copy=False, verbose=False):
40 | """
41 | Helper function to return a dictionary of key-values that specifically correspond to @cls class's __init__
42 | constructor method, from @dic which may or may not contain additional, irrelevant kwargs.
43 |
44 | Note that @dic may possibly be missing certain kwargs as specified by cls.__init__. No error will be raised.
45 |
46 | Args:
47 | cls (object): Class from which to grab __init__ kwargs that will be used as filtering keys for @dic
48 | dic (dict): Dictionary containing multiple key-values
49 | copy (bool): If True, will deepcopy all values corresponding to the specified @keys
50 | verbose (bool): If True (or if macro DEBUG is True), then will print out mismatched keys
51 |
52 | Returns:
53 | dict: Extracted subset dictionary possibly containing only the specified keys from cls.__init__ and their
54 | corresponding values
55 | """
56 | # extract only relevant kwargs for this specific backbone
57 | cls_keys = get_class_init_kwargs(cls)
58 | subdic = extract_subset_dict(
59 | dic=dic,
60 | keys=cls_keys,
61 | copy=copy,
62 | )
63 |
64 | # Run sanity check if verbose or debugging
65 | if verbose or Macros.DEBUG:
66 | keys_not_in_cls = [k for k in dic if k not in cls_keys]
67 | keys_not_in_dic = [k for k in cls_keys if k not in list(dic.keys())]
68 | if len(keys_not_in_cls) > 0:
69 | print(f"Warning: For class {cls.__name__}, got unknown keys: {keys_not_in_cls} ")
70 | if len(keys_not_in_dic) > 0:
71 | print(f"Warning: For class {cls.__name__}, got missing keys: {keys_not_in_dic} ")
72 |
73 | return subdic
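# Hedged usage sketch (DemoNet is hypothetical, defined only for illustration):
#   class DemoNet:
#       def __init__(self, hidden_dim, activation):
#           pass
#   extract_class_init_kwargs_from_dict(DemoNet, {"hidden_dim": 64, "activation": "relu", "lr": 1e-3})
#   # -> {"hidden_dim": 64, "activation": "relu"}   ("lr" is filtered out)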
--------------------------------------------------------------------------------
/robomimic/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains some PyTorch utilities.
3 | """
4 | import numpy as np
5 | import torch
6 | import torch.optim as optim
7 |
8 |
9 | def soft_update(source, target, tau):
10 | """
11 | Soft update from the parameters of a @source torch module to a @target torch module
12 | with strength @tau. The update follows target = target * (1 - tau) + source * tau.
13 |
14 | Args:
15 | source (torch.nn.Module): source network to push target network parameters towards
16 | target (torch.nn.Module): target network to update
17 | """
18 | for target_param, param in zip(target.parameters(), source.parameters()):
19 | target_param.data.copy_(
20 | target_param.data * (1.0 - tau) + param.data * tau
21 | )
22 |
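# Quick numerical check of the rule above: with tau = 0.1, a target parameter at 0.0
# and a source parameter at 1.0, one soft_update moves the target to
# 0.0 * (1 - 0.1) + 1.0 * 0.1 = 0.1.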
23 |
24 | def hard_update(source, target):
25 | """
26 | Hard update @target parameters to match @source.
27 |
28 | Args:
29 | source (torch.nn.Module): source network to provide parameters
30 | target (torch.nn.Module): target network to update parameters for
31 | """
32 | for target_param, param in zip(target.parameters(), source.parameters()):
33 | target_param.data.copy_(param.data)
34 |
35 |
36 | def get_torch_device(try_to_use_cuda):
37 | """
38 | Return torch device. If using cuda (GPU), will also set cudnn.benchmark to True
39 | to optimize CNNs.
40 |
41 | Args:
42 | try_to_use_cuda (bool): if True and cuda is available, will use GPU
43 |
44 | Returns:
45 | device (torch.Device): device to use for models
46 | """
47 | if try_to_use_cuda and torch.cuda.is_available():
48 | torch.backends.cudnn.benchmark = True
49 | device = torch.device("cuda:0")
50 | else:
51 | device = torch.device("cpu")
52 | return device
53 |
54 |
55 | def reparameterize(mu, logvar):
56 | """
57 | Reparameterize for the backpropagation of z instead of q.
58 | This makes it so that we can backpropagate through the sampling of z from
59 | our encoder when feeding the sampled variable to the decoder.
60 |
61 | (See "The reparameterization trick" section of https://arxiv.org/abs/1312.6114)
62 |
63 | Args:
64 | mu (torch.Tensor): batch of means from the encoder distribution
65 | logvar (torch.Tensor): batch of log variances from the encoder distribution
66 |
67 | Returns:
68 | z (torch.Tensor): batch of sampled latents from the encoder distribution that
69 | support backpropagation
70 | """
71 | # logvar = \log(\sigma^2) = 2 * \log(\sigma)
72 | # \sigma = \exp(0.5 * logvar)
73 |
74 | # clamped for numerical stability
75 | logstd = (0.5 * logvar).clamp(-4, 15)
76 | std = torch.exp(logstd)
77 |
78 | # Sample \epsilon from normal distribution
79 | # use std to create a new tensor, so we don't have to care
80 | # about running on GPU or not
81 | eps = std.new(std.size()).normal_()
82 |
83 | # Then multiply with the standard deviation and add the mean
84 | z = eps.mul(std).add_(mu)
85 |
86 | return z
87 |
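# Sanity check: if logvar == 0 then std == 1 and z ~ N(mu, 1); since z is expressed as
# mu + std * eps with eps a fixed random sample, gradients flow back to mu and logvar.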
88 |
89 | def optimizer_from_optim_params(net_optim_params, net):
90 | """
91 | Helper function to return a torch Optimizer from the optim_params
92 | section of the config for a particular network.
93 |
94 | Args:
95 | net_optim_params (Config): optim_params part of algo_config corresponding
96 | to @net. This determines the optimizer that is created.
97 |
98 | net (torch.nn.Module): module whose parameters this optimizer will be
99 | responsible for
100 |
101 | Returns:
102 | optimizer (torch.optim.Optimizer): optimizer
103 | """
104 | return optim.Adam(
105 | params=net.parameters(),
106 | lr=net_optim_params["learning_rate"]["initial"],
107 | weight_decay=net_optim_params["regularization"]["L2"],
108 | )
109 |
110 |
111 | def lr_scheduler_from_optim_params(net_optim_params, net, optimizer):
112 | """
113 | Helper function to return a LRScheduler from the optim_params
114 | section of the config for a particular network. Returns None
115 | if a scheduler is not needed.
116 |
117 | Args:
118 | net_optim_params (Config): optim_params part of algo_config corresponding
119 | to @net. This determines whether a learning rate scheduler is created.
120 |
121 | net (torch.nn.Module): module whose parameters this optimizer will be
122 | responsible for
123 |
124 | optimizer (torch.optim.Optimizer): optimizer for this net
125 |
126 | Returns:
127 | lr_scheduler (torch.optim.lr_scheduler or None): learning rate scheduler
128 | """
129 | lr_scheduler = None
130 | if len(net_optim_params["learning_rate"]["epoch_schedule"]) > 0:
131 | # decay LR according to the epoch schedule
132 | lr_scheduler = optim.lr_scheduler.MultiStepLR(
133 | optimizer=optimizer,
134 | milestones=net_optim_params["learning_rate"]["epoch_schedule"],
135 | gamma=net_optim_params["learning_rate"]["decay_factor"],
136 | )
137 | return lr_scheduler
138 |
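# Example optim_params fragment (values illustrative) that yields a MultiStepLR
# decaying the learning rate by 10x at epochs 100 and 200:
#   "learning_rate": {"initial": 1e-4, "epoch_schedule": [100, 200], "decay_factor": 0.1},
#   "regularization": {"L2": 0.0}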
139 |
140 | def backprop_for_loss(net, optim, loss, max_grad_norm=None, retain_graph=False, dont_step=False):
141 | """
142 | Backpropagate loss and update parameters for the given
143 | network @net.
144 |
145 | Args:
146 | net (torch.nn.Module): network to update
147 |
148 | optim (torch.optim.Optimizer): optimizer to use
149 |
150 | loss (torch.Tensor): loss to use for backpropagation
151 |
152 | max_grad_norm (float): if provided, used to clip gradients
153 |
154 | retain_graph (bool): if True, graph is not freed after backward call
155 | dont_step (bool): if True, compute gradients but do not take an optimizer step
156 | Returns:
157 | grad_norms (float): sum of squared gradient norms from backpropagation
158 | """
159 |
160 | # backprop
161 | optim.zero_grad()
162 | loss.backward(retain_graph=retain_graph)
163 |
164 | # gradient clipping
165 | if max_grad_norm is not None:
166 | torch.nn.utils.clip_grad_norm_(net.parameters(), max_grad_norm)
167 |
168 | # compute grad norms
169 | grad_norms = 0.
170 | for p in net.parameters():
171 | # only accumulate norms for parameters that actually received gradients
172 | if p.grad is not None:
173 | grad_norms += p.grad.data.norm(2).pow(2).item()
174 |
175 | if not dont_step:
176 | # step
177 | optim.step()
178 |
179 | return grad_norms
180 |
181 |
182 | class dummy_context_mgr():
183 | """
184 | A dummy context manager - useful for having conditional scopes (such
185 | as @maybe_no_grad). Nothing happens in this scope.
186 | """
187 | def __enter__(self):
188 | return None
189 | def __exit__(self, exc_type, exc_value, traceback):
190 | return False
191 |
192 |
193 | def maybe_no_grad(no_grad):
194 | """
195 | Args:
196 | no_grad (bool): if True, the returned context will be torch.no_grad(), otherwise
197 | it will be a dummy context
198 | """
199 | return torch.no_grad() if no_grad else dummy_context_mgr()
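# Usage: conditionally disable gradient tracking, e.g. only at evaluation time.
#   with maybe_no_grad(no_grad=not training):
#       out = net(x)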
200 |
--------------------------------------------------------------------------------
/robomimic/utils/vis_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains utility functions for visualizing image observations in the training pipeline.
3 | These functions can be a useful debugging tool.
4 | """
5 | import numpy as np
6 |
7 | import robomimic.utils.tensor_utils as TensorUtils
8 | import robomimic.utils.obs_utils as ObsUtils
9 |
10 | from PIL import Image, ImageFont, ImageDraw
11 |
12 |
13 | def image_tensor_to_numpy(image):
14 | """
15 | Converts processed image tensors to numpy so that they can be saved to disk or video.
16 | A useful utility function for visualizing images in the middle of training.
17 |
18 | Args:
19 | image (torch.Tensor): images of shape [..., C, H, W]
20 |
21 | Returns:
22 | image (np.array): converted images of shape [..., H, W, C] and type uint8
23 | """
24 | return TensorUtils.to_numpy(
25 | ObsUtils.unprocess_image(image)
26 | ).astype(np.uint8)
27 |
28 |
29 | def image_to_disk(image, fname):
30 | """
31 | Writes an image to disk.
32 |
33 | Args:
34 | image (np.array): image of shape [H, W, 3]
35 | fname (str): path to save image to
36 | """
37 | image = Image.fromarray(image)
38 | image.save(fname)
39 |
40 |
41 | def image_tensor_to_disk(image, fname):
42 | """
43 | Writes an image tensor to disk. Any leading batch dimensions are indexed out
44 | with the first element.
45 |
46 | Args:
47 | image (torch.Tensor): image of shape [..., C, H, W]. All leading dimensions
48 | will be indexed out with the first element
49 | fname (str): path to save image to
50 | """
51 | # index out all leading dimensions before [C, H, W]
52 | num_leading_dims = len(image.shape[:-3])
53 | for _ in range(num_leading_dims):
54 | image = image[0]
55 | image = image_tensor_to_numpy(image)
56 | image_to_disk(image, fname)
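# Example (tensor shape illustrative): for an image batch of shape [B, T, C, H, W],
#   image_tensor_to_disk(image, "/tmp/frame.png")
# saves image[0][0] after converting it to an [H, W, C] uint8 array.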
57 |
58 | def write_text_on_image(image_arr, text, def_color=None, font=30, pos=(15, 15)):  # draws @text on a uint8 [H, W, 3] image array and returns the edited copy
59 | img = Image.fromarray(image_arr)
60 | image_edit = ImageDraw.Draw(img)
61 | font = ImageFont.truetype("FreeMono.ttf", font)
62 | color = (0, 255, 0) if def_color is None else def_color
63 | image_edit.text(pos, text, color, font=font)
64 | return np.array(img)
65 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | # read the contents of your README file
4 | from os import path
5 | this_directory = path.abspath(path.dirname(__file__))
6 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
7 | lines = f.readlines()
8 |
9 | # remove images from README
10 | lines = [x for x in lines if (('.png' not in x) and ('.gif' not in x))]
11 | long_description = ''.join(lines)
12 |
13 | setup(
14 | name="robomimic",
15 | packages=[
16 | package for package in find_packages() if package.startswith("robomimic")
17 | ],
18 | install_requires=[
19 | "numpy>=1.13.3",
20 | "h5py",
21 | "psutil",
22 | "tqdm",
23 | "termcolor",
24 | "tensorboard",
25 | "tensorboardX",
26 | "imageio",
27 | "imageio-ffmpeg",
28 | "egl_probe>=1.0.1",
29 | "torch",
30 | "torchvision",
31 | ],
32 | eager_resources=['*'],
33 | include_package_data=True,
34 | python_requires='>=3',
35 | description="robomimic: A Modular Framework for Robot Learning from Demonstration",
36 | author="Ajay Mandlekar, Danfei Xu, Josiah Wong, Soroush Nasiriany, Chen Wang",
37 | url="https://github.com/ARISE-Initiative/robomimic",
38 | author_email="amandlek@cs.stanford.edu",
39 | version="0.2.0",
40 | long_description=long_description,
41 | long_description_content_type='text/markdown'
42 | )
43 |
--------------------------------------------------------------------------------