├── LICENSE ├── README.md ├── algo ├── __init__.py ├── call_algo.py ├── call_tune_algo.py ├── finetune │ └── sac_tune.py ├── offline_offline │ ├── bosa.py │ ├── dara.py │ ├── igdf.py │ ├── iql.py │ └── td3_bc.py ├── offline_online │ ├── bc_par.py │ ├── bc_sac.py │ ├── bc_vgdf.py │ ├── cql_sac.py │ ├── h2o.py │ ├── mcq_sac.py │ └── rlpd.py ├── online_offline │ ├── h2o.py │ ├── par_bc.py │ ├── sac_bc.py │ ├── sac_cql.py │ └── sac_mcq.py ├── online_online │ ├── darc.py │ ├── par.py │ ├── sac.py │ ├── sac_iw.py │ └── vgdf.py └── utils.py ├── config ├── adroit │ ├── bc_par │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── bc_sac │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── bc_vgdf │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── bosa │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── cql_sac │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── dara │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── darc │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── h2o │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── igdf │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── iql │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── mcq_sac │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── par │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── par_bc │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── rlpd │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── sac │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── sac_bc │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── sac_cql │ 
│ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── sac_iw │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── sac_mcq │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ ├── td3_bc │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml │ └── vgdf │ │ ├── door.yaml │ │ ├── hammer.yaml │ │ ├── pen.yaml │ │ └── relocate.yaml ├── antmaze │ ├── bc_par │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── bc_sac │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── bc_vgdf │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── bosa │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── cql_sac │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── dara │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── darc │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── h2o │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── igdf │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── iql │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── mcq_sac │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── par │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── par_bc │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── rlpd │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── sac │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── sac_bc │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── sac_cql │ │ ├── antmaze-large.yaml │ │ ├── 
antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── sac_iw │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── sac_mcq │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ ├── td3_bc │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml │ └── vgdf │ │ ├── antmaze-large.yaml │ │ ├── antmaze-medium.yaml │ │ └── antmaze-small.yaml └── mujoco │ ├── bc_par │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── bc_sac │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── bc_vgdf │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── bosa │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── cql_sac │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── dara │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── darc │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── h2o │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── igdf │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── iql │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── mcq_sac │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── par │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── par_bc │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── rlpd │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── sac │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── sac_bc │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── sac_cql │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── sac_iw │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── 
hopper.yaml │ └── walker2d.yaml │ ├── sac_mcq │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ ├── td3_bc │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml │ └── vgdf │ ├── ant.yaml │ ├── halfcheetah.yaml │ ├── hopper.yaml │ └── walker2d.yaml ├── dataset ├── adroit │ └── README.md ├── antmaze │ └── README.md ├── call_dataset.py └── mujoco │ └── README.md ├── envs ├── __init__.py ├── adroit │ ├── __init__.py │ ├── assets │ │ ├── adroit.xml │ │ ├── adroit_broken_joint_easy.xml │ │ ├── adroit_broken_joint_hard.xml │ │ ├── adroit_broken_joint_medium.xml │ │ ├── adroit_shrink_finger_easy.xml │ │ ├── adroit_shrink_finger_hard.xml │ │ ├── adroit_shrink_finger_medium.xml │ │ ├── assets.xml │ │ ├── door.xml │ │ ├── door_broken_joint_easy.xml │ │ ├── door_broken_joint_hard.xml │ │ ├── door_broken_joint_medium.xml │ │ ├── door_shrink_finger_easy.xml │ │ ├── door_shrink_finger_hard.xml │ │ ├── door_shrink_finger_medium.xml │ │ ├── hammer.xml │ │ ├── hammer_broken_joint_easy.xml │ │ ├── hammer_broken_joint_hard.xml │ │ ├── hammer_broken_joint_medium.xml │ │ ├── hammer_shrink_finger_easy.xml │ │ ├── hammer_shrink_finger_hard.xml │ │ ├── hammer_shrink_finger_medium.xml │ │ ├── pen.xml │ │ ├── pen_broken_joint_easy.xml │ │ ├── pen_broken_joint_hard.xml │ │ ├── pen_broken_joint_medium.xml │ │ ├── pen_shrink_finger_easy.xml │ │ ├── pen_shrink_finger_hard.xml │ │ ├── pen_shrink_finger_medium.xml │ │ ├── relocate.xml │ │ ├── relocate_broken_joint_easy.xml │ │ ├── relocate_broken_joint_hard.xml │ │ ├── relocate_broken_joint_medium.xml │ │ ├── relocate_shrink_finger_easy.xml │ │ ├── relocate_shrink_finger_hard.xml │ │ └── relocate_shrink_finger_medium.xml │ ├── call_adroit_env.py │ ├── dependencies │ │ └── Adroit │ │ │ └── resources │ │ │ ├── meshes │ │ │ ├── F1.stl │ │ │ ├── F2.stl │ │ │ ├── F3.stl │ │ │ ├── TH1_z.stl │ │ │ ├── TH2_z.stl │ │ │ ├── TH3_z.stl │ │ │ ├── forearm_simple.stl │ │ │ ├── knuckle.stl │ │ │ ├── 
lfmetacarpal.stl │ │ │ ├── palm.stl │ │ │ └── wrist.stl │ │ │ └── textures │ │ │ ├── darkwood.png │ │ │ ├── dice.png │ │ │ ├── foil.png │ │ │ ├── marble.png │ │ │ ├── silverRaw.png │ │ │ ├── skin.png │ │ │ ├── square.png │ │ │ ├── wood.png │ │ │ └── woodb.png │ ├── door.py │ ├── hammer.py │ ├── mujoco_env.py │ ├── pen.py │ ├── quatmath.py │ ├── relocate.py │ └── utils │ │ └── quatmath.py ├── antmaze │ ├── __init__.py │ ├── ant.py │ ├── assets │ │ ├── ant.xml │ │ └── point.xml │ ├── call_antmaze_env.py │ ├── common.py │ ├── goal_reaching_env.py │ ├── maze_env.py │ ├── mujoco_goal_env.py │ └── wrappers.py ├── infos.py └── mujoco │ ├── __init__.py │ ├── assets │ ├── ant.xml │ ├── ant_friction_0.1.xml │ ├── ant_friction_0.5.xml │ ├── ant_friction_2.0.xml │ ├── ant_friction_5.0.xml │ ├── ant_gravity_0.1.xml │ ├── ant_gravity_0.5.xml │ ├── ant_gravity_2.0.xml │ ├── ant_gravity_5.0.xml │ ├── ant_kinematic_anklejnt_easy.xml │ ├── ant_kinematic_anklejnt_hard.xml │ ├── ant_kinematic_anklejnt_medium.xml │ ├── ant_kinematic_hipjnt_easy.xml │ ├── ant_kinematic_hipjnt_hard.xml │ ├── ant_kinematic_hipjnt_medium.xml │ ├── ant_morph_alllegs_easy.xml │ ├── ant_morph_alllegs_hard.xml │ ├── ant_morph_alllegs_medium.xml │ ├── ant_morph_halflegs_easy.xml │ ├── ant_morph_halflegs_hard.xml │ ├── ant_morph_halflegs_medium.xml │ ├── half_cheetah.xml │ ├── halfcheetah_friction_0.1.xml │ ├── halfcheetah_friction_0.5.xml │ ├── halfcheetah_friction_2.0.xml │ ├── halfcheetah_friction_5.0.xml │ ├── halfcheetah_gravity_0.1.xml │ ├── halfcheetah_gravity_0.5.xml │ ├── halfcheetah_gravity_2.0.xml │ ├── halfcheetah_gravity_5.0.xml │ ├── halfcheetah_kinematic_footjnt_easy.xml │ ├── halfcheetah_kinematic_footjnt_hard.xml │ ├── halfcheetah_kinematic_footjnt_medium.xml │ ├── halfcheetah_kinematic_thighjnt_easy.xml │ ├── halfcheetah_kinematic_thighjnt_hard.xml │ ├── halfcheetah_kinematic_thighjnt_medium.xml │ ├── halfcheetah_morph_thigh_easy.xml │ ├── halfcheetah_morph_thigh_hard.xml │ ├── 
halfcheetah_morph_thigh_medium.xml │ ├── halfcheetah_morph_torso_easy.xml │ ├── halfcheetah_morph_torso_hard.xml │ ├── halfcheetah_morph_torso_medium.xml │ ├── hopper.xml │ ├── hopper_friction_0.1.xml │ ├── hopper_friction_0.5.xml │ ├── hopper_friction_2.0.xml │ ├── hopper_friction_5.0.xml │ ├── hopper_gravity_0.1.xml │ ├── hopper_gravity_0.5.xml │ ├── hopper_gravity_2.0.xml │ ├── hopper_gravity_5.0.xml │ ├── hopper_kinematic_footjnt_easy.xml │ ├── hopper_kinematic_footjnt_hard.xml │ ├── hopper_kinematic_footjnt_medium.xml │ ├── hopper_kinematic_legjnt_easy.xml │ ├── hopper_kinematic_legjnt_hard.xml │ ├── hopper_kinematic_legjnt_medium.xml │ ├── hopper_morph_foot_easy.xml │ ├── hopper_morph_foot_hard.xml │ ├── hopper_morph_foot_medium.xml │ ├── hopper_morph_torso_easy.xml │ ├── hopper_morph_torso_hard.xml │ ├── hopper_morph_torso_medium.xml │ ├── walker2d.xml │ ├── walker2d_friction_0.1.xml │ ├── walker2d_friction_0.5.xml │ ├── walker2d_friction_2.0.xml │ ├── walker2d_friction_5.0.xml │ ├── walker2d_gravity_0.1.xml │ ├── walker2d_gravity_0.5.xml │ ├── walker2d_gravity_2.0.xml │ ├── walker2d_gravity_5.0.xml │ ├── walker2d_kinematic_footjnt_easy.xml │ ├── walker2d_kinematic_footjnt_hard.xml │ ├── walker2d_kinematic_footjnt_medium.xml │ ├── walker2d_kinematic_thighjnt_easy.xml │ ├── walker2d_kinematic_thighjnt_hard.xml │ ├── walker2d_kinematic_thighjnt_medium.xml │ ├── walker2d_morph_leg_easy.xml │ ├── walker2d_morph_leg_hard.xml │ ├── walker2d_morph_leg_medium.xml │ ├── walker2d_morph_torso_easy.xml │ ├── walker2d_morph_torso_hard.xml │ └── walker2d_morph_torso_medium.xml │ └── call_mujoco_env.py ├── imgs └── ODRLbenchmark.png ├── requirement.txt ├── run.sh ├── train.py └── train_tune.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 OffDynamicsRL 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation 
# Factory for the fine-tuning algorithms implemented in this benchmark.

def call_tune_algo(algo_name, config, mode, device):
    """Build the fine-tuning policy registered under ``algo_name``.

    Args:
        algo_name: Name of the algorithm, matched case-insensitively.
            Only ``'sac'`` is currently supported.
        config: Configuration object forwarded unchanged to the algorithm
            constructor.
        mode: Experiment setting selector. Only ``0`` is implemented
            (the original inline comment calls it the "online online"
            setting).
        device: Device specifier forwarded unchanged to the algorithm
            constructor.

    Returns:
        The object obtained by calling the selected algorithm class with
        ``(config, device)``.

    Raises:
        NotImplementedError: If ``mode`` is not ``0`` or ``algo_name`` is
            not a supported algorithm.
    """
    if mode != 0:
        raise NotImplementedError(
            f"unsupported mode {mode!r}: only mode 0 is implemented"
        )

    algo_name = algo_name.lower()
    supported = ('sac',)
    if algo_name not in supported:
        # Raise explicitly instead of using ``assert``: assertions are
        # stripped under ``python -O``, which would let an unsupported name
        # reach the dispatch table and fail with an uninformative KeyError.
        raise NotImplementedError(
            f"unsupported fine-tuning algorithm {algo_name!r}: "
            f"expected one of {supported}"
        )

    # Kept function-local, as in the original, so that importing this module
    # does not import the algorithm implementation.
    from finetune.sac_tune import SAC

    algo_to_call = {
        'sac': SAC,
    }

    return algo_to_call[algo_name](config, device)
/config/adroit/bc_par/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/bc_par/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/bc_par/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 
0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/bc_par/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/bc_sac/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/bc_sac/hammer.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/bc_sac/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/bc_sac/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | 
start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/bc_vgdf/door.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/adroit/bc_vgdf/hammer.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | 
value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/adroit/bc_vgdf/pen.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | 
save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/adroit/bc_vgdf/relocate.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/adroit/bosa/door.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 
| 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/adroit/bosa/hammer.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/adroit/bosa/pen.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 
0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/adroit/bosa/relocate.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/adroit/cql_sac/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 
25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/cql_sac/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/cql_sac/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 
1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/cql_sac/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/dara/door.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | 
max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/adroit/dara/hammer.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/adroit/dara/pen.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/adroit/dara/relocate.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 
0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/adroit/darc/door.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/adroit/darc/hammer.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/adroit/darc/pen.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | 
hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/adroit/darc/relocate.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/adroit/h2o/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 
36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/adroit/h2o/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/adroit/h2o/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 
31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/adroit/h2o/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/adroit/igdf/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | 
max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/adroit/igdf/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/adroit/igdf/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 
| device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/adroit/igdf/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/adroit/iql/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 0.5 30 | 
31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/adroit/iql/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 0.5 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/adroit/iql/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 0.5 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/adroit/iql/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 
13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 0.5 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/adroit/mcq_sac/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/mcq_sac/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False 
-------------------------------------------------------------------------------- /config/adroit/mcq_sac/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/mcq_sac/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/par/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | 
max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/par/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/par/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: False 
-------------------------------------------------------------------------------- /config/adroit/par/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/par_bc/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/adroit/par_bc/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | 
temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/adroit/par_bc/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/adroit/par_bc/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/adroit/rlpd/door.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/rlpd/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/rlpd/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | 
tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/rlpd/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/adroit/sac/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac/hammer.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | 
tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac_bc/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/sac_bc/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/sac_bc/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | 
hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/sac_bc/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/adroit/sac_cql/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 
| cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/sac_cql/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/sac_cql/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: 
-1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/sac_cql/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/adroit/sac_iw/door.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac_iw/hammer.yaml: 
-------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac_iw/pen.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/adroit/sac_iw/relocate.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 
-------------------------------------------------------------------------------- /config/adroit/sac_mcq/door.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/adroit/sac_mcq/hammer.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/adroit/sac_mcq/pen.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | 
hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/adroit/sac_mcq/relocate.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/adroit/td3_bc/door.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/adroit/td3_bc/hammer.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | 
critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/adroit/td3_bc/pen.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/adroit/td3_bc/relocate.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/adroit/vgdf/door.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 
| value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/adroit/vgdf/hammer.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | 
save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/adroit/vgdf/pen.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/adroit/vgdf/relocate.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | 
action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/antmaze/bc_par/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/bc_par/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | 
update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/bc_par/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/bc_sac/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/antmaze/bc_sac/antmaze-medium.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/antmaze/bc_sac/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/antmaze/bc_vgdf/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | 
state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/antmaze/bc_vgdf/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | 
start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/antmaze/bc_vgdf/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/antmaze/bosa/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 
24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/antmaze/bosa/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/antmaze/bosa/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | 
vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/antmaze/cql_sac/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: False 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: True 36 | cql_alpha: 5.0 37 | cql_target_action_gap: 0.8 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/antmaze/cql_sac/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | 
start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: False 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: True 36 | cql_alpha: 5.0 37 | cql_target_action_gap: 0.8 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/antmaze/cql_sac/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: False 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: True 36 | cql_alpha: 5.0 37 | cql_target_action_gap: 0.8 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/antmaze/dara/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | 
eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/antmaze/dara/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/antmaze/dara/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/antmaze/darc/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 
1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/antmaze/darc/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/antmaze/darc/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/antmaze/h2o/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 
3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/antmaze/h2o/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- 
/config/antmaze/h2o/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: False 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/antmaze/igdf/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | 
-------------------------------------------------------------------------------- /config/antmaze/igdf/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/antmaze/igdf/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | 
-------------------------------------------------------------------------------- /config/antmaze/iql/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.9 29 | temp: 10.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/antmaze/iql/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.9 29 | temp: 10.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/antmaze/iql/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 
1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.9 29 | temp: 10.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/antmaze/mcq_sac/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/mcq_sac/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | entropy_backup: False 
-------------------------------------------------------------------------------- /config/antmaze/mcq_sac/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/par/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True -------------------------------------------------------------------------------- /config/antmaze/par/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 
17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True -------------------------------------------------------------------------------- /config/antmaze/par/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True -------------------------------------------------------------------------------- /config/antmaze/par_bc/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 
-------------------------------------------------------------------------------- /config/antmaze/par_bc/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/antmaze/par_bc/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/antmaze/rlpd/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 
1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 1 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/rlpd/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 1 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/rlpd/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 1 33 | entropy_backup: False 
-------------------------------------------------------------------------------- /config/antmaze/sac/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/antmaze/sac/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/antmaze/sac/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | 
update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/antmaze/sac_bc/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/antmaze/sac_bc/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/antmaze/sac_bc/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | 
batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/antmaze/sac_cql/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/antmaze/sac_cql/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 
1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/antmaze/sac_cql/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/antmaze/sac_iw/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | 
gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/antmaze/sac_iw/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/antmaze/sac_iw/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/antmaze/sac_mcq/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 
6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/sac_mcq/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/sac_mcq/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | 
max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: False -------------------------------------------------------------------------------- /config/antmaze/td3_bc/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/antmaze/td3_bc/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/antmaze/td3_bc/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | 
-------------------------------------------------------------------------------- /config/antmaze/vgdf/antmaze-large.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/antmaze/vgdf/antmaze-medium.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 
0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/antmaze/vgdf/antmaze-small.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/mujoco/bc_par/ant.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/bc_par/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/bc_par/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 
18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/bc_par/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 32 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/bc_sac/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/bc_sac/halfcheetah.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/bc_sac/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/bc_sac/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | 
start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/bc_vgdf/ant.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/mujoco/bc_vgdf/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | 
value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/mujoco/bc_vgdf/hopper.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | 
save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/mujoco/bc_vgdf/walker2d.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | bc_coeff: 5.0 53 | use_q_decay: True -------------------------------------------------------------------------------- /config/mujoco/bosa/ant.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 
| 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/mujoco/bosa/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/mujoco/bosa/hopper.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | 
vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/mujoco/bosa/walker2d.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | noise_clip: 0.5 15 | 16 | eval_episode: 10 17 | eval_freq: 10000 18 | start_steps: 5000 19 | max_step: 500000 20 | 21 | device: cuda 22 | 23 | save_freq: 5000 24 | 25 | vae_policy_lr: 0.001 26 | vae_policy_hidden_dim: 750 27 | vae_policy_beta: 0.5 28 | vae_dyna_lr: 0.001 29 | vae_dyna_ensemble: 5 30 | vae_dyna_hidden_dim: 750 31 | vae_dyna_beta: 0.5 32 | vae_iteration: 100000 33 | 34 | lamda_policy: 0.1 35 | lamda_dyna: 0.1 36 | epsilon_policy_exp: 0.01 37 | epsilon_dyna_exp: 0.01 38 | conservation_coef: 0.1 39 | num_samples: 1 40 | -------------------------------------------------------------------------------- /config/mujoco/cql_sac/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | 
tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/cql_sac/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/cql_sac/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 
| start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/cql_sac/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/dara/ant.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 
10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/mujoco/dara/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/mujoco/dara/hopper.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/mujoco/dara/walker2d.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | eta: 0.1 14 | 15 | 
temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 500000 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | lam: 0.7 31 | temp: 3.0 32 | eta: 0.1 -------------------------------------------------------------------------------- /config/mujoco/darc/ant.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/mujoco/darc/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/mujoco/darc/hopper.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 
| 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/mujoco/darc/walker2d.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | penalty_coefficient: 1.0 14 | 15 | temperature_opt: False 16 | 17 | tau: 0.005 18 | update_interval: 2 19 | expl_noise: 0.2 20 | 21 | eval_episode: 10 22 | eval_freq: 10000 23 | start_steps: 5000 24 | max_step: 1000000 25 | tar_env_interact_freq: 10 26 | 27 | device: cuda 28 | 29 | save_freq: 5000 30 | -------------------------------------------------------------------------------- /config/mujoco/h2o/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: True 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | 
cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/mujoco/h2o/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: True 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/mujoco/h2o/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 
| device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: True 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/mujoco/h2o/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | gaussian_noise_std: 1.0 27 | 28 | device: cuda 29 | 30 | cql_max_target_backup: False 31 | backup_entropy: True 32 | cql_n_actions: 10 33 | cql_importance_sample: True 34 | cql_temp: 1.0 35 | cql_clip_diff_min: -1000000 36 | cql_clip_diff_max: 1000000 37 | cql_lagrange: False 38 | cql_alpha: 10.0 39 | cql_target_action_gap: 1.0 40 | n_state: 10 41 | 42 | 43 | save_freq: 5000 44 | -------------------------------------------------------------------------------- /config/mujoco/igdf/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | 
eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/mujoco/igdf/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/mujoco/igdf/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | 
max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/mujoco/igdf/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | info_update_step: 7000 32 | repr_dim: 64 33 | ensemble_size: 1 34 | repr_norm: False 35 | repr_norm_temp: False 36 | ortho_init: False 37 | output_gain: None 38 | importance_weight: 1.0 39 | xi: 0.75 40 | 41 | save_freq: 5000 42 | -------------------------------------------------------------------------------- /config/mujoco/iql/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 
| device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/mujoco/iql/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/mujoco/iql/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/mujoco/iql/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 
10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | lam: 0.7 29 | temp: 3.0 30 | 31 | save_freq: 5000 32 | -------------------------------------------------------------------------------- /config/mujoco/mcq_sac/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/mcq_sac/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | 
entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/mcq_sac/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/mcq_sac/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 32 | 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/par/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | 
max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/par/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/par/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True 
-------------------------------------------------------------------------------- /config/mujoco/par/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/par_bc/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/mujoco/par_bc/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | 
temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/mujoco/par_bc/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/mujoco/par_bc/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | beta: 0.1 31 | weight: 5.0 -------------------------------------------------------------------------------- /config/mujoco/rlpd/ant.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/rlpd/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/rlpd/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 
16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/rlpd/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 30 | n_layers: 3 31 | num_q: 10 32 | num_backup_q: 2 33 | entropy_backup: True -------------------------------------------------------------------------------- /config/mujoco/sac/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac/halfcheetah.yaml: 
-------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | 
tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac_bc/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/sac_bc/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/sac_bc/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 
11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/sac_bc/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | weight: 2.5 29 | 30 | save_freq: 5000 31 | -------------------------------------------------------------------------------- /config/mujoco/sac_cql/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 
35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/sac_cql/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/sac_cql/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | 
cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/sac_cql/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | cql_max_target_backup: False 29 | backup_entropy: True 30 | cql_n_actions: 10 31 | cql_importance_sample: True 32 | cql_temp: 1.0 33 | cql_clip_diff_min: -1000000 34 | cql_clip_diff_max: 1000000 35 | cql_lagrange: False 36 | cql_alpha: 10.0 37 | cql_target_action_gap: 0.0 38 | 39 | 40 | save_freq: 5000 41 | -------------------------------------------------------------------------------- /config/mujoco/sac_iw/ant.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac_iw/halfcheetah.yaml: 
-------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac_iw/hopper.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | -------------------------------------------------------------------------------- /config/mujoco/sac_iw/walker2d.yaml: -------------------------------------------------------------------------------- 1 | alpha: 0.2 2 | batch_size: 128 3 | actor_lr: 0.0003 4 | critic_lr: 0.0003 5 | gamma: 0.99 6 | 7 | state_dim: 17 8 | action_dim: 3 9 | hidden_sizes: 256 10 | max_action: 1 11 | 12 | gaussian_noise_std: 1.0 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 1000000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | 
-------------------------------------------------------------------------------- /config/mujoco/sac_mcq/ant.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/mujoco/sac_mcq/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/mujoco/sac_mcq/hopper.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 
| hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/mujoco/sac_mcq/walker2d.yaml: -------------------------------------------------------------------------------- 1 | ac_gradient_clip: 100 2 | alpha: 0.2 3 | batch_size: 128 4 | actor_lr: 0.0003 5 | critic_lr: 0.0003 6 | gamma: 0.99 7 | max_epochs_since_update_decay_interval: 150000.0 8 | 9 | state_dim: 17 10 | action_dim: 3 11 | hidden_sizes: 256 12 | max_action: 1 13 | 14 | temperature_opt: False 15 | 16 | tau: 0.005 17 | update_interval: 2 18 | expl_noise: 0.2 19 | 20 | eval_episode: 10 21 | eval_freq: 10000 22 | start_steps: 5000 23 | max_step: 500000 24 | tar_env_interact_freq: 10 25 | 26 | device: cuda 27 | 28 | save_freq: 5000 29 | lam: 0.8 30 | num_sample_action: 10 31 | vae_features: 750 -------------------------------------------------------------------------------- /config/mujoco/td3_bc/ant.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/mujoco/td3_bc/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 
0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/mujoco/td3_bc/hopper.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/mujoco/td3_bc/walker2d.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 128 2 | actor_lr: 0.0003 3 | critic_lr: 0.0003 4 | gamma: 0.99 5 | 6 | state_dim: 17 7 | action_dim: 3 8 | hidden_sizes: 256 9 | max_action: 1 10 | 11 | tau: 0.005 12 | update_interval: 2 13 | expl_noise: 0.2 14 | 15 | eval_episode: 10 16 | eval_freq: 10000 17 | start_steps: 5000 18 | max_step: 500000 19 | 20 | device: cuda 21 | 22 | weight: 2.5 23 | 24 | save_freq: 5000 25 | -------------------------------------------------------------------------------- /config/mujoco/vgdf/ant.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | 
policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/mujoco/vgdf/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 
| 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/mujoco/vgdf/hopper.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 | 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /config/mujoco/vgdf/walker2d.yaml: -------------------------------------------------------------------------------- 1 | model_config: 2 | dynamics_elite_size: 5 3 | dynamics_ensemble_size: 7 4 | dynamics_hidden_size: 200 5 | policy_hiddens: 6 | - 256 7 | - 256 8 | policy_initializer: xavier uniform 9 | policy_log_std_max: 2.0 10 | policy_log_std_min: -10.0 11 | policy_nonlinear: ReLU 12 | value_hiddens: 13 | - 256 14 | - 256 15 | value_initializer: xavier uniform 16 | value_nonlinear: ReLU 17 | 18 
| 19 | state_dim: 27 20 | action_dim: 8 21 | ac_gradient_clip: 100 22 | alpha: 0.2 23 | batch_size: 128 24 | lr: 0.0003 25 | gamma: 0.99 26 | max_epochs_since_update_decay_interval: 150000.0 27 | 28 | optimistic: True 29 | 30 | tau: 0.005 31 | training_delay: 2 32 | 33 | src_buffer_size: 1000000 34 | tar_buffer_size: 1000000 35 | 36 | dynamics_batch_size: 256 37 | dynamics_holdout_ratio: 0.1 38 | dynamics_max_epochs_since_update: 5 39 | dynamics_train_freq: 250 40 | 41 | eval_episode: 5 42 | eval_freq: 10000 43 | max_step: 1000000 44 | tar_env_interact_freq: 10 45 | 46 | device: cuda 47 | 48 | save_freq: 50000 49 | 50 | likelihood_gate_threshold: 0.75 51 | start_gate_src_sample: 100000.0 52 | -------------------------------------------------------------------------------- /dataset/adroit/README.md: -------------------------------------------------------------------------------- 1 | Adroit domain datasets should be placed here. -------------------------------------------------------------------------------- /dataset/antmaze/README.md: -------------------------------------------------------------------------------- 1 | AntMaze domain datasets should be placed here. -------------------------------------------------------------------------------- /dataset/mujoco/README.md: -------------------------------------------------------------------------------- 1 | Locomotion domain datasets should be placed here. 
from typing import Dict
import gym


def call_adroit_env(env_config: Dict) -> gym.Env:
    """Build the shifted Adroit environment described by ``env_config``.

    ``env_config['env_name']`` (e.g. ``"pen_shrink_finger"``) is lower-cased
    and its underscores are turned into dashes. ``env_config['shift_level']``
    (easy/medium/hard according to the original comment) is then appended,
    so the id handed to ``gym.make`` is ``<name>-<shift_level>-v0``.

    Supported names start with one of pen/hammer/relocate/door and end with
    one of shrink-finger/broken-joint.

    Raises:
        KeyError: if ``env_name`` or ``shift_level`` is missing from the config.
        AssertionError: if the normalised name has an unsupported prefix or
            suffix.
    """
    supported_tasks = ('pen', 'hammer', 'relocate', 'door')
    supported_shifts = ('shrink-finger', 'broken-joint')

    # str.replace is a no-op when the name contains no underscore.
    normalised = env_config['env_name'].lower().replace('_', '-')
    level = env_config['shift_level']

    # startswith/endswith accept a tuple of alternatives.
    assert normalised.startswith(supported_tasks)
    assert normalised.endswith(supported_shifts)

    return gym.make('-'.join((normalised, str(level), 'v0')))
-------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/F3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/F3.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/TH1_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/TH1_z.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/TH2_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/TH2_z.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/TH3_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/TH3_z.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/forearm_simple.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/forearm_simple.stl 
-------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/knuckle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/knuckle.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/lfmetacarpal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/lfmetacarpal.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/palm.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/palm.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/meshes/wrist.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/wrist.stl -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/textures/darkwood.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/darkwood.png 
-------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/textures/dice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/dice.png -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/textures/foil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/foil.png -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/textures/marble.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/marble.png -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/textures/silverRaw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/silverRaw.png -------------------------------------------------------------------------------- /envs/adroit/dependencies/Adroit/resources/textures/skin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/skin.png 
from typing import Dict
import gym
import d4rl  # NOTE(review): unused by name; presumably imported so the antmaze ids get registered -- confirm


def call_antmaze_env(env_config: Dict) -> gym.Env:
    """Build the AntMaze environment described by ``env_config``.

    ``env_config['env_name']`` (e.g. ``"antmaze_small_lshape"``) is
    lower-cased and its underscores are turned into dashes. It must start
    with ``antmaze`` and mention one of small/medium/large.

    ``env_config['shift_level']`` selects the variant:

    * ``None`` returns the unshifted map for the size: ``antmaze-umaze-v0``
      (small), ``antmaze-medium-0-v0`` (medium) or ``antmaze-large-0-v0``
      (large).
    * otherwise the id ``<env_name>-<shift_level>-v0`` is built after
      checking that the level mentions a layout known for that size:
        small  - empty / lshape / centerblock / reversel / reverseu / zshape
        medium - 0/1/2/3/4/5/6
        large  - 1/2/3/4/5/6

    The level check is a substring test, as in the original code, so a value
    such as ``"12"`` passes it.

    NOTE(review): the original comment also listed ``brokenjoint`` as a small
    layout, but the check has never accepted it -- confirm which is intended.

    Raises:
        KeyError: if ``env_name`` or ``shift_level`` is missing from the config.
        AssertionError: if the name or the shift level is not supported.
    """
    env_name = env_config['env_name'].lower().replace('_', '-')

    assert env_name.startswith('antmaze'), \
        f"unsupported env_name {env_name!r}: expected it to start with 'antmaze'"
    assert any(size in env_name for size in ('small', 'medium', 'large')), \
        f"unsupported env_name {env_name!r}: expected small, medium or large"

    shift_level = env_config['shift_level']

    if shift_level is None:
        # No dynamics shift requested: fall back to the base map of that size.
        if 'small' in env_name:
            return gym.make('antmaze-umaze-v0')
        if 'medium' in env_name:
            return gym.make('antmaze-medium-0-v0')
        return gym.make('antmaze-large-0-v0')

    # Stringify before validating: the id below is built from
    # str(shift_level) anyway, and an integer level (e.g. 3) used to raise
    # TypeError inside the membership test.
    level = str(shift_level)

    if 'small' in env_name:
        known_levels = ('empty', 'lshape', 'centerblock',
                        'reversel', 'reverseu', 'zshape')
    elif 'medium' in env_name:
        known_levels = ('0', '1', '2', '3', '4', '5', '6')
    else:
        known_levels = ('1', '2', '3', '4', '5', '6')

    assert any(tag in level for tag in known_levels), \
        f"unsupported shift_level {shift_level!r} for {env_name!r}"

    return gym.make(env_name + '-' + level + '-v0')


def run_policy_on_env(policy_fn, env, truncate_episode_at=None,
                      first_obs=None):
    """Roll ``policy_fn`` out on ``env`` for one episode.

    Args:
        policy_fn: callable mapping an observation to an action.
        env: object with ``reset()`` and a ``step(action)`` that returns a
            4-tuple ``(next_obs, reward, done, info)``.
        truncate_episode_at: optional step cap; the rollout stops once this
            many steps have been taken. At least one step is always taken.
        first_obs: optional starting observation; when given, ``env.reset()``
            is not called.

    Returns:
        A list of ``(obs, act, rew, done)`` tuples, one per step, where
        ``obs`` is the observation the action was computed from.
    """
    if first_obs is None:
        obs = env.reset()
    else:
        obs = first_obs

    trajectory = []
    step_num = 0
    while True:
        act = policy_fn(obs)
        next_obs, rew, done, _ = env.step(act)
        trajectory.append((obs, act, rew, done))
        obs = next_obs
        step_num += 1
        if (done or
                (truncate_episode_at is not None and step_num >= truncate_episode_at)):
            break
    return trajectory
import numpy as np


def disk_goal_sampler(np_random, goal_region_radius=10.):
    """Sample a 2-D goal inside a disk centred on the origin.

    The angle and the radius are each drawn with ``np_random.uniform()``;
    the radius draw is scaled by ``goal_region_radius``.

    Returns:
        A length-2 array ``radius * [cos(th), sin(th)]``.
    """
    th = 2 * np.pi * np_random.uniform()
    radius = goal_region_radius * np_random.uniform()
    return radius * np.array([np.cos(th), np.sin(th)])


def constant_goal_sampler(np_random, location=10.0 * np.ones([2])):
    """Return ``location`` as the goal, ignoring ``np_random``.

    A copy is returned so that a caller mutating the goal in place cannot
    corrupt the shared default array (or the array it passed in).
    """
    return np.array(location)


class GoalReachingEnv(object):
    """General goal-reaching environment.

    Intended to be combined with a concrete environment class, which the
    subclass names in ``BASE_ENV``. This class calls
    ``BASE_ENV._get_obs(self)``, ``BASE_ENV.step(self, a)`` and
    ``BASE_ENV.reset_model(self)``, and reads ``self.get_xy()`` and
    ``self.np_random``; none of those are defined here.
    """
    BASE_ENV = None  # Must be specified by child class.

    # Distance to the goal at or below which the goal counts as reached.
    _GOAL_TOLERANCE = 0.5

    def __init__(self, goal_sampler, eval=False, reward_type='dense'):
        """Store the goal sampler, the evaluation flag and the reward type.

        Args:
            goal_sampler: callable ``goal_sampler(np_random)`` returning a goal.
            eval: when true, observations are returned without the goal
                direction and reaching the goal ends the episode.
            reward_type: ``'dense'`` or ``'sparse'`` (checked in ``step``).
        """
        self._goal_sampler = goal_sampler
        self._goal = np.ones([2])
        self.target_goal = self._goal

        # This flag is used to make sure that, when this environment is used
        # for evaluation, no goals are appended to the state.
        self.eval = eval

        # This is the reward type fed as input to the goal-conditioned policy.
        self.reward_type = reward_type

    def _get_obs(self):
        """Return the base observation, plus the goal direction unless eval."""
        base_obs = self.BASE_ENV._get_obs(self)
        if self.eval:
            return base_obs
        goal_direction = self._goal - self.get_xy()
        return np.concatenate([base_obs, goal_direction])

    def step(self, a):
        """Step the base environment and compute the goal-reaching reward.

        Returns:
            ``(obs, reward, done, {})``. The dense reward is the negative
            distance to ``target_goal``; the sparse reward is 1.0 within 0.5
            of the goal and 0.0 otherwise. ``done`` is only ever true in eval
            mode, once the goal is within 0.5.

        Raises:
            ValueError: if ``reward_type`` is neither ``'dense'`` nor
                ``'sparse'``. It is raised before the base environment is
                stepped; previously the step ran and then failed with
                ``UnboundLocalError``.
        """
        if self.reward_type not in ('dense', 'sparse'):
            raise ValueError(
                f"unknown reward_type {self.reward_type!r}; "
                "expected 'dense' or 'sparse'")

        self.BASE_ENV.step(self, a)

        if self.reward_type == 'dense':
            reward = -np.linalg.norm(self.target_goal - self.get_xy())
        else:
            distance = np.linalg.norm(self.get_xy() - self.target_goal)
            reward = 1.0 if distance <= self._GOAL_TOLERANCE else 0.0

        done = False
        # Terminate episode when we reach a goal
        if self.eval and np.linalg.norm(
                self.get_xy() - self.target_goal) <= self._GOAL_TOLERANCE:
            done = True

        obs = self._get_obs()
        return obs, reward, done, {}

    def reset_model(self):
        """Pick the goal for the next episode and reset the base environment.

        ``target_goal`` is used when it is set (or in eval mode); otherwise a
        goal is drawn from the sampler passed to ``__init__``.
        """
        # NOTE(review): with eval=True and target_goal=None this assigns
        # None to _goal -- condition kept as in the original; confirm intent.
        if self.target_goal is not None or self.eval:
            self._goal = self.target_goal
        else:
            self._goal = self._goal_sampler(self.np_random)

        return self.BASE_ENV.reset_model(self)
#!/bin/bash

# Example launcher: for each of five seeds, start two runs per training
# setting in the background (online-online, offline-online, online-offline
# and offline-offline), one setting per GPU index 0-3.

# launch GPU ARGS... : start one background train.py run with
# CUDA_VISIBLE_DEVICES set to GPU and the remaining arguments passed through.
launch() {
    local gpu=$1
    shift
    CUDA_VISIBLE_DEVICES=$gpu python train.py "$@" &
}

for seed in 1 2 3 4 5
do
    # online-online example: DARC
    launch 0 --policy DARC --env halfcheetah-kinematic-footjnt --shift_level easy --seed $seed --dir logs
    launch 0 --policy DARC --env hopper-gravity --shift_level 0.5 --seed $seed --dir logs

    # offline-online example: BC_SAC
    launch 1 --policy BC_SAC --env halfcheetah-friction --shift_level 0.1 --seed $seed --mode 1 --srctype medium --dir logs
    launch 1 --policy BC_SAC --env pen-broken-joint --shift_level easy --seed $seed --mode 1 --srctype expert --dir logs

    # online-offline example: SAC_MCQ
    launch 2 --policy SAC_MCQ --env walker2d-gravity --shift_level 2.0 --seed $seed --mode 2 --tartype medium --dir logs
    launch 2 --policy SAC_MCQ --env ant-morph-alllegs --shift_level medium --seed $seed --mode 2 --tartype medium --dir logs

    # offline-offline example: BOSA
    launch 3 --policy BOSA --env ant-friction --shift_level 0.5 --seed $seed --mode 3 --srctype medium --tartype medium --dir logs
    launch 3 --policy BOSA --env hopper-morph-torso --shift_level medium --seed $seed --mode 3 --srctype medium --tartype medium --dir logs
done