├── LICENSE
├── README.md
├── algo
├── __init__.py
├── call_algo.py
├── call_tune_algo.py
├── finetune
│ └── sac_tune.py
├── offline_offline
│ ├── bosa.py
│ ├── dara.py
│ ├── igdf.py
│ ├── iql.py
│ └── td3_bc.py
├── offline_online
│ ├── bc_par.py
│ ├── bc_sac.py
│ ├── bc_vgdf.py
│ ├── cql_sac.py
│ ├── h2o.py
│ ├── mcq_sac.py
│ └── rlpd.py
├── online_offline
│ ├── h2o.py
│ ├── par_bc.py
│ ├── sac_bc.py
│ ├── sac_cql.py
│ └── sac_mcq.py
├── online_online
│ ├── darc.py
│ ├── par.py
│ ├── sac.py
│ ├── sac_iw.py
│ └── vgdf.py
└── utils.py
├── config
├── adroit
│ ├── bc_par
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── bc_sac
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── bc_vgdf
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── bosa
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── cql_sac
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── dara
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── darc
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── h2o
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── igdf
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── iql
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── mcq_sac
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── par
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── par_bc
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── rlpd
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── sac
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── sac_bc
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── sac_cql
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── sac_iw
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── sac_mcq
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ ├── td3_bc
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
│ └── vgdf
│ │ ├── door.yaml
│ │ ├── hammer.yaml
│ │ ├── pen.yaml
│ │ └── relocate.yaml
├── antmaze
│ ├── bc_par
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── bc_sac
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── bc_vgdf
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── bosa
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── cql_sac
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── dara
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── darc
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── h2o
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── igdf
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── iql
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── mcq_sac
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── par
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── par_bc
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── rlpd
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── sac
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── sac_bc
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── sac_cql
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── sac_iw
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── sac_mcq
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ ├── td3_bc
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
│ └── vgdf
│ │ ├── antmaze-large.yaml
│ │ ├── antmaze-medium.yaml
│ │ └── antmaze-small.yaml
└── mujoco
│ ├── bc_par
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── bc_sac
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── bc_vgdf
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── bosa
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── cql_sac
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── dara
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── darc
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── h2o
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── igdf
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── iql
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── mcq_sac
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── par
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── par_bc
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── rlpd
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── sac
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── sac_bc
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── sac_cql
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── sac_iw
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── sac_mcq
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ ├── td3_bc
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
│ └── vgdf
│ ├── ant.yaml
│ ├── halfcheetah.yaml
│ ├── hopper.yaml
│ └── walker2d.yaml
├── dataset
├── adroit
│ └── README.md
├── antmaze
│ └── README.md
├── call_dataset.py
└── mujoco
│ └── README.md
├── envs
├── __init__.py
├── adroit
│ ├── __init__.py
│ ├── assets
│ │ ├── adroit.xml
│ │ ├── adroit_broken_joint_easy.xml
│ │ ├── adroit_broken_joint_hard.xml
│ │ ├── adroit_broken_joint_medium.xml
│ │ ├── adroit_shrink_finger_easy.xml
│ │ ├── adroit_shrink_finger_hard.xml
│ │ ├── adroit_shrink_finger_medium.xml
│ │ ├── assets.xml
│ │ ├── door.xml
│ │ ├── door_broken_joint_easy.xml
│ │ ├── door_broken_joint_hard.xml
│ │ ├── door_broken_joint_medium.xml
│ │ ├── door_shrink_finger_easy.xml
│ │ ├── door_shrink_finger_hard.xml
│ │ ├── door_shrink_finger_medium.xml
│ │ ├── hammer.xml
│ │ ├── hammer_broken_joint_easy.xml
│ │ ├── hammer_broken_joint_hard.xml
│ │ ├── hammer_broken_joint_medium.xml
│ │ ├── hammer_shrink_finger_easy.xml
│ │ ├── hammer_shrink_finger_hard.xml
│ │ ├── hammer_shrink_finger_medium.xml
│ │ ├── pen.xml
│ │ ├── pen_broken_joint_easy.xml
│ │ ├── pen_broken_joint_hard.xml
│ │ ├── pen_broken_joint_medium.xml
│ │ ├── pen_shrink_finger_easy.xml
│ │ ├── pen_shrink_finger_hard.xml
│ │ ├── pen_shrink_finger_medium.xml
│ │ ├── relocate.xml
│ │ ├── relocate_broken_joint_easy.xml
│ │ ├── relocate_broken_joint_hard.xml
│ │ ├── relocate_broken_joint_medium.xml
│ │ ├── relocate_shrink_finger_easy.xml
│ │ ├── relocate_shrink_finger_hard.xml
│ │ └── relocate_shrink_finger_medium.xml
│ ├── call_adroit_env.py
│ ├── dependencies
│ │ └── Adroit
│ │ │ └── resources
│ │ │ ├── meshes
│ │ │ ├── F1.stl
│ │ │ ├── F2.stl
│ │ │ ├── F3.stl
│ │ │ ├── TH1_z.stl
│ │ │ ├── TH2_z.stl
│ │ │ ├── TH3_z.stl
│ │ │ ├── forearm_simple.stl
│ │ │ ├── knuckle.stl
│ │ │ ├── lfmetacarpal.stl
│ │ │ ├── palm.stl
│ │ │ └── wrist.stl
│ │ │ └── textures
│ │ │ ├── darkwood.png
│ │ │ ├── dice.png
│ │ │ ├── foil.png
│ │ │ ├── marble.png
│ │ │ ├── silverRaw.png
│ │ │ ├── skin.png
│ │ │ ├── square.png
│ │ │ ├── wood.png
│ │ │ └── woodb.png
│ ├── door.py
│ ├── hammer.py
│ ├── mujoco_env.py
│ ├── pen.py
│ ├── quatmath.py
│ ├── relocate.py
│ └── utils
│ │ └── quatmath.py
├── antmaze
│ ├── __init__.py
│ ├── ant.py
│ ├── assets
│ │ ├── ant.xml
│ │ └── point.xml
│ ├── call_antmaze_env.py
│ ├── common.py
│ ├── goal_reaching_env.py
│ ├── maze_env.py
│ ├── mujoco_goal_env.py
│ └── wrappers.py
├── infos.py
└── mujoco
│ ├── __init__.py
│ ├── assets
│ ├── ant.xml
│ ├── ant_friction_0.1.xml
│ ├── ant_friction_0.5.xml
│ ├── ant_friction_2.0.xml
│ ├── ant_friction_5.0.xml
│ ├── ant_gravity_0.1.xml
│ ├── ant_gravity_0.5.xml
│ ├── ant_gravity_2.0.xml
│ ├── ant_gravity_5.0.xml
│ ├── ant_kinematic_anklejnt_easy.xml
│ ├── ant_kinematic_anklejnt_hard.xml
│ ├── ant_kinematic_anklejnt_medium.xml
│ ├── ant_kinematic_hipjnt_easy.xml
│ ├── ant_kinematic_hipjnt_hard.xml
│ ├── ant_kinematic_hipjnt_medium.xml
│ ├── ant_morph_alllegs_easy.xml
│ ├── ant_morph_alllegs_hard.xml
│ ├── ant_morph_alllegs_medium.xml
│ ├── ant_morph_halflegs_easy.xml
│ ├── ant_morph_halflegs_hard.xml
│ ├── ant_morph_halflegs_medium.xml
│ ├── half_cheetah.xml
│ ├── halfcheetah_friction_0.1.xml
│ ├── halfcheetah_friction_0.5.xml
│ ├── halfcheetah_friction_2.0.xml
│ ├── halfcheetah_friction_5.0.xml
│ ├── halfcheetah_gravity_0.1.xml
│ ├── halfcheetah_gravity_0.5.xml
│ ├── halfcheetah_gravity_2.0.xml
│ ├── halfcheetah_gravity_5.0.xml
│ ├── halfcheetah_kinematic_footjnt_easy.xml
│ ├── halfcheetah_kinematic_footjnt_hard.xml
│ ├── halfcheetah_kinematic_footjnt_medium.xml
│ ├── halfcheetah_kinematic_thighjnt_easy.xml
│ ├── halfcheetah_kinematic_thighjnt_hard.xml
│ ├── halfcheetah_kinematic_thighjnt_medium.xml
│ ├── halfcheetah_morph_thigh_easy.xml
│ ├── halfcheetah_morph_thigh_hard.xml
│ ├── halfcheetah_morph_thigh_medium.xml
│ ├── halfcheetah_morph_torso_easy.xml
│ ├── halfcheetah_morph_torso_hard.xml
│ ├── halfcheetah_morph_torso_medium.xml
│ ├── hopper.xml
│ ├── hopper_friction_0.1.xml
│ ├── hopper_friction_0.5.xml
│ ├── hopper_friction_2.0.xml
│ ├── hopper_friction_5.0.xml
│ ├── hopper_gravity_0.1.xml
│ ├── hopper_gravity_0.5.xml
│ ├── hopper_gravity_2.0.xml
│ ├── hopper_gravity_5.0.xml
│ ├── hopper_kinematic_footjnt_easy.xml
│ ├── hopper_kinematic_footjnt_hard.xml
│ ├── hopper_kinematic_footjnt_medium.xml
│ ├── hopper_kinematic_legjnt_easy.xml
│ ├── hopper_kinematic_legjnt_hard.xml
│ ├── hopper_kinematic_legjnt_medium.xml
│ ├── hopper_morph_foot_easy.xml
│ ├── hopper_morph_foot_hard.xml
│ ├── hopper_morph_foot_medium.xml
│ ├── hopper_morph_torso_easy.xml
│ ├── hopper_morph_torso_hard.xml
│ ├── hopper_morph_torso_medium.xml
│ ├── walker2d.xml
│ ├── walker2d_friction_0.1.xml
│ ├── walker2d_friction_0.5.xml
│ ├── walker2d_friction_2.0.xml
│ ├── walker2d_friction_5.0.xml
│ ├── walker2d_gravity_0.1.xml
│ ├── walker2d_gravity_0.5.xml
│ ├── walker2d_gravity_2.0.xml
│ ├── walker2d_gravity_5.0.xml
│ ├── walker2d_kinematic_footjnt_easy.xml
│ ├── walker2d_kinematic_footjnt_hard.xml
│ ├── walker2d_kinematic_footjnt_medium.xml
│ ├── walker2d_kinematic_thighjnt_easy.xml
│ ├── walker2d_kinematic_thighjnt_hard.xml
│ ├── walker2d_kinematic_thighjnt_medium.xml
│ ├── walker2d_morph_leg_easy.xml
│ ├── walker2d_morph_leg_hard.xml
│ ├── walker2d_morph_leg_medium.xml
│ ├── walker2d_morph_torso_easy.xml
│ ├── walker2d_morph_torso_hard.xml
│ └── walker2d_morph_torso_medium.xml
│ └── call_mujoco_env.py
├── imgs
└── ODRLbenchmark.png
├── requirement.txt
├── run.sh
├── train.py
└── train_tune.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 OffDynamicsRL
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/algo/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import sys
3 | sys.path.append(str(Path(__file__).parent.absolute()))
--------------------------------------------------------------------------------
/algo/call_tune_algo.py:
--------------------------------------------------------------------------------
1 | # import all algorithms this benchmark implements
2 |
def call_tune_algo(algo_name, config, mode, device):
    """Instantiate the fine-tuning policy selected by ``algo_name``.

    Args:
        algo_name: Algorithm name, matched case-insensitively. Only
            ``'sac'`` is currently accepted.
        config: Forwarded unchanged as the first constructor argument.
        mode: Setting selector. Only ``0`` is implemented.
        device: Forwarded unchanged as the second constructor argument.

    Returns:
        The object built by calling the selected algorithm class with
        ``(config, device)``.

    Raises:
        NotImplementedError: If ``mode`` is anything other than ``0``.
        ValueError: If ``algo_name`` is not a supported algorithm.
    """
    # Guard clause: every mode except 0 is unsupported.
    if mode != 0:
        raise NotImplementedError(f"mode {mode!r} is not supported for fine-tuning")

    algo_name = algo_name.lower()

    # Validate explicitly instead of with `assert`: assertions are stripped
    # under `python -O`, which would let an unsupported name fall through to
    # a bare KeyError on the dispatch table below.
    if algo_name != 'sac':
        raise ValueError(
            f"unsupported fine-tuning algorithm {algo_name!r}; only 'sac' is available"
        )

    # Imported lazily so the project module is only loaded once the request
    # is known to be valid.
    from finetune.sac_tune import SAC

    algo_to_call = {
        'sac': SAC,
    }

    algo = algo_to_call[algo_name]
    return algo(config, device)
--------------------------------------------------------------------------------
/config/adroit/bc_par/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/bc_par/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/bc_par/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/bc_par/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/bc_sac/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/bc_sac/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/bc_sac/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/bc_sac/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/bc_vgdf/door.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/adroit/bc_vgdf/hammer.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/adroit/bc_vgdf/pen.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/adroit/bc_vgdf/relocate.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/adroit/bosa/door.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/adroit/bosa/hammer.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/adroit/bosa/pen.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/adroit/bosa/relocate.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/adroit/cql_sac/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/cql_sac/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/cql_sac/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/cql_sac/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/dara/door.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # duplicate `eta` key removed here; `eta: 0.1` is already defined on line 13 of this file
--------------------------------------------------------------------------------
/config/adroit/dara/hammer.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # duplicate `eta` key removed here; `eta: 0.1` is already defined on line 13 of this file
--------------------------------------------------------------------------------
/config/adroit/dara/pen.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # duplicate `eta` key removed here; `eta: 0.1` is already defined on line 13 of this file
--------------------------------------------------------------------------------
/config/adroit/dara/relocate.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # duplicate `eta` key removed here; `eta: 0.1` is already defined on line 13 of this file
--------------------------------------------------------------------------------
/config/adroit/darc/door.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/adroit/darc/hammer.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/adroit/darc/pen.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/adroit/darc/relocate.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/adroit/h2o/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/adroit/h2o/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/adroit/h2o/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/adroit/h2o/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/adroit/igdf/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
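37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if "no gain" is intended - confirm against the consumer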
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/adroit/igdf/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
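37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if "no gain" is intended - confirm against the consumer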
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/adroit/igdf/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
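37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if "no gain" is intended - confirm against the consumer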
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/adroit/igdf/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
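37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if "no gain" is intended - confirm against the consumer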
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/adroit/iql/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 0.5
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/adroit/iql/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 0.5
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/adroit/iql/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 0.5
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/adroit/iql/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 0.5
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/adroit/mcq_sac/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/mcq_sac/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/mcq_sac/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/mcq_sac/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/par/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/par/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/par/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/par/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/par_bc/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/adroit/par_bc/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/adroit/par_bc/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/adroit/par_bc/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/adroit/rlpd/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/rlpd/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/rlpd/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/rlpd/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/adroit/sac/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac_bc/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/sac_bc/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/sac_bc/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/sac_bc/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/adroit/sac_cql/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/sac_cql/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/sac_cql/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/sac_cql/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/adroit/sac_iw/door.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac_iw/hammer.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac_iw/pen.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac_iw/relocate.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/adroit/sac_mcq/door.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/adroit/sac_mcq/hammer.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/adroit/sac_mcq/pen.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/adroit/sac_mcq/relocate.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/adroit/td3_bc/door.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/adroit/td3_bc/hammer.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/adroit/td3_bc/pen.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/adroit/td3_bc/relocate.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/adroit/vgdf/door.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/adroit/vgdf/hammer.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/adroit/vgdf/pen.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/adroit/vgdf/relocate.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/antmaze/bc_par/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/bc_par/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/bc_par/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/bc_sac/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/antmaze/bc_sac/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/antmaze/bc_sac/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/antmaze/bc_vgdf/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/antmaze/bc_vgdf/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/antmaze/bc_vgdf/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/antmaze/bosa/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/antmaze/bosa/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/antmaze/bosa/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/antmaze/cql_sac/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: False
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: True
36 | cql_alpha: 5.0
37 | cql_target_action_gap: 0.8
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/antmaze/cql_sac/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: False
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: True
36 | cql_alpha: 5.0
37 | cql_target_action_gap: 0.8
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/antmaze/cql_sac/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: False
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: True
36 | cql_alpha: 5.0
37 | cql_target_action_gap: 0.8
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/antmaze/dara/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
--------------------------------------------------------------------------------
/config/antmaze/dara/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
--------------------------------------------------------------------------------
/config/antmaze/dara/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
--------------------------------------------------------------------------------
/config/antmaze/darc/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/antmaze/darc/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/antmaze/darc/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/antmaze/h2o/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/antmaze/h2o/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/antmaze/h2o/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: False
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/antmaze/igdf/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
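37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if a real None is intended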
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/antmaze/igdf/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
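37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if a real None is intended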
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/antmaze/igdf/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
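37 | output_gain: None  # NOTE(review): YAML loads this as the string "None", not null; use `null` if a real None is intended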
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/antmaze/iql/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.9
29 | temp: 10.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/antmaze/iql/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.9
29 | temp: 10.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/antmaze/iql/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.9
29 | temp: 10.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/antmaze/mcq_sac/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/mcq_sac/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/mcq_sac/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/par/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/antmaze/par/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/antmaze/par/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/antmaze/par_bc/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/antmaze/par_bc/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/antmaze/par_bc/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/antmaze/rlpd/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 1
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/rlpd/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 1
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/rlpd/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 1
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/sac/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/antmaze/sac/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/antmaze/sac/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/antmaze/sac_bc/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/antmaze/sac_bc/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/antmaze/sac_bc/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/antmaze/sac_cql/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/antmaze/sac_cql/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/antmaze/sac_cql/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/antmaze/sac_iw/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/antmaze/sac_iw/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/antmaze/sac_iw/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/antmaze/sac_mcq/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/sac_mcq/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/sac_mcq/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: False
--------------------------------------------------------------------------------
/config/antmaze/td3_bc/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/antmaze/td3_bc/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/antmaze/td3_bc/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/antmaze/vgdf/antmaze-large.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/antmaze/vgdf/antmaze-medium.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/antmaze/vgdf/antmaze-small.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/mujoco/bc_par/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/bc_par/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/bc_par/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/bc_par/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
32 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/bc_sac/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/bc_sac/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/bc_sac/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/bc_sac/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/bc_vgdf/ant.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/mujoco/bc_vgdf/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/mujoco/bc_vgdf/hopper.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/mujoco/bc_vgdf/walker2d.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 | bc_coeff: 5.0
53 | use_q_decay: True
--------------------------------------------------------------------------------
/config/mujoco/bosa/ant.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/mujoco/bosa/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/mujoco/bosa/hopper.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/mujoco/bosa/walker2d.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 | noise_clip: 0.5
15 |
16 | eval_episode: 10
17 | eval_freq: 10000
18 | start_steps: 5000
19 | max_step: 500000
20 |
21 | device: cuda
22 |
23 | save_freq: 5000
24 |
25 | vae_policy_lr: 0.001
26 | vae_policy_hidden_dim: 750
27 | vae_policy_beta: 0.5
28 | vae_dyna_lr: 0.001
29 | vae_dyna_ensemble: 5
30 | vae_dyna_hidden_dim: 750
31 | vae_dyna_beta: 0.5
32 | vae_iteration: 100000
33 |
34 | lamda_policy: 0.1
35 | lamda_dyna: 0.1
36 | epsilon_policy_exp: 0.01
37 | epsilon_dyna_exp: 0.01
38 | conservation_coef: 0.1
39 | num_samples: 1
40 |
--------------------------------------------------------------------------------
/config/mujoco/cql_sac/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/cql_sac/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/cql_sac/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/cql_sac/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/dara/ant.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # eta: 0.1  (duplicate key removed; eta is already set to 0.1 on line 13)
--------------------------------------------------------------------------------
/config/mujoco/dara/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # eta: 0.1  (duplicate key removed; eta is already set to 0.1 on line 13)
--------------------------------------------------------------------------------
/config/mujoco/dara/hopper.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # eta: 0.1  (duplicate key removed; eta is already set to 0.1 on line 13)
--------------------------------------------------------------------------------
/config/mujoco/dara/walker2d.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | eta: 0.1
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 500000
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | lam: 0.7
31 | temp: 3.0
32 | # eta: 0.1  (duplicate key removed; eta is already set to 0.1 on line 13)
--------------------------------------------------------------------------------
/config/mujoco/darc/ant.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/mujoco/darc/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/mujoco/darc/hopper.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/mujoco/darc/walker2d.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 | penalty_coefficient: 1.0
14 |
15 | temperature_opt: False
16 |
17 | tau: 0.005
18 | update_interval: 2
19 | expl_noise: 0.2
20 |
21 | eval_episode: 10
22 | eval_freq: 10000
23 | start_steps: 5000
24 | max_step: 1000000
25 | tar_env_interact_freq: 10
26 |
27 | device: cuda
28 |
29 | save_freq: 5000
30 |
--------------------------------------------------------------------------------
/config/mujoco/h2o/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: True
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/mujoco/h2o/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: True
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/mujoco/h2o/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: True
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/mujoco/h2o/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | gaussian_noise_std: 1.0
27 |
28 | device: cuda
29 |
30 | cql_max_target_backup: False
31 | backup_entropy: True
32 | cql_n_actions: 10
33 | cql_importance_sample: True
34 | cql_temp: 1.0
35 | cql_clip_diff_min: -1000000
36 | cql_clip_diff_max: 1000000
37 | cql_lagrange: False
38 | cql_alpha: 10.0
39 | cql_target_action_gap: 1.0
40 | n_state: 10
41 |
42 |
43 | save_freq: 5000
44 |
--------------------------------------------------------------------------------
/config/mujoco/igdf/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
37 | output_gain: null
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/mujoco/igdf/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
37 | output_gain: null
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/mujoco/igdf/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
37 | output_gain: null
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/mujoco/igdf/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | info_update_step: 7000
32 | repr_dim: 64
33 | ensemble_size: 1
34 | repr_norm: False
35 | repr_norm_temp: False
36 | ortho_init: False
37 | output_gain: null
38 | importance_weight: 1.0
39 | xi: 0.75
40 |
41 | save_freq: 5000
42 |
--------------------------------------------------------------------------------
/config/mujoco/iql/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/mujoco/iql/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/mujoco/iql/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/mujoco/iql/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | lam: 0.7
29 | temp: 3.0
30 |
31 | save_freq: 5000
32 |
--------------------------------------------------------------------------------
/config/mujoco/mcq_sac/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/mcq_sac/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/mcq_sac/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/mcq_sac/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
32 |
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/par/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/par/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/par/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/par/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/par_bc/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/mujoco/par_bc/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/mujoco/par_bc/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/mujoco/par_bc/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | beta: 0.1
31 | weight: 5.0
--------------------------------------------------------------------------------
/config/mujoco/rlpd/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/rlpd/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/rlpd/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/rlpd/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
30 | n_layers: 3
31 | num_q: 10
32 | num_backup_q: 2
33 | entropy_backup: True
--------------------------------------------------------------------------------
/config/mujoco/sac/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac_bc/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/sac_bc/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/sac_bc/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/sac_bc/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | weight: 2.5
29 |
30 | save_freq: 5000
31 |
--------------------------------------------------------------------------------
/config/mujoco/sac_cql/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/sac_cql/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/sac_cql/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/sac_cql/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | cql_max_target_backup: False
29 | backup_entropy: True
30 | cql_n_actions: 10
31 | cql_importance_sample: True
32 | cql_temp: 1.0
33 | cql_clip_diff_min: -1000000
34 | cql_clip_diff_max: 1000000
35 | cql_lagrange: False
36 | cql_alpha: 10.0
37 | cql_target_action_gap: 0.0
38 |
39 |
40 | save_freq: 5000
41 |
--------------------------------------------------------------------------------
/config/mujoco/sac_iw/ant.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac_iw/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac_iw/hopper.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac_iw/walker2d.yaml:
--------------------------------------------------------------------------------
1 | alpha: 0.2
2 | batch_size: 128
3 | actor_lr: 0.0003
4 | critic_lr: 0.0003
5 | gamma: 0.99
6 |
7 | state_dim: 17
8 | action_dim: 3
9 | hidden_sizes: 256
10 | max_action: 1
11 |
12 | gaussian_noise_std: 1.0
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 1000000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 |
--------------------------------------------------------------------------------
/config/mujoco/sac_mcq/ant.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/mujoco/sac_mcq/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/mujoco/sac_mcq/hopper.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/mujoco/sac_mcq/walker2d.yaml:
--------------------------------------------------------------------------------
1 | ac_gradient_clip: 100
2 | alpha: 0.2
3 | batch_size: 128
4 | actor_lr: 0.0003
5 | critic_lr: 0.0003
6 | gamma: 0.99
7 | max_epochs_since_update_decay_interval: 150000.0
8 |
9 | state_dim: 17
10 | action_dim: 3
11 | hidden_sizes: 256
12 | max_action: 1
13 |
14 | temperature_opt: False
15 |
16 | tau: 0.005
17 | update_interval: 2
18 | expl_noise: 0.2
19 |
20 | eval_episode: 10
21 | eval_freq: 10000
22 | start_steps: 5000
23 | max_step: 500000
24 | tar_env_interact_freq: 10
25 |
26 | device: cuda
27 |
28 | save_freq: 5000
29 | lam: 0.8
30 | num_sample_action: 10
31 | vae_features: 750
--------------------------------------------------------------------------------
/config/mujoco/td3_bc/ant.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/mujoco/td3_bc/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/mujoco/td3_bc/hopper.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/mujoco/td3_bc/walker2d.yaml:
--------------------------------------------------------------------------------
1 | batch_size: 128
2 | actor_lr: 0.0003
3 | critic_lr: 0.0003
4 | gamma: 0.99
5 |
6 | state_dim: 17
7 | action_dim: 3
8 | hidden_sizes: 256
9 | max_action: 1
10 |
11 | tau: 0.005
12 | update_interval: 2
13 | expl_noise: 0.2
14 |
15 | eval_episode: 10
16 | eval_freq: 10000
17 | start_steps: 5000
18 | max_step: 500000
19 |
20 | device: cuda
21 |
22 | weight: 2.5
23 |
24 | save_freq: 5000
25 |
--------------------------------------------------------------------------------
/config/mujoco/vgdf/ant.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/mujoco/vgdf/halfcheetah.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/mujoco/vgdf/hopper.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/config/mujoco/vgdf/walker2d.yaml:
--------------------------------------------------------------------------------
1 | model_config:
2 | dynamics_elite_size: 5
3 | dynamics_ensemble_size: 7
4 | dynamics_hidden_size: 200
5 | policy_hiddens:
6 | - 256
7 | - 256
8 | policy_initializer: xavier uniform
9 | policy_log_std_max: 2.0
10 | policy_log_std_min: -10.0
11 | policy_nonlinear: ReLU
12 | value_hiddens:
13 | - 256
14 | - 256
15 | value_initializer: xavier uniform
16 | value_nonlinear: ReLU
17 |
18 |
19 | state_dim: 27
20 | action_dim: 8
21 | ac_gradient_clip: 100
22 | alpha: 0.2
23 | batch_size: 128
24 | lr: 0.0003
25 | gamma: 0.99
26 | max_epochs_since_update_decay_interval: 150000.0
27 |
28 | optimistic: True
29 |
30 | tau: 0.005
31 | training_delay: 2
32 |
33 | src_buffer_size: 1000000
34 | tar_buffer_size: 1000000
35 |
36 | dynamics_batch_size: 256
37 | dynamics_holdout_ratio: 0.1
38 | dynamics_max_epochs_since_update: 5
39 | dynamics_train_freq: 250
40 |
41 | eval_episode: 5
42 | eval_freq: 10000
43 | max_step: 1000000
44 | tar_env_interact_freq: 10
45 |
46 | device: cuda
47 |
48 | save_freq: 50000
49 |
50 | likelihood_gate_threshold: 0.75
51 | start_gate_src_sample: 100000.0
52 |
--------------------------------------------------------------------------------
/dataset/adroit/README.md:
--------------------------------------------------------------------------------
1 | Adroit domain datasets should be placed here.
--------------------------------------------------------------------------------
/dataset/antmaze/README.md:
--------------------------------------------------------------------------------
1 | AntMaze domain datasets should be placed here.
--------------------------------------------------------------------------------
/dataset/mujoco/README.md:
--------------------------------------------------------------------------------
1 | Locomotion domain datasets should be placed here.
--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/__init__.py
--------------------------------------------------------------------------------
/envs/adroit/call_adroit_env.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | import gym
3 |
4 |
def call_adroit_env(env_config: Dict) -> gym.Env:
    """Create the gym environment for an Adroit dynamics-shift task.

    Args:
        env_config: mapping with two keys read here:
            ``'env_name'``  -- task name such as ``"pen_shrink_finger"``;
                it is lower-cased and underscores are turned into hyphens.
            ``'shift_level'`` -- shift level (the original comment lists
                easy/medium/hard); it is stringified and appended to the id.

    Returns:
        The result of ``gym.make('<env-name>-<shift_level>-v0')``.

    Raises:
        AssertionError: if the normalized name does not start with one of
            pen/hammer/relocate/door or does not end with
            shrink-finger/broken-joint.
    """
    # Supported combinations: {pen, hammer, relocate, door} x
    # {shrink-finger, broken-joint}.
    task_prefixes = ('pen', 'hammer', 'relocate', 'door')
    shift_suffixes = ('shrink-finger', 'broken-joint')

    task_id = env_config['env_name'].lower().replace('_', '-')
    level = env_config['shift_level']

    assert task_id.startswith(task_prefixes)
    assert task_id.endswith(shift_suffixes)

    return gym.make('-'.join([task_id, str(level), 'v0']))
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/F1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/F1.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/F2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/F2.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/F3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/F3.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/TH1_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/TH1_z.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/TH2_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/TH2_z.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/TH3_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/TH3_z.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/forearm_simple.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/forearm_simple.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/knuckle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/knuckle.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/lfmetacarpal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/lfmetacarpal.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/palm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/palm.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/meshes/wrist.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/meshes/wrist.stl
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/darkwood.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/darkwood.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/dice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/dice.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/foil.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/foil.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/marble.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/marble.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/silverRaw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/silverRaw.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/skin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/skin.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/square.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/square.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/wood.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/wood.png
--------------------------------------------------------------------------------
/envs/adroit/dependencies/Adroit/resources/textures/woodb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/adroit/dependencies/Adroit/resources/textures/woodb.png
--------------------------------------------------------------------------------
/envs/antmaze/assets/point.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/envs/antmaze/call_antmaze_env.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 | import gym
3 | import d4rl
4 |
5 |
def call_antmaze_env(env_config: Dict) -> gym.Env:
    """Create the gym environment for an AntMaze task.

    Args:
        env_config: mapping with two keys read here:
            ``'env_name'``  -- e.g. ``"antmaze_small_lshape"``; lower-cased,
                underscores replaced by hyphens. Must start with ``antmaze``
                and contain one of ``small`` / ``medium`` / ``large``.
            ``'shift_level'`` -- ``None`` to get the default maze for the
                size, otherwise the layout/variant tag appended to the id.
                It may be a string or any value whose ``str()`` is the tag
                (e.g. the int ``3``).

    Returns:
        With ``shift_level is None``: ``antmaze-umaze-v0`` (small),
        ``antmaze-medium-0-v0`` (medium) or ``antmaze-large-0-v0`` (large).
        Otherwise ``gym.make('<env-name>-<shift_level>-v0')``.

    Raises:
        AssertionError: if the env name or the shift level is not one of the
            supported values.
    """
    env_name = env_config['env_name'].lower().replace('_', '-')

    # Supported tasks:
    #   antmaze - small  - empty / lshape / centerblock / reversel /
    #                      reverseu / zshape
    #           - medium - 0/1/2/3/4/5/6
    #           - large  - 1/2/3/4/5/6
    # NOTE(review): the original task list also mentioned "brokenjoint" for
    # small mazes, but the check below has never accepted it -- confirm
    # whether it should be added.
    assert env_name.startswith('antmaze')
    assert any(size in env_name for size in ('small', 'medium', 'large'))

    shift_level = env_config['shift_level']

    if shift_level is None:
        # No shift requested: fall back to the default maze of that size.
        if 'small' in env_name:
            return gym.make('antmaze-umaze-v0')
        if 'medium' in env_name:
            return gym.make('antmaze-medium-0-v0')
        return gym.make('antmaze-large-0-v0')

    # Stringify once, up front: the substring checks below would raise
    # TypeError on a non-string level (e.g. an int), although the id is
    # built from str(shift_level) anyway.
    level = str(shift_level)

    if 'small' in env_name:
        allowed = ('empty', 'lshape', 'centerblock',
                   'reversel', 'reverseu', 'zshape')
    elif 'medium' in env_name:
        allowed = ('0', '1', '2', '3', '4', '5', '6')
    else:
        allowed = ('1', '2', '3', '4', '5', '6')

    # Substring match, as in the original validation.
    assert any(tag in level for tag in allowed)

    return gym.make(env_name + '-' + level + '-v0')
--------------------------------------------------------------------------------
/envs/antmaze/common.py:
--------------------------------------------------------------------------------
1 |
2 |
def run_policy_on_env(policy_fn, env, truncate_episode_at=None,
                      first_obs=None):
    """Roll ``policy_fn`` out in ``env`` for one episode and record it.

    Args:
        policy_fn: Callable mapping an observation to an action.
        env: Object exposing ``reset()`` and a ``step(action)`` that returns a
            4-tuple ``(next_obs, reward, done, info)``.
        truncate_episode_at: Optional cap on the number of steps; ``None``
            means the rollout only stops when ``done`` is truthy.
        first_obs: Observation to start from. When ``None`` the environment is
            reset to obtain one; otherwise ``env.reset()`` is not called.

    Returns:
        A list of ``(obs, action, reward, done)`` tuples, one per step, where
        ``obs`` is the observation the action was computed from.
    """
    current_obs = env.reset() if first_obs is None else first_obs

    rollout = []
    steps_taken = 0
    while True:
        action = policy_fn(current_obs)
        following_obs, reward, done, _ = env.step(action)
        rollout.append((current_obs, action, reward, done))
        current_obs = following_obs
        steps_taken += 1

        if done:
            break
        if truncate_episode_at is not None and steps_taken >= truncate_episode_at:
            break
    return rollout
22 |
--------------------------------------------------------------------------------
/envs/antmaze/goal_reaching_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
def disk_goal_sampler(np_random, goal_region_radius=10.):
    """Draw a 2-D goal inside a disk centred on the origin.

    Two values are taken from ``np_random.uniform()``, in this order: the
    first is scaled to an angle in ``[0, 2*pi)``, the second to a distance in
    ``[0, goal_region_radius)``. Because the distance itself (not its square)
    is uniform, samples are denser near the centre than at the rim.

    Args:
        np_random: Random generator exposing ``uniform()``.
        goal_region_radius: Radius of the disk goals are drawn from.

    Returns:
        Array ``[x, y]`` with the sampled goal position.
    """
    angle = 2 * np.pi * np_random.uniform()
    distance = goal_region_radius * np_random.uniform()
    direction = np.array([np.cos(angle), np.sin(angle)])
    return distance * direction
8 |
def constant_goal_sampler(np_random, location=None):
    """Return a fixed goal location; the random generator is not used.

    Args:
        np_random: Ignored. Accepted so the call signature matches
            ``disk_goal_sampler``.
        location: Goal to return, handed back as-is (no copy). When omitted,
            a new ``[10., 10.]`` array is created on every call, so mutating
            one returned default cannot affect later calls.

    Returns:
        ``location`` if given, otherwise a fresh ``10.0 * np.ones([2])``.
    """
    if location is None:
        # Build the default here, not in the signature: a default array is
        # created once at definition time and would be shared by all callers.
        location = 10.0 * np.ones([2])
    return location
11 |
class GoalReachingEnv(object):
    """General goal-reaching environment.

    This class delegates to ``BASE_ENV`` for observations, stepping and
    resetting, and adds a 2-D goal on top. Subclasses must set ``BASE_ENV``
    and provide ``get_xy()``; ``reset_model`` additionally reads
    ``self.np_random`` when it samples a new goal.
    """
    BASE_ENV = None  # Must be specified by child class.

    def __init__(self, goal_sampler, eval=False, reward_type='dense'):
        """Store the goal configuration.

        Args:
            goal_sampler: Callable taking a random generator and returning a
                goal; used by ``reset_model`` when no target goal is set.
            eval: When true, observations are returned without the goal
                direction appended and ``step`` reports ``done`` once the
                goal is reached.
            reward_type: ``'dense'`` (negative distance to the goal) or
                ``'sparse'`` (1.0 within 0.5 of the goal, else 0.0).
        """
        self._goal_sampler = goal_sampler
        self._goal = np.ones([2])
        self.target_goal = self._goal

        # This flag makes sure that, when the environment is used for
        # evaluation, no goal is appended to the state.
        self.eval = eval

        # This is the reward type fed as input to the goal-conditioned policy.
        self.reward_type = reward_type

    def _get_obs(self):
        """Return the base observation, plus the goal direction unless eval."""
        base_obs = self.BASE_ENV._get_obs(self)
        if self.eval:
            return base_obs
        goal_direction = self._goal - self.get_xy()
        return np.concatenate([base_obs, goal_direction])

    def step(self, a):
        """Advance the base environment and score the new position.

        Args:
            a: Action forwarded unchanged to ``BASE_ENV.step``.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``info`` is always an
            empty dict and ``done`` is only ever true in eval mode.

        Raises:
            ValueError: if ``self.reward_type`` is neither ``'dense'`` nor
                ``'sparse'``.
        """
        self.BASE_ENV.step(self, a)

        # Distance is needed for the reward and the termination test alike,
        # so compute it once.
        goal_distance = np.linalg.norm(self.get_xy() - self.target_goal)
        reached = goal_distance <= 0.5

        if self.reward_type == 'dense':
            reward = -goal_distance
        elif self.reward_type == 'sparse':
            reward = 1.0 if reached else 0.0
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `reward`.
            raise ValueError(
                "reward_type must be 'dense' or 'sparse', got {!r}".format(
                    self.reward_type))

        # Terminate the episode when we reach a goal (evaluation only).
        done = bool(self.eval and reached)

        obs = self._get_obs()
        return obs, reward, done, {}

    def reset_model(self):
        """Pick the goal for the next episode, then reset the base model.

        The stored ``target_goal`` is reused whenever it is set or the
        environment is in eval mode; a goal is drawn from the sampler only
        when ``target_goal`` is ``None`` and ``eval`` is false.
        """
        if self.target_goal is not None or self.eval:
            self._goal = self.target_goal
        else:
            self._goal = self._goal_sampler(self.np_random)

        return self.BASE_ENV.reset_model(self)
--------------------------------------------------------------------------------
/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/envs/mujoco/__init__.py
--------------------------------------------------------------------------------
/envs/mujoco/assets/hopper.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/envs/mujoco/assets/hopper_friction_0.1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/envs/mujoco/assets/hopper_friction_0.5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/envs/mujoco/assets/hopper_friction_2.0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/envs/mujoco/assets/hopper_friction_5.0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/envs/mujoco/assets/hopper_morph_foot_easy.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/imgs/ODRLbenchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OffDynamicsRL/off-dynamics-rl/e0189bd517585fb850caad0972d15b982fa8104e/imgs/ODRLbenchmark.png
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | box2d-py==2.3.8
2 | Cython==0.29.23
3 | -e git+https://github.com/Farama-Foundation/D4RL.git@4235ef21ac5ba35285ecfce133d9eff62f3490e5#egg=D4RL
4 | debugpy==1.6.3
5 | dm-control==1.0.8
6 | dm-env==1.5
7 | dm-tree==0.1.7
8 | glfw==2.5.4
9 | gym==0.18.3
10 | -e git+https://github.com/MattChanTK/gym-maze.git@83176811b49b5538a6213520612f44fb1bc49114#egg=gym_maze
11 | imageio==2.9.0
12 | matplotlib==3.6.2
13 | moviepy==1.0.3
14 | mujoco-py==2.1.2.14
15 | numpy==1.23.5
16 | omegaconf==2.1.2
17 | opencv-python==4.5.1.48
18 | pandas==1.4.4
19 | pybullet==3.2.5
20 | pycparser==2.21
21 | pygame==2.1.2
22 | pyglet==1.5.15
23 | PyOpenGL==3.1.6
24 | requests==2.28.1
25 | -e git+https://github.com/TianhongDai/reinforcement-learning-algorithms.git@88ff69c082cba9305275a0082794d4b467ac823f#egg=rl_utils
26 | scipy==1.4.1
27 | seaborn==0.12.2
28 | tensorboard==2.7.0
29 | tensorboardX==2.5.1
30 | torch==1.11.0+cu113
31 | torchaudio==0.11.0+cu113
32 | torchvision==0.12.0+cu113
33 | tqdm==4.64.0
34 | urllib3==1.26.12
35 | virtualenv==20.16.3
36 | wandb==0.13.2
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Example commands for launching experiments, two per training setting.
# Every command is started in the background, once for each seed 1..5.

for seed in 1 2 3 4 5
do
    # online-online example: DARC
    CUDA_VISIBLE_DEVICES=0 python train.py --policy DARC --env halfcheetah-kinematic-footjnt --shift_level easy --seed $seed --dir logs &
    CUDA_VISIBLE_DEVICES=0 python train.py --policy DARC --env hopper-gravity --shift_level 0.5 --seed $seed --dir logs &

    # offline-online example: BC_SAC
    CUDA_VISIBLE_DEVICES=1 python train.py --policy BC_SAC --env halfcheetah-friction --shift_level 0.1 --seed $seed --mode 1 --srctype medium --dir logs &
    CUDA_VISIBLE_DEVICES=1 python train.py --policy BC_SAC --env pen-broken-joint --shift_level easy --seed $seed --mode 1 --srctype expert --dir logs &

    # online-offline example: SAC_MCQ
    CUDA_VISIBLE_DEVICES=2 python train.py --policy SAC_MCQ --env walker2d-gravity --shift_level 2.0 --seed $seed --mode 2 --tartype medium --dir logs &
    CUDA_VISIBLE_DEVICES=2 python train.py --policy SAC_MCQ --env ant-morph-alllegs --shift_level medium --seed $seed --mode 2 --tartype medium --dir logs &

    # offline-offline example: BOSA
    CUDA_VISIBLE_DEVICES=3 python train.py --policy BOSA --env ant-friction --shift_level 0.5 --seed $seed --mode 3 --srctype medium --tartype medium --dir logs &
    CUDA_VISIBLE_DEVICES=3 python train.py --policy BOSA --env hopper-morph-torso --shift_level medium --seed $seed --mode 3 --srctype medium --tartype medium --dir logs &
done
23 |
--------------------------------------------------------------------------------