├── algorithms └── __init__.py ├── results └── bin │ ├── offline_scores.pickle │ └── finetune_scores.pickle ├── .pre-commit-config.yaml ├── pyproject.toml ├── configs ├── offline │ ├── bc │ │ ├── door │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── pen │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ │ │ ├── medium_play_v2.yaml │ │ │ ├── large_diverse_v2.yaml │ │ │ ├── umaze_diverse_v2.yaml │ │ │ └── medium_diverse_v2.yaml │ │ ├── hammer │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── hopper │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ ├── umaze_v1.yaml │ │ │ ├── large_dense_v1.yaml │ │ │ ├── umaze_dense_v1.yaml │ │ │ └── medium_dense_v1.yaml │ │ ├── relocate │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── walker2d │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── halfcheetah │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ ├── bc_10 │ │ ├── pen │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── door │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── hammer │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── umaze_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ ├── large_dense_v1.yaml │ │ │ ├── umaze_dense_v1.yaml │ │ │ └── medium_dense_v1.yaml │ │ ├── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ 
│ │ ├── medium_play_v2.yaml │ │ │ ├── large_diverse_v2.yaml │ │ │ ├── medium_diverse_v2.yaml │ │ │ └── umaze_diverse_v2.yaml │ │ ├── hopper │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── relocate │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── walker2d │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── halfcheetah │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ ├── awac │ │ ├── door │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── pen │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── hammer │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ ├── umaze_v1.yaml │ │ │ ├── large_dense_v1.yaml │ │ │ ├── umaze_dense_v1.yaml │ │ │ └── medium_dense_v1.yaml │ │ ├── hopper │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── relocate │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── walker2d │ │ │ ├── random_v2.yaml │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_replay_v2.yaml │ │ │ └── medium_expert_v2.yaml │ │ ├── halfcheetah │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ │ │ ├── medium_play_v2.yaml │ │ │ ├── 
large_diverse_v2.yaml │ │ │ ├── medium_diverse_v2.yaml │ │ │ └── umaze_diverse_v2.yaml │ ├── td3_bc │ │ ├── pen │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ │ │ ├── medium_play_v2.yaml │ │ │ ├── large_diverse_v2.yaml │ │ │ ├── medium_diverse_v2.yaml │ │ │ └── umaze_diverse_v2.yaml │ │ ├── door │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── hopper │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ ├── umaze_v1.yaml │ │ │ ├── large_dense_v1.yaml │ │ │ ├── umaze_dense_v1.yaml │ │ │ └── medium_dense_v1.yaml │ │ ├── hammer │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── walker2d │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── relocate │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ └── halfcheetah │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ ├── iql │ │ ├── door │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── pen │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── hammer │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── hopper │ │ │ ├── expert_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_replay_v2.yaml │ │ │ └── medium_expert_v2.yaml │ │ ├── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ │ │ ├── medium_play_v2.yaml │ │ │ ├── large_diverse_v2.yaml │ │ │ ├── 
umaze_diverse_v2.yaml │ │ │ └── medium_diverse_v2.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── umaze_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ ├── large_dense_v1.yaml │ │ │ ├── medium_dense_v1.yaml │ │ │ └── umaze_dense_v1.yaml │ │ ├── relocate │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── walker2d │ │ │ ├── expert_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── halfcheetah │ │ │ ├── expert_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ ├── sac_n │ │ ├── pen │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── door │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ │ │ ├── medium_play_v2.yaml │ │ │ ├── large_diverse_v2.yaml │ │ │ ├── umaze_diverse_v2.yaml │ │ │ └── medium_diverse_v2.yaml │ │ ├── hammer │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ └── umaze_v1.yaml │ │ ├── relocate │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── walker2d │ │ │ ├── medium_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── halfcheetah │ │ │ ├── medium_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── hopper │ │ │ ├── medium_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ ├── edac │ │ ├── door │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── pen │ │ │ ├── cloned_v1.yaml │ │ │ ├── expert_v1.yaml │ │ │ └── human_v1.yaml │ │ ├── antmaze │ │ │ ├── umaze_v2.yaml │ │ │ ├── large_play_v2.yaml │ │ │ ├── medium_play_v2.yaml │ │ │ ├── large_diverse_v2.yaml │ │ │ ├── 
umaze_diverse_v2.yaml │ │ │ └── medium_diverse_v2.yaml │ │ ├── hammer │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── hopper │ │ │ ├── medium_v2.yaml │ │ │ ├── medium_replay_v2.yaml │ │ │ └── medium_expert_v2.yaml │ │ ├── maze2d │ │ │ ├── large_v1.yaml │ │ │ ├── medium_v1.yaml │ │ │ └── umaze_v1.yaml │ │ ├── relocate │ │ │ ├── human_v1.yaml │ │ │ ├── cloned_v1.yaml │ │ │ └── expert_v1.yaml │ │ ├── walker2d │ │ │ ├── medium_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── halfcheetah │ │ │ ├── medium_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ ├── lb-sac │ │ ├── hopper │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ ├── walker2d │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ ├── medium_expert_v2.yaml │ │ │ └── medium_replay_v2.yaml │ │ └── halfcheetah │ │ │ ├── expert_v2.yaml │ │ │ ├── medium_v2.yaml │ │ │ ├── random_v2.yaml │ │ │ ├── full_replay_v2.yaml │ │ │ └── medium_expert_v2.yaml │ └── rebrac │ │ ├── door │ │ ├── human_v1.yaml │ │ ├── cloned_v1.yaml │ │ └── expert_v1.yaml │ │ ├── pen │ │ ├── human_v1.yaml │ │ ├── cloned_v1.yaml │ │ └── expert_v1.yaml │ │ ├── hammer │ │ ├── cloned_v1.yaml │ │ ├── human_v1.yaml │ │ └── expert_v1.yaml │ │ ├── hopper │ │ ├── expert_v2.yaml │ │ ├── medium_v2.yaml │ │ ├── random_v2.yaml │ │ └── full_replay_v2.yaml │ │ ├── maze2d │ │ ├── large_v1.yaml │ │ ├── medium_v1.yaml │ │ └── umaze_v1.yaml │ │ ├── relocate │ │ ├── human_v1.yaml │ │ ├── cloned_v1.yaml │ │ └── expert_v1.yaml │ │ ├── walker2d │ │ ├── expert_v2.yaml │ │ ├── medium_v2.yaml │ │ └── random_v2.yaml │ │ ├── antmaze │ │ └── umaze_v2.yaml │ │ └── halfcheetah │ │ ├── expert_v2.yaml │ │ └── random_v2.yaml └── finetune │ ├── awac │ ├── door │ │ └── cloned_v1.yaml │ ├── pen │ │ └── cloned_v1.yaml 
│ ├── hammer │ │ └── cloned_v1.yaml │ ├── antmaze │ │ ├── umaze_v2.yaml │ │ ├── large_play_v2.yaml │ │ ├── medium_play_v2.yaml │ │ ├── large_diverse_v2.yaml │ │ ├── umaze_diverse_v2.yaml │ │ └── medium_diverse_v2.yaml │ └── relocate │ │ └── cloned_v1.yaml │ └── iql │ ├── antmaze │ ├── umaze_v2.yaml │ ├── large_play_v2.yaml │ ├── medium_play_v2.yaml │ ├── large_diverse_v2.yaml │ ├── medium_diverse_v2.yaml │ └── umaze_diverse_v2.yaml │ ├── pen │ └── cloned_v1.yaml │ ├── door │ └── cloned_v1.yaml │ ├── hammer │ └── cloned_v1.yaml │ └── relocate │ └── cloned_v1.yaml ├── .github ├── CODEOWNERS └── workflows │ └── codestyle.yml └── requirements ├── requirements.txt └── requirements_dev.txt /algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /results/bin/offline_scores.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tinkoff-ai/CORL/HEAD/results/bin/offline_scores.pickle -------------------------------------------------------------------------------- /results/bin/finetune_scores.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tinkoff-ai/CORL/HEAD/results/bin/finetune_scores.pickle -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/charliermarsh/ruff-pre-commit 3 | rev: 'v0.0.278' 4 | hooks: 5 | - id: ruff 6 | args: [--fix] -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | select = ["E", "F", "I001", "RUF100"] 3 | ignore = ["E402"] 4 | line-length = 89 5 | 
target-version = "py39" 6 | 7 | [tool.ruff.isort] 8 | combine-as-imports = true 9 | lines-after-imports = 1 10 | order-by-type = false -------------------------------------------------------------------------------- /configs/offline/bc/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: door-human-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-door-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: pen-cloned-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-pen-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: pen-expert-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-pen-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/pen/human_v1.yaml: 
-------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: pen-human-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-pen-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: door-cloned-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-door-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: door-expert-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-door-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: pen-human-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: 
bc-10-pen-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: antmaze-umaze-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-antmaze-umaze-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hammer-cloned-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hammer-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hammer-expert-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hammer-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | 
-------------------------------------------------------------------------------- /configs/offline/bc/hammer/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hammer-human-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hammer-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hopper-expert-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hopper-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hopper-medium-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hopper-medium-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hopper/random_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | 
checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hopper-random-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hopper-random-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: maze2d-large-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-maze2d-large-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: maze2d-medium-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-maze2d-medium-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: maze2d-umaze-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-maze2d-umaze-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 
14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: door-human-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-door-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: pen-cloned-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-pen-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: pen-expert-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-pen-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/relocate/cloned_v1.yaml: 
-------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: relocate-cloned-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-relocate-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: relocate-expert-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-relocate-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: relocate-human-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-relocate-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: walker2d-expert-v2 7 | eval_freq: 5000 
8 | frac: 1.0 9 | group: bc-walker2d-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: walker2d-medium-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-walker2d-medium-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/walker2d/random_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: walker2d-random-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-walker2d-random-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: door-cloned-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-door-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | 
-------------------------------------------------------------------------------- /configs/offline/bc_10/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: door-expert-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-door-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hammer/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hammer-human-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hammer-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: maze2d-large-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-maze2d-large-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | 
buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: maze2d-umaze-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-maze2d-umaze-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: antmaze-umaze-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-antmaze-umaze-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hammer-cloned-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hammer-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hammer-expert-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hammer-expert-v1-multiseed-v0 10 | load_model: '' 11 | 
max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hopper-expert-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hopper-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hopper-medium-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hopper-medium-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hopper/random_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hopper-random-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hopper-random-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | --------------------------------------------------------------------------------
/configs/offline/bc_10/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: maze2d-medium-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-maze2d-medium-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: relocate-human-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-relocate-human-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: antmaze-large-play-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-antmaze-large-play-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/halfcheetah/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: 
cuda 5 | discount: 0.99 6 | env: halfcheetah-expert-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-halfcheetah-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: halfcheetah-medium-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-halfcheetah-medium-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: halfcheetah-random-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-halfcheetah-random-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hopper/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hopper-full-replay-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hopper-full-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 
1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/maze2d/large_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: maze2d-large-dense-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-maze2d-large-dense-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/maze2d/umaze_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: maze2d-umaze-dense-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-maze2d-umaze-dense-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: relocate-cloned-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-relocate-cloned-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- 
/configs/offline/bc_10/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: relocate-expert-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-relocate-expert-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: walker2d-expert-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-walker2d-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: walker2d-medium-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-walker2d-medium-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/walker2d/random_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | 
device: cuda 5 | discount: 1.0 6 | env: walker2d-random-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-walker2d-random-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: antmaze-medium-play-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-antmaze-medium-play-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hopper-medium-expert-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hopper-medium-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: hopper-medium-replay-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-hopper-medium-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 
12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/maze2d/medium_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: maze2d-medium-dense-v1 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-maze2d-medium-dense-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/walker2d/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: walker2d-full-replay-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-walker2d-full-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: antmaze-large-diverse-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-antmaze-large-diverse-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | 
-------------------------------------------------------------------------------- /configs/offline/bc/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: antmaze-umaze-diverse-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-antmaze-umaze-diverse-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: antmaze-large-play-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-antmaze-large-play-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/halfcheetah/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: halfcheetah-expert-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-halfcheetah-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/halfcheetah/medium_v2.yaml: 
-------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: halfcheetah-medium-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-halfcheetah-medium-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: halfcheetah-random-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-halfcheetah-random-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hopper/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hopper-full-replay-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hopper-full-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/maze2d/large_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 
1.0 6 | env: maze2d-large-dense-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-maze2d-large-dense-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/maze2d/umaze_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: maze2d-umaze-dense-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-maze2d-umaze-dense-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: antmaze-medium-diverse-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-antmaze-medium-diverse-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/halfcheetah/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: halfcheetah-full-replay-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-halfcheetah-full-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 
1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: walker2d-medium-expert-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-walker2d-medium-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: walker2d-medium-replay-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-walker2d-medium-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: antmaze-medium-play-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-antmaze-medium-play-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | 
-------------------------------------------------------------------------------- /configs/offline/bc_10/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hopper-medium-expert-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hopper-medium-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: hopper-medium-replay-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-hopper-medium-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/maze2d/medium_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: maze2d-medium-dense-v1 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-maze2d-medium-dense-v1-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/walker2d/full_replay_v2.yaml: 
-------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: walker2d-full-replay-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-walker2d-full-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: halfcheetah-medium-expert-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-halfcheetah-medium-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 0.99 6 | env: halfcheetah-medium-replay-v2 7 | eval_freq: 5000 8 | frac: 1.0 9 | group: bc-halfcheetah-medium-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: 
null 4 | device: cuda 5 | discount: 1.0 6 | env: antmaze-large-diverse-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-antmaze-large-diverse-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: antmaze-medium-diverse-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-antmaze-medium-diverse-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: antmaze-umaze-diverse-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-antmaze-umaze-diverse-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 100 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: walker2d-medium-expert-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: 
bc-10-walker2d-medium-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: walker2d-medium-replay-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-walker2d-medium-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/halfcheetah/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: halfcheetah-full-replay-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-halfcheetah-full-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: halfcheetah-medium-expert-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-halfcheetah-medium-expert-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 
14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/bc_10/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 256 2 | buffer_size: 10000000 3 | checkpoints_path: null 4 | device: cuda 5 | discount: 1.0 6 | env: halfcheetah-medium-replay-v2 7 | eval_freq: 5000 8 | frac: 0.1 9 | group: bc-10-halfcheetah-medium-replay-v2-multiseed-v0 10 | load_model: '' 11 | max_timesteps: 1000000 12 | max_traj_len: 1000 13 | n_episodes: 10 14 | name: BC-10 15 | normalize: true 16 | project: CORL 17 | seed: 0 18 | -------------------------------------------------------------------------------- /configs/offline/awac/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: door-human-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-door-human-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: pen-cloned-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-pen-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | 
-------------------------------------------------------------------------------- /configs/offline/awac/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: pen-expert-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-pen-expert-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: pen-human-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-pen-human-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: door-cloned-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-door-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- 
/configs/offline/awac/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: door-expert-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-door-expert-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: hammer-cloned-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hammer-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: hammer-expert-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hammer-expert-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hammer/human_v1.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: hammer-human-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hammer-human-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: maze2d-large-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-maze2d-large-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: maze2d-medium-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-maze2d-medium-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/maze2d/umaze_v1.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: maze2d-umaze-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-maze2d-umaze-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: hopper-expert-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hopper-expert-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: hopper-medium-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hopper-medium-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hopper/random_v2.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: hopper-random-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hopper-random-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: relocate-cloned-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-relocate-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: relocate-expert-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-relocate-expert-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/relocate/human_v1.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: relocate-human-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-relocate-human-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/walker2d/random_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: walker2d-random-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-walker2d-random-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/awac/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: walker2d-expert-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-walker2d-expert-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/walker2d/medium_v2.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: walker2d-medium-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-walker2d-medium-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/maze2d/large_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: maze2d-large-dense-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-maze2d-large-dense-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/maze2d/umaze_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: maze2d-umaze-dense-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-maze2d-umaze-dense-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/halfcheetah/expert_v2.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: halfcheetah-expert-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-halfcheetah-expert-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: halfcheetah-medium-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-halfcheetah-medium-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: halfcheetah-random-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-halfcheetah-random-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hopper/full_replay_v2.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: hopper-full-replay-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hopper-full-replay-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/maze2d/medium_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: maze2d-medium-dense-v1 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-maze2d-medium-dense-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: hopper-medium-expert-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hopper-medium-expert-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/hopper/medium_replay_v2.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: hopper-medium-replay-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-hopper-medium-replay-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/walker2d/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: walker2d-full-replay-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-walker2d-full-replay-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: walker2d-medium-replay-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-walker2d-medium-replay-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/awac/antmaze/umaze_v2.yaml: 
-------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-umaze-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-antmaze-umaze-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | num_train_ops: 1000000 16 | project: CORL 17 | seed: 42 18 | tau: 0.005 19 | test_seed: 69 20 | -------------------------------------------------------------------------------- /configs/offline/awac/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: walker2d-medium-expert-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-walker2d-medium-expert-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Lines starting with '#' are comments. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # These owners will be the default owners for everything in the repo. 5 | * @Howuhh @Scitator @vkurenkov 6 | 7 | # Order is important. The last matching pattern has the most precedence. 8 | # So if a pull request only touches javascript files, only these owners 9 | # will be requested to review. 10 | 11 | 12 | # You can also use email addresses if you prefer. 
13 | -------------------------------------------------------------------------------- /configs/offline/awac/halfcheetah/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: halfcheetah-full-replay-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-halfcheetah-full-replay-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: halfcheetah-medium-expert-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-halfcheetah-medium-expert-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.3333 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: true 6 | device: cuda 7 | env_name: halfcheetah-medium-replay-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-halfcheetah-medium-replay-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | num_train_ops: 1000000 15 | project: CORL 16 | seed: 42 
17 | tau: 0.005 18 | test_seed: 69 19 | -------------------------------------------------------------------------------- /configs/offline/awac/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-large-play-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-antmaze-large-play-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | num_train_ops: 1000000 16 | project: CORL 17 | seed: 42 18 | tau: 0.005 19 | test_seed: 69 20 | -------------------------------------------------------------------------------- /configs/offline/awac/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-medium-play-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-antmaze-medium-play-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | num_train_ops: 1000000 16 | project: CORL 17 | seed: 42 18 | tau: 0.005 19 | test_seed: 69 20 | -------------------------------------------------------------------------------- /configs/finetune/awac/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: door-cloned-v1 8 | eval_frequency: 5000 9 | gamma: 0.99 10 | group: awac-door-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | normalize_reward: false 15 | 
offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/finetune/awac/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: pen-cloned-v1 8 | eval_frequency: 5000 9 | gamma: 0.99 10 | group: awac-pen-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | normalize_reward: false 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/awac/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-large-diverse-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-antmaze-large-diverse-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | num_train_ops: 1000000 16 | project: CORL 17 | seed: 42 18 | tau: 0.005 19 | test_seed: 69 20 | -------------------------------------------------------------------------------- /configs/offline/awac/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-medium-diverse-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: 
awac-antmaze-medium-diverse-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | num_train_ops: 1000000 16 | project: CORL 17 | seed: 42 18 | tau: 0.005 19 | test_seed: 69 20 | -------------------------------------------------------------------------------- /configs/offline/awac/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-umaze-diverse-v2 8 | eval_frequency: 1000 9 | gamma: 0.99 10 | group: awac-antmaze-umaze-diverse-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | num_train_ops: 1000000 16 | project: CORL 17 | seed: 42 18 | tau: 0.005 19 | test_seed: 69 20 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: pen-human-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-pen-human-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/finetune/awac/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | 
env_name: hammer-cloned-v1 8 | eval_frequency: 5000 9 | gamma: 0.99 10 | group: awac-hammer-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | normalize_reward: false 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/iql/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: door-human-v1 9 | eval_freq: 5000 10 | group: iql-adroit-door-human-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: pen-cloned-v1 9 | eval_freq: 5000 10 | group: iql-adroit-pen-cloned-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 
3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: pen-expert-v1 9 | eval_freq: 5000 10 | group: iql-adroit-pen-expert-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: pen-human-v1 9 | eval_freq: 5000 10 | group: iql-adroit-pen-human-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: antmaze-umaze-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-antmaze-umaze-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: true 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/door/cloned_v1.yaml: 
-------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: door-cloned-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-door-cloned-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: door-expert-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-door-expert-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: door-human-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-door-human-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | 
-------------------------------------------------------------------------------- /configs/offline/td3_bc/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hopper-expert-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-hopper-expert-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hopper-medium-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-hopper-medium-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hopper/random_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hopper-random-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-hopper-random-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | 
normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: maze2d-large-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-maze2d-large-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: maze2d-medium-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-maze2d-medium-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: maze2d-umaze-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-maze2d-umaze-v1-multiseed-v0 11 | load_model: '' 12 | 
max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: pen-cloned-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-pen-cloned-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: pen-expert-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-pen-expert-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/finetune/awac/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: 
antmaze-umaze-v2 8 | eval_frequency: 50000 9 | gamma: 0.99 10 | group: awac-antmaze-umaze-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/iql/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: door-cloned-v1 9 | eval_freq: 5000 10 | group: iql-adroit-door-cloned-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: door-expert-v1 9 | eval_freq: 5000 10 | group: iql-adroit-door-expert-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hammer/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 
| batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: hammer-human-v1 9 | eval_freq: 5000 10 | group: iql-adroit-hammer-human-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: hopper-expert-v2 9 | eval_freq: 5000 10 | group: iql-hopper-expert-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hopper/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: hopper-random-v2 9 | eval_freq: 5000 10 | group: iql-hopper-random-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- 
/configs/offline/td3_bc/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hammer-cloned-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-hammer-cloned-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hammer-expert-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-hammer-expert-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hammer/human_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hammer-human-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-hammer-human-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | 
project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: walker2d-expert-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-walker2d-expert-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: walker2d-medium-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-walker2d-medium-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/walker2d/random_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: walker2d-random-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-walker2d-random-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: 
TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/finetune/awac/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: relocate-cloned-v1 8 | eval_frequency: 5000 9 | gamma: 0.99 10 | group: awac-relocate-cloned-v1-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 10 14 | normalize_reward: false 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/iql/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-umaze-v2 9 | eval_freq: 5000 10 | group: iql-antmaze-umaze-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: hammer-cloned-v1 9 | eval_freq: 5000 10 | group: 
iql-adroit-hammer-cloned-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: hammer-expert-v1 9 | eval_freq: 5000 10 | group: iql-adroit-hammer-expert-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: hopper-medium-v2 9 | eval_freq: 5000 10 | group: iql-hopper-medium-v2-multiseed-v0 11 | iql_deterministic: true 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.001 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | 
buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: maze2d-large-v1 9 | eval_freq: 5000 10 | group: iql-maze2d-large-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: maze2d-umaze-v1 9 | eval_freq: 5000 10 | group: iql-maze2d-umaze-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: relocate-human-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-relocate-human-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/iql/maze2d/medium_v1.yaml: 
-------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: maze2d-medium-v1 9 | eval_freq: 5000 10 | group: iql-maze2d-medium-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: relocate-cloned-v1 9 | eval_freq: 5000 10 | group: iql-adroit-relocate-cloned-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: relocate-expert-v1 9 | eval_freq: 5000 10 | group: iql-adroit-relocate-expert-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 
0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | device: cuda 7 | discount: 0.99 8 | env: relocate-human-v1 9 | eval_freq: 5000 10 | group: iql-adroit-relocate-human-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.8 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: walker2d-expert-v2 9 | eval_freq: 5000 10 | group: iql-walker2d-expert-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/walker2d/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: walker2d-random-v2 9 | eval_freq: 5000 10 | group: iql-walker2d-random-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 
15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: antmaze-large-play-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-antmaze-large-play-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: true 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/halfcheetah/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: halfcheetah-expert-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-halfcheetah-expert-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: halfcheetah-medium-v2 8 | eval_freq: 5000 9 | 
expl_noise: 0.1 10 | group: td3-bc-halfcheetah-medium-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: halfcheetah-random-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-halfcheetah-random-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hopper/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hopper-full-replay-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-hopper-full-replay-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/maze2d/large_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 
256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: maze2d-large-dense-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-maze2d-large-dense-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/maze2d/umaze_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: maze2d-umaze-dense-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-maze2d-umaze-dense-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: relocate-cloned-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-relocate-cloned-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- 
/configs/offline/td3_bc/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: relocate-expert-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-adroit-relocate-expert-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | # Main dependencies 2 | git+https://github.com/tinkoff-ai/d4rl@master#egg=d4rl 3 | tqdm==4.64.0 4 | wandb==0.12.21 5 | mujoco-py==2.1.2.14 6 | numpy==1.23.1 7 | gym[mujoco_py,classic_control]==0.23.0 8 | --extra-index-url https://download.pytorch.org/whl/cu113 9 | torch==1.11.0+cu113 10 | pyrallis==0.3.1 11 | --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html 12 | jax==0.4.1 13 | jaxlib[cuda11_cudnn82]==0.4.1 14 | flax==0.6.1 15 | optax==0.1.3 16 | distrax==0.1.2 17 | chex==0.1.5 18 | -------------------------------------------------------------------------------- /configs/finetune/awac/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-large-play-v2 8 | eval_frequency: 50000 9 | gamma: 0.99 10 | group: awac-antmaze-large-play-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | offline_iterations: 1000000 16 | online_iterations: 
1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/finetune/awac/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-medium-play-v2 8 | eval_frequency: 50000 9 | gamma: 0.99 10 | group: awac-antmaze-medium-play-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/iql/halfcheetah/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: halfcheetah-expert-v2 9 | eval_freq: 5000 10 | group: iql-halfcheetah-expert-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: halfcheetah-random-v2 9 | eval_freq: 5000 10 | group: iql-halfcheetah-random-v2-multiseed-v0 11 | 
iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hopper/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: hopper-full-replay-v2 9 | eval_freq: 5000 10 | group: iql-hopper-full-replay-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: walker2d-medium-v2 9 | eval_freq: 5000 10 | group: iql-walker2d-medium-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | 
checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: antmaze-medium-play-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-antmaze-medium-play-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: true 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hopper-medium-expert-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-hopper-medium-expert-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: hopper-medium-replay-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-hopper-medium-replay-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- 
/configs/offline/td3_bc/maze2d/medium_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: maze2d-medium-dense-v1 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-maze2d-medium-dense-v1-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/walker2d/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: walker2d-full-replay-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-walker2d-full-replay-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/finetune/awac/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-large-diverse-v2 8 | eval_frequency: 50000 9 | gamma: 0.99 10 | group: awac-antmaze-large-diverse-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | offline_iterations: 1000000 16 | 
online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/finetune/awac/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-umaze-diverse-v2 8 | eval_frequency: 50000 9 | gamma: 0.99 10 | group: awac-antmaze-umaze-diverse-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/iql/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-large-play-v2 9 | eval_freq: 5000 10 | group: iql-antmaze-large-play-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-medium-play-v2 9 | eval_freq: 5000 10 | group: 
iql-antmaze-medium-play-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/maze2d/large_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: maze2d-large-dense-v1 9 | eval_freq: 5000 10 | group: iql-maze2d-large-dense-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/maze2d/medium_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: maze2d-medium-dense-v1 9 | eval_freq: 5000 10 | group: iql-maze2d-medium-dense-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/maze2d/umaze_dense_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | 
batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: maze2d-umaze-dense-v1 9 | eval_freq: 5000 10 | group: iql-maze2d-umaze-dense-v1-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: antmaze-large-diverse-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-antmaze-large-diverse-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: true 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: antmaze-medium-diverse-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-antmaze-medium-diverse-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: true 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | 
-------------------------------------------------------------------------------- /configs/offline/td3_bc/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: antmaze-umaze-diverse-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-antmaze-umaze-diverse-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 100 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: true 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: walker2d-medium-expert-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-walker2d-medium-expert-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: walker2d-medium-replay-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-walker2d-medium-replay-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 
| name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/finetune/awac/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | awac_lambda: 0.1 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | deterministic_torch: false 6 | device: cuda 7 | env_name: antmaze-medium-diverse-v2 8 | eval_frequency: 50000 9 | gamma: 0.99 10 | group: awac-antmaze-medium-diverse-v2-multiseed-v0 11 | hidden_dim: 256 12 | learning_rate: 0.0003 13 | n_test_episodes: 100 14 | normalize_reward: true 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | project: CORL 18 | seed: 42 19 | tau: 0.005 20 | test_seed: 69 -------------------------------------------------------------------------------- /configs/offline/iql/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-large-diverse-v2 9 | eval_freq: 5000 10 | group: iql-antmaze-large-diverse-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 
0.99 8 | env: antmaze-umaze-diverse-v2 9 | eval_freq: 5000 10 | group: iql-antmaze-umaze-diverse-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: halfcheetah-medium-v2 9 | eval_freq: 5000 10 | group: iql-halfcheetah-medium-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: hopper-medium-replay-v2 9 | eval_freq: 5000 10 | group: iql-hopper-medium-replay-v2-multiseed-v0 11 | iql_deterministic: true 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.001 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/walker2d/full_replay_v2.yaml: 
-------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: walker2d-full-replay-v2 9 | eval_freq: 5000 10 | group: iql-walker2d-full-replay-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/halfcheetah/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: halfcheetah-full-replay-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-halfcheetah-full-replay-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/iql/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-medium-diverse-v2 9 | eval_freq: 5000 10 | group: iql-antmaze-medium-diverse-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 100 16 | name: IQL 17 | normalize: true 18 | normalize_reward: true 19 | qf_lr: 3e-4 20 | 
project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/halfcheetah/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: halfcheetah-full-replay-v2 9 | eval_freq: 5000 10 | group: iql-halfcheetah-full-replay-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | qf_lr: 3e-4 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | vf_lr: 3e-4 24 | -------------------------------------------------------------------------------- /configs/offline/iql/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 6.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: hopper-medium-expert-v2 9 | eval_freq: 5000 10 | group: iql-hopper-medium-expert-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.5 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: halfcheetah-medium-expert-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: 
td3-bc-halfcheetah-medium-expert-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/offline/td3_bc/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | alpha: 2.5 2 | batch_size: 256 3 | buffer_size: 10000000 4 | checkpoints_path: null 5 | device: cuda 6 | discount: 0.99 7 | env: halfcheetah-medium-replay-v2 8 | eval_freq: 5000 9 | expl_noise: 0.1 10 | group: td3-bc-halfcheetah-medium-replay-v2-multiseed-v0 11 | load_model: '' 12 | max_timesteps: 1000000 13 | n_episodes: 10 14 | name: TD3-BC 15 | noise_clip: 0.5 16 | normalize: true 17 | normalize_reward: false 18 | policy_freq: 2 19 | policy_noise: 0.2 20 | project: CORL 21 | seed: 0 22 | tau: 0.005 23 | -------------------------------------------------------------------------------- /configs/finetune/iql/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-umaze-v2 9 | eval_freq: 50000 10 | group: IQL-D4RL 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | offline_iterations: 1000000 15 | online_iterations: 1000000 16 | n_episodes: 100 17 | name: IQL_antmaze-umaze-v2 18 | normalize: true 19 | normalize_reward: true 20 | qf_lr: 3e-4 21 | project: CORL 22 | seed: 0 23 | tau: 0.005 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | 
actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: walker2d-medium-expert-v2 9 | eval_freq: 5000 10 | group: iql-walker2d-medium-expert-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: walker2d-medium-replay-v2 9 | eval_freq: 5000 10 | group: iql-walker2d-medium-replay-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | checkpoints_path: null 7 | device: cuda 8 | discount: 0.99 9 | env: pen-cloned-v1 10 | eval_freq: 5000 11 | group: IQL-D4RL 12 | iql_deterministic: false 13 | iql_tau: 0.8 14 | load_model: '' 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | n_episodes: 10 18 | name: IQL_pen-cloned-v1 19 | normalize: true 20 | normalize_reward: false 21 | qf_lr: 3e-4 22 | project: CORL 23 | seed: 0 24 | tau: 0.005 25 | 
vf_lr: 3e-4 26 | -------------------------------------------------------------------------------- /configs/offline/iql/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: halfcheetah-medium-expert-v2 9 | eval_freq: 5000 10 | group: iql-halfcheetah-medium-expert-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/offline/iql/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 3.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: halfcheetah-medium-replay-v2 9 | eval_freq: 5000 10 | group: iql-halfcheetah-medium-replay-v2-multiseed-v0 11 | iql_deterministic: false 12 | iql_tau: 0.7 13 | load_model: '' 14 | max_timesteps: 1000000 15 | n_episodes: 10 16 | name: IQL 17 | normalize: true 18 | normalize_reward: false 19 | project: CORL 20 | qf_lr: 3e-4 21 | seed: 0 22 | tau: 0.005 23 | 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-large-play-v2 9 | eval_freq: 50000 10 | group: IQL-D4RL 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | 
load_model: '' 14 | offline_iterations: 1000000 15 | online_iterations: 1000000 16 | n_episodes: 100 17 | name: IQL_antmaze-large-play-v2 18 | normalize: true 19 | normalize_reward: true 20 | qf_lr: 3e-4 21 | project: CORL 22 | seed: 0 23 | tau: 0.005 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-medium-play-v2 9 | eval_freq: 50000 10 | group: IQL-D4RL 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | offline_iterations: 1000000 15 | online_iterations: 1000000 16 | n_episodes: 100 17 | name: IQL_antmaze-medium-play-v2 18 | normalize: true 19 | normalize_reward: true 20 | qf_lr: 3e-4 21 | project: CORL 22 | seed: 0 23 | tau: 0.005 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | checkpoints_path: null 7 | device: cuda 8 | discount: 0.99 9 | env: door-cloned-v1 10 | eval_freq: 5000 11 | group: IQL-D4RL 12 | iql_deterministic: false 13 | iql_tau: 0.8 14 | load_model: '' 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | n_episodes: 10 18 | name: IQL_door-cloned-v1 19 | normalize: true 20 | normalize_reward: false 21 | qf_lr: 3e-4 22 | project: CORL 23 | seed: 0 24 | tau: 0.005 25 | vf_lr: 3e-4 26 | -------------------------------------------------------------------------------- /configs/finetune/iql/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | 
actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | checkpoints_path: null 7 | device: cuda 8 | discount: 0.99 9 | env: hammer-cloned-v1 10 | eval_freq: 5000 11 | group: IQL-D4RL 12 | iql_deterministic: false 13 | iql_tau: 0.8 14 | load_model: '' 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | n_episodes: 10 18 | name: IQL_hammer-cloned-v1 19 | normalize: true 20 | normalize_reward: false 21 | qf_lr: 3e-4 22 | project: CORL 23 | seed: 0 24 | tau: 0.005 25 | vf_lr: 3e-4 26 | -------------------------------------------------------------------------------- /requirements/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # Main dependencies 2 | git+https://github.com/tinkoff-ai/d4rl@master#egg=d4rl 3 | tqdm==4.64.0 4 | wandb==0.12.21 5 | mujoco-py==2.1.2.14 6 | numpy==1.23.1 7 | gym[mujoco_py,classic_control]==0.23.0 8 | --extra-index-url https://download.pytorch.org/whl/cu113 9 | torch==1.11.0+cu113 10 | pyrallis==0.3.1 11 | pre-commit==3.3.3 12 | ruff==0.0.278 13 | --find-links https://storage.googleapis.com/jax-releases/jax_cuda_releases.html 14 | jax==0.4.1 15 | jaxlib[cuda11_cudnn82]==0.4.1 16 | flax==0.6.1 17 | optax==0.1.3 18 | distrax==0.1.2 19 | chex==0.1.5 20 | -------------------------------------------------------------------------------- /configs/finetune/iql/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-large-diverse-v2 9 | eval_freq: 50000 10 | group: IQL-D4RL 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | offline_iterations: 1000000 15 | online_iterations: 1000000 16 | n_episodes: 100 17 | name: IQL_antmaze-large-diverse-v2 18 | normalize: true 19 | normalize_reward: true 20 | 
qf_lr: 3e-4 21 | project: CORL 22 | seed: 0 23 | tau: 0.005 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-medium-diverse-v2 9 | eval_freq: 50000 10 | group: IQL-D4RL 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | offline_iterations: 1000000 15 | online_iterations: 1000000 16 | n_episodes: 100 17 | name: IQL_antmaze-medium-diverse-v2 18 | normalize: true 19 | normalize_reward: true 20 | qf_lr: 3e-4 21 | project: CORL 22 | seed: 0 23 | tau: 0.005 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | batch_size: 256 3 | beta: 10.0 4 | buffer_size: 10000000 5 | checkpoints_path: null 6 | device: cuda 7 | discount: 0.99 8 | env: antmaze-umaze-diverse-v2 9 | eval_freq: 50000 10 | group: IQL-D4RL 11 | iql_deterministic: false 12 | iql_tau: 0.9 13 | load_model: '' 14 | offline_iterations: 1000000 15 | online_iterations: 1000000 16 | n_episodes: 100 17 | name: IQL_antmaze-umaze-diverse-v2 18 | normalize: true 19 | normalize_reward: true 20 | qf_lr: 3e-4 21 | project: CORL 22 | seed: 0 23 | tau: 0.005 24 | vf_lr: 3e-4 25 | -------------------------------------------------------------------------------- /configs/finetune/iql/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_lr: 3e-4 2 | actor_dropout: 0.1 3 | batch_size: 256 4 | beta: 3.0 5 | buffer_size: 10000000 6 | checkpoints_path: null 7 | device: cuda 8 | discount: 0.99 9 | env: 
relocate-cloned-v1 10 | eval_freq: 5000 11 | group: IQL-D4RL 12 | iql_deterministic: false 13 | iql_tau: 0.8 14 | load_model: '' 15 | offline_iterations: 1000000 16 | online_iterations: 1000000 17 | n_episodes: 10 18 | name: IQL_relocate-cloned-v1 19 | normalize: true 20 | normalize_reward: false 21 | qf_lr: 3e-4 22 | project: CORL 23 | seed: 0 24 | tau: 0.005 25 | vf_lr: 3e-4 26 | -------------------------------------------------------------------------------- /configs/offline/sac_n/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "pen-human-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-pen-human-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "door-cloned-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-door-cloned-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 
0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "door-expert-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-door-expert-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "door-human-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-door-human-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | 
checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "pen-cloned-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-pen-cloned-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "pen-expert-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-pen-expert-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-umaze-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-antmaze-umaze-v2-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | 
normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hammer-cloned-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-hammer-cloned-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hammer-expert-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-hammer-expert-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/hammer/human_v1.yaml: 
-------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hammer-human-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-hammer-human-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 4000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "maze2d-large-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-maze2d-large-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "maze2d-medium-v1" 10 | 
eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-maze2d-medium-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "maze2d-umaze-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-maze2d-umaze-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "door-human-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-door-human-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 
0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "pen-cloned-v1" 10 | eta: 10.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-pen-cloned-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 20 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "pen-expert-v1" 10 | eta: 10.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-pen-expert-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 20 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 
5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "pen-human-v1" 10 | eta: 1000.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-pen-human-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 20 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "relocate-human-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-relocate-human-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "walker2d-medium-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-walker2d-medium-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 
1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 20 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-umaze-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-antmaze-umaze-v2-multiseed-v0" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/door/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "door-cloned-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-door-cloned-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/door/expert_v1.yaml: 
-------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "door-expert-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-door-expert-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/hammer/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hammer-human-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-hammer-human-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: 
"hopper-medium-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-hopper-medium-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 4000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "maze2d-large-v1" 10 | eta: 0.1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-maze2d-large-v1-multiseed-v1" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "maze2d-medium-v1" 10 | eta: 0.1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-maze2d-medium-v1-multiseed-v1" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | 
num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "maze2d-umaze-v1" 10 | eta: 0.1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-maze2d-umaze-v1-multiseed-v1" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "relocate-cloned-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-relocate-cloned-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 
| alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "relocate-expert-v1" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-relocate-expert-v1-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 100 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hammer-cloned-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-hammer-cloned-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hammer-expert-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: 
"edac-hammer-expert-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "relocate-human-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-relocate-human-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-large-play-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-antmaze-large-play-v2-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 
-------------------------------------------------------------------------------- /configs/offline/sac_n/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "halfcheetah-medium-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-halfcheetah-medium-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 10 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "relocate-cloned-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-relocate-cloned-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 
2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "relocate-expert-v1" 10 | eta: 200.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-relocate-expert-v1-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "walker2d-medium-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-walker2d-medium-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-medium-play-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-antmaze-medium-play-v2-multiseed-v0" 15 | 
hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hopper-medium-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-hopper-medium-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 500 # damn boi he thicc 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/antmaze/large_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-large-play-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-antmaze-large-play-v2-multiseed-v0" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 
-------------------------------------------------------------------------------- /configs/offline/edac/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "halfcheetah-medium-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-halfcheetah-medium-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-large-diverse-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-antmaze-large-diverse-v2-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | 
batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-umaze-diverse-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-antmaze-umaze-diverse-v2-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hopper-medium-expert-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-hopper-medium-expert-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 200 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hopper-medium-replay-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: 
"sac-n-hopper-medium-replay-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 200 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/antmaze/medium_play_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-medium-play-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-antmaze-medium-play-v2-multiseed-v0" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-medium-diverse-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-antmaze-medium-diverse-v2-multiseed-v0" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 25 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 
-------------------------------------------------------------------------------- /configs/offline/sac_n/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "walker2d-medium-expert-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-walker2d-medium-expert-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 20 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "walker2d-medium-replay-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-walker2d-medium-replay-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 20 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/antmaze/large_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | 
batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-large-diverse-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-antmaze-large-diverse-v2-multiseed-v0" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/antmaze/umaze_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-umaze-diverse-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-antmaze-umaze-diverse-v2-multiseed-v0" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hopper-medium-replay-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | 
gamma: 0.99 15 | group: "edac-hopper-medium-replay-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/lb-sac/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "hopper-expert-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-hopper-expert-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 50 22 | num_epochs: 450 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: true 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "hopper-medium-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-hopper-medium-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 25 22 | num_epochs: 200 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau:
0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/hopper/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "hopper-random-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-hopper-random-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 25 22 | num_epochs: 250 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/sac_n/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "halfcheetah-medium-expert-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-halfcheetah-medium-expert-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 10 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/sac_n/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 |
actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "halfcheetah-medium-replay-v2" 10 | eval_episodes: 10 11 | eval_every: 5 12 | eval_seed: 42 13 | gamma: 0.99 14 | group: "sac-n-halfcheetah-medium-replay-v2-multiseed-v2" 15 | hidden_dim: 256 16 | log_every: 100 17 | max_action: 1.0 18 | name: "SAC-N" 19 | normalize_reward: false 20 | num_critics: 10 21 | num_epochs: 3000 22 | num_updates_on_epoch: 1000 23 | project: "CORL" 24 | tau: 0.005 25 | train_seed: 10 -------------------------------------------------------------------------------- /.github/workflows/codestyle.yml: -------------------------------------------------------------------------------- 1 | name: codestyle check 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.9" 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install -r requirements/requirements_dev.txt 23 | - name: check codestyle 24 | run: | 25 | ruff --config pyproject.toml --diff . 
-------------------------------------------------------------------------------- /configs/offline/edac/antmaze/medium_diverse_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "antmaze-medium-diverse-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-antmaze-medium-diverse-v2-multiseed-v0" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "hopper-medium-expert-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-hopper-medium-expert-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 50 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/edac/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | 
alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "walker2d-medium-expert-v2" 10 | eta: 5.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-walker2d-medium-expert-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "walker2d-medium-replay-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-walker2d-medium-replay-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/lb-sac/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: true 11 | env_name: "walker2d-expert-v2" 12
| eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-walker2d-expert-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 30 22 | num_epochs: 350 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "walker2d-medium-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-walker2d-medium-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 10 22 | num_epochs: 150 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/walker2d/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "walker2d-random-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-walker2d-random-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 15 22 | num_epochs: 150 23 |
num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/rebrac/door/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.1 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.1 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: door-human-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-door-human-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/pen/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.1 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.5 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: pen-human-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-pen-human-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/door/cloned_v1.yaml: 
-------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.1 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: door-cloned-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-door-cloned-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/door/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.05 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: door-expert-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-door-expert-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/pen/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.05 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.5 7 | critic_learning_rate: 
0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: pen-cloned-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-pen-cloned-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/pen/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: pen-expert-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-pen-expert-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/edac/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "halfcheetah-medium-expert-v2" 10 | eta: 5.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-halfcheetah-medium-expert-v2-multiseed-v2" 
16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/edac/halfcheetah/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0003 2 | alpha_learning_rate: 0.0003 3 | batch_size: 256 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_learning_rate: 0.0003 7 | deterministic_torch: false 8 | device: cuda 9 | env_name: "halfcheetah-medium-replay-v2" 10 | eta: 1.0 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: "edac-halfcheetah-medium-replay-v2-multiseed-v2" 16 | hidden_dim: 256 17 | log_every: 100 18 | max_action: 1.0 19 | name: "EDAC" 20 | normalize_reward: false 21 | num_critics: 10 22 | num_epochs: 3000 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 -------------------------------------------------------------------------------- /configs/offline/lb-sac/halfcheetah/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "halfcheetah-expert-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-halfcheetah-expert-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 6 22 | num_epochs: 350 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 |
-------------------------------------------------------------------------------- /configs/offline/lb-sac/halfcheetah/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "halfcheetah-medium-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-halfcheetah-medium-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 4 22 | num_epochs: 300 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "halfcheetah-random-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-halfcheetah-random-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 2 22 | num_epochs: 300 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/hopper/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 |
alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "hopper-full-replay-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-hopper-full-replay-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 25 22 | num_epochs: 150 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/rebrac/hammer/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.1 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.5 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hammer-cloned-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hammer-cloned-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/hammer/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.5 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hammer-human-v1 11 | eval_episodes: 10 12 |
eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hammer-human-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/hopper/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.1 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.1 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hopper-expert-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hopper-expert-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/hopper/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hopper-medium-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hopper-medium-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 
1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/hopper/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "hopper-medium-expert-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-hopper-medium-expert-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 40 22 | num_epochs: 150 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/hopper/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "hopper-medium-replay-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-hopper-medium-replay-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 20 22 | num_epochs: 150 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | --------------------------------------------------------------------------------
/configs/offline/lb-sac/walker2d/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "walker2d-full-replay-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-walker2d-full-replay-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 4 22 | num_epochs: 350 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/rebrac/hammer/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hammer-expert-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hammer-expert-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/hopper/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.001 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 |
batch_size: 1024 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hopper-random-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hopper-random-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/maze2d/large_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.003 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.001 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: maze2d-large-v1 11 | eval_episodes: 100 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-maze2d-large-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/maze2d/medium_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.003 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.001 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: maze2d-medium-v1 11 | eval_episodes: 100 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | 
group: rebrac-maze2d-medium-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/maze2d/umaze_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.003 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.001 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: maze2d-umaze-v1 11 | eval_episodes: 100 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-maze2d-umaze-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/relocate/human_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.1 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: relocate-human-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-relocate-human-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | 
policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/walker2d/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.5 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: walker2d-expert-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-walker2d-expert-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/walker2d/medium_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.05 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.1 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: walker2d-medium-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-walker2d-medium-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/walker2d/random_v2.yaml: 
-------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.0 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: walker2d-random-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-walker2d-random-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/walker2d/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "walker2d-medium-expert-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-walker2d-medium-expert-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 10 22 | num_epochs: 300 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/walker2d/medium_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6
| critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "walker2d-medium-replay-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-walker2d-medium-replay-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 4 22 | num_epochs: 350 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/rebrac/antmaze/umaze_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.003 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.002 7 | critic_learning_rate: 0.00005 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: antmaze-umaze-v2 11 | eval_episodes: 100 12 | eval_every: 50 13 | eval_seed: 42 14 | gamma: 0.999 15 | group: rebrac-antmaze-umaze-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: true 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/relocate/cloned_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.1 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: relocate-cloned-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-relocate-cloned-v1
16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/relocate/expert_v1.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.05 2 | actor_learning_rate: 0.0003 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 256 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.0003 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: relocate-expert-v1 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-relocate-expert-v1 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/halfcheetah/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 1000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "halfcheetah-full-replay-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-halfcheetah-full-replay-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 4 22 | num_epochs: 300 23 |
num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | -------------------------------------------------------------------------------- /configs/offline/rebrac/halfcheetah/expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: halfcheetah-expert-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-halfcheetah-expert-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/rebrac/halfcheetah/random_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.001 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.1 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: halfcheetah-random-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-halfcheetah-random-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- 
/configs/offline/rebrac/hopper/full_replay_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_bc_coef: 0.01 2 | actor_learning_rate: 0.001 3 | actor_ln: false 4 | actor_n_hiddens: 3 5 | batch_size: 1024 6 | critic_bc_coef: 0.01 7 | critic_learning_rate: 0.001 8 | critic_ln: true 9 | critic_n_hiddens: 3 10 | dataset_name: hopper-full-replay-v2 11 | eval_episodes: 10 12 | eval_every: 5 13 | eval_seed: 42 14 | gamma: 0.99 15 | group: rebrac-hopper-full-replay-v2 16 | hidden_dim: 256 17 | name: rebrac 18 | noise_clip: 0.5 19 | normalize_q: true 20 | normalize_reward: false 21 | normalize_states: false 22 | num_epochs: 1000 23 | num_updates_on_epoch: 1000 24 | policy_freq: 2 25 | policy_noise: 0.2 26 | project: ReBRAC 27 | tau: 0.005 28 | train_seed: 0 29 | -------------------------------------------------------------------------------- /configs/offline/lb-sac/halfcheetah/medium_expert_v2.yaml: -------------------------------------------------------------------------------- 1 | actor_learning_rate: 0.0018 2 | alpha_learning_rate: 0.0018 3 | batch_size: 10000 4 | buffer_size: 2000000 5 | checkpoints_path: null 6 | critic_layernorm: false 7 | critic_learning_rate: 0.0018 8 | deterministic_torch: false 9 | device: "cuda" 10 | edac_init: false 11 | env_name: "halfcheetah-medium-expert-v2" 12 | eval_episodes: 10 13 | eval_every: 5 14 | eval_seed: 42 15 | gamma: 0.99 16 | group: "lb-sac-halfcheetah-medium-expert-v2-multiseed-v0" 17 | hidden_dim: 256 18 | log_every: 100 19 | max_action: 1.0 20 | name: "LB-SAC" 21 | num_critics: 8 22 | num_epochs: 300 23 | num_updates_on_epoch: 1000 24 | project: "CORL" 25 | tau: 0.005 26 | train_seed: 10 27 | --------------------------------------------------------------------------------