├── .dockerignore
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.yml
│ ├── documentation.yml
│ ├── feature_request.yml
│ └── question.yml
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ ├── ci.yml
│ └── trained_agents.yml
├── .gitignore
├── .gitmodules
├── .readthedocs.yml
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── benchmark.md
├── docker
└── Dockerfile
├── docs
├── Makefile
├── README.md
├── _static
│ ├── css
│ │ └── baselines_theme.css
│ └── img
│ │ ├── colab-badge.svg
│ │ └── colab.svg
├── conda_env.yml
├── conf.py
├── guide
│ ├── config.rst
│ ├── custom_env.rst
│ ├── enjoy.rst
│ ├── install.rst
│ ├── integrations.rst
│ ├── plot.rst
│ ├── quickstart.rst
│ ├── sbx.rst
│ ├── train.rst
│ └── tuning.rst
├── index.rst
├── make.bat
├── misc
│ └── changelog.rst
├── modules
│ ├── callbacks.rst
│ ├── exp_manager.rst
│ ├── utils.rst
│ └── wrappers.rst
└── spelling_wordlist.txt
├── enjoy.py
├── hyperparams
├── a2c.yml
├── ars.yml
├── crossq.yml
├── ddpg.yml
├── dqn.yml
├── her.yml
├── ppo.yml
├── ppo_lstm.yml
├── python
│ └── ppo_config_example.py
├── qrdqn.yml
├── sac.yml
├── td3.yml
├── tqc.yml
└── trpo.yml
├── images
└── car.jpg
├── logs
└── benchmark
│ ├── a2c-Acrobot-v1
│ └── 0.monitor.csv
│ ├── a2c-Ant-v3
│ └── 0.monitor.csv
│ ├── a2c-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── a2c-AsteroidsNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-BeamRiderNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── a2c-BipedalWalkerHardcore-v3
│ └── 0.monitor.csv
│ ├── a2c-BreakoutNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-CartPole-v1
│ └── 0.monitor.csv
│ ├── a2c-EnduroNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── a2c-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── a2c-Hopper-v3
│ └── 0.monitor.csv
│ ├── a2c-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── a2c-Humanoid-v3
│ └── 0.monitor.csv
│ ├── a2c-LunarLander-v2
│ └── 0.monitor.csv
│ ├── a2c-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── a2c-MountainCar-v0
│ └── 0.monitor.csv
│ ├── a2c-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── a2c-MsPacmanNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-Pendulum-v1
│ └── 0.monitor.csv
│ ├── a2c-PongNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-QbertNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── a2c-RoadRunnerNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-SeaquestNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-SpaceInvadersNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── a2c-Swimmer-v3
│ └── 0.monitor.csv
│ ├── a2c-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ ├── a2c-Walker2d-v3
│ └── 0.monitor.csv
│ ├── ars-Acrobot-v1
│ └── 0.monitor.csv
│ ├── ars-Ant-v3
│ └── 0.monitor.csv
│ ├── ars-CartPole-v1
│ └── 0.monitor.csv
│ ├── ars-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── ars-Hopper-v3
│ └── 0.monitor.csv
│ ├── ars-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── ars-MountainCar-v0
│ └── 0.monitor.csv
│ ├── ars-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── ars-Pendulum-v1
│ └── 0.monitor.csv
│ ├── ars-Swimmer-v3
│ └── 0.monitor.csv
│ ├── ars-Walker2d-v3
│ └── 0.monitor.csv
│ ├── benchmark.md
│ ├── ddpg-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ddpg-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── ddpg-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ddpg-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ddpg-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── ddpg-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── ddpg-Pendulum-v1
│ └── 0.monitor.csv
│ ├── ddpg-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ddpg-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ ├── dqn-Acrobot-v1
│ └── 0.monitor.csv
│ ├── dqn-AsteroidsNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-BeamRiderNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-BreakoutNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-CartPole-v1
│ └── 0.monitor.csv
│ ├── dqn-EnduroNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-LunarLander-v2
│ └── 0.monitor.csv
│ ├── dqn-MountainCar-v0
│ └── 0.monitor.csv
│ ├── dqn-MsPacmanNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-PongNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-QbertNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-RoadRunnerNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-SeaquestNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── dqn-SpaceInvadersNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── her-FetchPickAndPlace-v1
│ └── 0.monitor.csv
│ ├── her-FetchPush-v1
│ └── 0.monitor.csv
│ ├── her-FetchReach-v1
│ └── 0.monitor.csv
│ ├── her-FetchSlide-v1
│ └── 0.monitor.csv
│ ├── her-parking-v0
│ └── 0.monitor.csv
│ ├── ppo-Acrobot-v1
│ └── 0.monitor.csv
│ ├── ppo-Ant-v3
│ └── 0.monitor.csv
│ ├── ppo-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ppo-AsteroidsNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-BeamRiderNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── ppo-BipedalWalkerHardcore-v3
│ └── 0.monitor.csv
│ ├── ppo-BreakoutNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-CarRacing-v0
│ └── 0.monitor.csv
│ ├── ppo-CartPole-v1
│ └── 0.monitor.csv
│ ├── ppo-EnduroNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── ppo-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ppo-Hopper-v3
│ └── 0.monitor.csv
│ ├── ppo-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ppo-LunarLander-v2
│ └── 0.monitor.csv
│ ├── ppo-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-DoorKey-5x5-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-Empty-Random-5x5-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-FourRooms-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-GoToDoor-5x5-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-KeyCorridorS3R1-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-LockedRoom-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-MultiRoom-N4-S5-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-ObstructedMaze-2Dlh-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-PutNear-6x6-N2-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-RedBlueDoors-6x6-v0
│ └── 0.monitor.csv
│ ├── ppo-MiniGrid-Unlock-v0
│ └── 0.monitor.csv
│ ├── ppo-MountainCar-v0
│ └── 0.monitor.csv
│ ├── ppo-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── ppo-MsPacmanNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-Pendulum-v1
│ └── 0.monitor.csv
│ ├── ppo-PongNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-QbertNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ppo-RoadRunnerNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-SeaquestNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-SpaceInvadersNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── ppo-Swimmer-v3
│ └── 0.monitor.csv
│ ├── ppo-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ ├── ppo-Walker2d-v3
│ └── 0.monitor.csv
│ ├── ppo_lstm-CarRacing-v0
│ └── 0.monitor.csv
│ ├── ppo_lstm-CartPoleNoVel-v1
│ └── 0.monitor.csv
│ ├── ppo_lstm-MountainCarContinuousNoVel-v0
│ └── 0.monitor.csv
│ ├── ppo_lstm-PendulumNoVel-v1
│ └── 0.monitor.csv
│ ├── qrdqn-Acrobot-v1
│ └── 0.monitor.csv
│ ├── qrdqn-AsteroidsNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-BeamRiderNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-BreakoutNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-CartPole-v1
│ └── 0.monitor.csv
│ ├── qrdqn-EnduroNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-LunarLander-v2
│ └── 0.monitor.csv
│ ├── qrdqn-MountainCar-v0
│ └── 0.monitor.csv
│ ├── qrdqn-MsPacmanNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-PongNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-QbertNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-RoadRunnerNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-SeaquestNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── qrdqn-SpaceInvadersNoFrameskip-v4
│ └── 0.monitor.csv
│ ├── sac-Ant-v3
│ └── 0.monitor.csv
│ ├── sac-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── sac-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── sac-BipedalWalkerHardcore-v3
│ └── 0.monitor.csv
│ ├── sac-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── sac-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── sac-Hopper-v3
│ └── 0.monitor.csv
│ ├── sac-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── sac-Humanoid-v3
│ └── 0.monitor.csv
│ ├── sac-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── sac-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── sac-Pendulum-v1
│ └── 0.monitor.csv
│ ├── sac-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── sac-Swimmer-v3
│ └── 0.monitor.csv
│ ├── sac-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ ├── sac-Walker2d-v3
│ └── 0.monitor.csv
│ ├── td3-Ant-v3
│ └── 0.monitor.csv
│ ├── td3-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── td3-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── td3-BipedalWalkerHardcore-v3
│ └── 0.monitor.csv
│ ├── td3-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── td3-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── td3-Hopper-v3
│ └── 0.monitor.csv
│ ├── td3-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── td3-Humanoid-v3
│ └── 0.monitor.csv
│ ├── td3-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── td3-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── td3-Pendulum-v1
│ └── 0.monitor.csv
│ ├── td3-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── td3-Swimmer-v3
│ └── 0.monitor.csv
│ ├── td3-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ ├── td3-Walker2d-v3
│ └── 0.monitor.csv
│ ├── tqc-Ant-v3
│ └── 0.monitor.csv
│ ├── tqc-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── tqc-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── tqc-BipedalWalkerHardcore-v3
│ └── 0.monitor.csv
│ ├── tqc-FetchPickAndPlace-v1
│ └── 0.monitor.csv
│ ├── tqc-FetchPush-v1
│ └── 0.monitor.csv
│ ├── tqc-FetchReach-v1
│ └── 0.monitor.csv
│ ├── tqc-FetchSlide-v1
│ └── 0.monitor.csv
│ ├── tqc-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── tqc-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── tqc-Hopper-v3
│ └── 0.monitor.csv
│ ├── tqc-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── tqc-Humanoid-v3
│ └── 0.monitor.csv
│ ├── tqc-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── tqc-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── tqc-PandaPickAndPlace-v1
│ └── 0.monitor.csv
│ ├── tqc-PandaPush-v1
│ └── 0.monitor.csv
│ ├── tqc-PandaReach-v1
│ └── 0.monitor.csv
│ ├── tqc-PandaSlide-v1
│ └── 0.monitor.csv
│ ├── tqc-PandaStack-v1
│ └── 0.monitor.csv
│ ├── tqc-Pendulum-v1
│ └── 0.monitor.csv
│ ├── tqc-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── tqc-Swimmer-v3
│ └── 0.monitor.csv
│ ├── tqc-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ ├── tqc-Walker2d-v3
│ └── 0.monitor.csv
│ ├── tqc-parking-v0
│ └── 0.monitor.csv
│ ├── trpo-Acrobot-v1
│ └── 0.monitor.csv
│ ├── trpo-Ant-v3
│ └── 0.monitor.csv
│ ├── trpo-AntBulletEnv-v0
│ └── 0.monitor.csv
│ ├── trpo-BipedalWalker-v3
│ └── 0.monitor.csv
│ ├── trpo-CartPole-v1
│ └── 0.monitor.csv
│ ├── trpo-HalfCheetah-v3
│ └── 0.monitor.csv
│ ├── trpo-HalfCheetahBulletEnv-v0
│ └── 0.monitor.csv
│ ├── trpo-Hopper-v3
│ └── 0.monitor.csv
│ ├── trpo-HopperBulletEnv-v0
│ └── 0.monitor.csv
│ ├── trpo-LunarLander-v2
│ └── 0.monitor.csv
│ ├── trpo-LunarLanderContinuous-v2
│ └── 0.monitor.csv
│ ├── trpo-MountainCar-v0
│ └── 0.monitor.csv
│ ├── trpo-MountainCarContinuous-v0
│ └── 0.monitor.csv
│ ├── trpo-Pendulum-v1
│ └── 0.monitor.csv
│ ├── trpo-ReacherBulletEnv-v0
│ └── 0.monitor.csv
│ ├── trpo-Swimmer-v3
│ └── 0.monitor.csv
│ ├── trpo-Walker2DBulletEnv-v0
│ └── 0.monitor.csv
│ └── trpo-Walker2d-v3
│ └── 0.monitor.csv
├── pyproject.toml
├── requirements.txt
├── rl_zoo3
├── __init__.py
├── benchmark.py
├── callbacks.py
├── cli.py
├── enjoy.py
├── exp_manager.py
├── gym_patches.py
├── hyperparams_opt.py
├── import_envs.py
├── load_from_hub.py
├── plots
│ ├── __init__.py
│ ├── all_plots.py
│ ├── plot_from_file.py
│ ├── plot_train.py
│ └── score_normalization.py
├── push_to_hub.py
├── py.typed
├── record_training.py
├── record_video.py
├── train.py
├── utils.py
├── version.txt
└── wrappers.py
├── scripts
├── __init__.py
├── all_plots.py
├── build_docker.sh
├── create_cluster_jobs.py
├── create_mujoco_jobs.py
├── migrate_to_hub.py
├── parse_study.py
├── plot_from_file.py
├── plot_train.py
├── run_docker_cpu.sh
├── run_docker_gpu.sh
├── run_jobs.py
└── run_tests.sh
├── setup.py
├── tests
├── dummy_env
│ ├── setup.py
│ └── test_env
│ │ ├── __init__.py
│ │ ├── config.py
│ │ └── test_env.py
├── test_callbacks.py
├── test_enjoy.py
├── test_hyperparams_opt.py
├── test_train.py
└── test_wrappers.py
└── train.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | logs
3 | .pytest_cache/
4 | .coverage
5 | .coverage.*
6 | .idea/
7 | cluster_sbatch.sh
8 | cluster_sbatch_mpi.sh
9 | cluster_torchy.sh
10 | logs/
11 | .pytype/
12 | htmlcov/
13 | git_rewrite_commit_history.sh
14 | .vscode/
15 | # ignore for docker builds
16 | rl-trained-agents/
17 | .git/
18 | .mypy_cache/
19 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F41B Bug Report"
2 | description: If you encounter an unexpected behavior, software crash, or other bug.
3 | title: "[Bug]: bug title"
4 | labels: ["bug"]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
10 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
11 | - type: textarea
12 | id: description
13 | attributes:
14 | label: 🐛 Bug
15 | description: A clear and concise description of what the bug is.
16 | validations:
17 | required: true
18 | - type: textarea
19 | id: reproduce
20 | attributes:
21 | label: To Reproduce
22 | description: |
23 | Steps to reproduce the behavior. Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
24 | Please use the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
25 | value: |
26 | ```shell
27 | python train.py --algo ...
28 | ```
29 |
30 | - type: textarea
31 | id: traceback
32 | attributes:
33 | label: Relevant log output / Error message
34 | description: Please copy and paste any relevant log output / error message. This will be automatically formatted into code, so no need for backticks.
35 | placeholder: "Traceback (most recent call last): File ..."
36 | render: shell
37 |
38 | - type: textarea
39 | id: system-info
40 | attributes:
41 | label: System Info
42 | description: |
43 | Describe the characteristics of your environment:
44 | * Describe how the library was installed (pip, docker, source, ...)
45 | * GPU models and configuration
46 | * Python version
47 | * PyTorch version
48 | * Gymnasium version
49 | * (if installed) OpenAI Gym version
50 | * Versions of any other relevant libraries
51 |
52 | You can use `sb3.get_system_info()` to print relevant packages info:
53 | ```sh
54 | python -c 'import stable_baselines3 as sb3; sb3.get_system_info()'
55 | ```
56 | - type: checkboxes
57 | id: terms
58 | attributes:
59 | label: Checklist
60 | options:
61 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
62 | required: true
63 | - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
64 | required: true
65 | - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
66 | required: true
67 | - label: I have provided a [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014) example to reproduce the bug
68 | required: true
69 | - label: I've used the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
70 | required: true
71 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F4DA Documentation"
2 | description: If you want to improve the documentation by reporting errors, inconsistencies, or missing information.
3 | labels: ["documentation"]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: |
8 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
9 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
10 | - type: textarea
11 | id: description
12 | attributes:
13 | label: 📚 Documentation
14 | description: A clear and concise description of what should be improved in the documentation.
15 | validations:
16 | required: true
17 | - type: checkboxes
18 | id: terms
19 | attributes:
20 | label: Checklist
21 | options:
22 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
23 | required: true
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F680 Feature Request"
2 | description: How to create an issue for requesting a feature
3 | title: "[Feature Request] request title"
4 | labels: ["enhancement"]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
10 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
11 | - type: textarea
12 | id: description
13 | attributes:
14 | label: 🚀 Feature
15 | description: A clear and concise description of the feature proposal.
16 | validations:
17 | required: true
18 | - type: textarea
19 | id: motivation
20 | attributes:
21 | label: Motivation
22 | description: Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., "I'm always frustrated when [...]". If this is related to another GitHub issue, please link here too.
23 | - type: textarea
24 | id: pitch
25 | attributes:
26 | label: Pitch
27 | description: A clear and concise description of what you want to happen.
28 | - type: textarea
29 | id: alternatives
30 | attributes:
31 | label: Alternatives
32 | description: A clear and concise description of any alternative solutions or features you've considered, if any.
33 | - type: textarea
34 | id: additional-context
35 | attributes:
36 | label: Additional context
37 | description: Add any other context or screenshots about the feature request here.
38 | - type: checkboxes
39 | id: terms
40 | attributes:
41 | label: Checklist
42 | options:
43 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
44 | required: true
45 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.yml:
--------------------------------------------------------------------------------
1 | name: "❓ Question"
2 | description: If you have a general question about RL Baselines3 Zoo
3 | title: "[Question] question title"
4 | labels: ["question"]
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: |
9 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
10 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
11 | - type: textarea
12 | id: question
13 | attributes:
14 | label: ❓ Question
15 | description: |
16 | Your question. This can be e.g. questions regarding confusing or unclear behaviour of functions or a question if X can be done using stable-baselines3. Make sure to check out the documentation first.
17 | **Important Note: If your question is anything like "Why is my code generating this error?", you must [submit a bug report](https://github.com/DLR-RM/rl-baselines3-zoo/issues/new?assignees=&labels=bug&projects=&template=bug_report.yml&title=%5BBug%5D%3A+bug+title) instead.**
18 | validations:
19 | required: true
20 | - type: checkboxes
21 | id: terms
22 | attributes:
23 | label: Checklist
24 | options:
25 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
26 | required: true
27 | - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
28 | required: true
29 | - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
30 | required: true
31 | - label: If there is code, it is [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014)
32 | required: true
33 | - label: If there is code, it is formatted using the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
34 | required: true
35 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## Description
4 |
5 |
6 | ## Motivation and Context
7 |
8 |
9 |
10 | - [ ] I have raised an issue to propose this change ([required](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) for new features and bug fixes)
11 |
12 | ## Types of changes
13 |
14 | - [ ] Bug fix (non-breaking change which fixes an issue)
15 | - [ ] New feature (non-breaking change which adds functionality)
16 | - [ ] Breaking change (fix or feature that would cause existing functionality to change)
17 | - [ ] Documentation (update in the documentation)
18 |
19 | ## Checklist:
20 |
21 |
22 | - [ ] I've read the [CONTRIBUTION](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) guide (**required**)
23 | - [ ] I have updated the changelog accordingly (**required**).
24 | - [ ] My change requires a change to the documentation.
25 | - [ ] I have updated the tests accordingly (*required for a bug fix or a new feature*).
26 | - [ ] I have updated the documentation accordingly.
27 | - [ ] I have reformatted the code using `make format` (**required**)
28 | - [ ] I have checked the codestyle using `make check-codestyle` and `make lint` (**required**)
29 | - [ ] I have ensured `make pytest` and `make type` both pass. (**required**)
30 |
31 |
32 | Note: we are using a maximum length of 127 characters per line
33 |
34 |
35 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: CI
5 |
6 | on:
7 | push:
8 | branches: [master]
9 | pull_request:
10 | branches: [master]
11 |
12 | jobs:
13 | build:
14 | env:
15 | TERM: xterm-256color
16 | FORCE_COLOR: 1
17 | HF_TOKEN: ${{ secrets.HF_TOKEN }}
18 | # Skip CI if [ci skip] in the commit message
19 | if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')"
20 | runs-on: ubuntu-latest
21 | strategy:
22 | matrix:
23 | python-version: ["3.9", "3.10", "3.11", "3.12"]
24 | include:
25 | # Default version
26 | - gymnasium-version: "1.0.0"
27 | # Add a new config to test gym<1.0
28 | - python-version: "3.10"
29 | gymnasium-version: "0.29.1"
30 | steps:
31 | - uses: actions/checkout@v3
32 | with:
33 | submodules: true
34 | - name: Set up Python ${{ matrix.python-version }}
35 | uses: actions/setup-python@v4
36 | with:
37 | python-version: ${{ matrix.python-version }}
38 | - name: Install dependencies
39 | run: |
40 | python -m pip install --upgrade pip
41 | # Use uv for faster downloads
42 | pip install uv
43 | # cpu version of pytorch
44 | # See https://github.com/astral-sh/uv/issues/1497
45 | uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
46 | # Install full requirements (for additional envs and test tools)
47 | uv pip install --system -r requirements.txt
48 | # Use headless version
49 | uv pip install --system opencv-python-headless
50 | uv pip install --system -e .[plots,tests]
51 |
52 | - name: Install specific version of gym
53 | run: |
54 | uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
55 | uv pip install --system "numpy<2"
56 | # Only run for python 3.10, downgrade gym to 0.29.1
57 | if: matrix.gymnasium-version != '1.0.0'
58 |
59 | - name: Lint with ruff
60 | run: |
61 | make lint
62 | - name: Check codestyle
63 | run: |
64 | make check-codestyle
65 | - name: Build the doc
66 | run: |
67 | make doc
68 | - name: Type check
69 | run: |
70 | make type
71 | - name: Test with pytest
72 | run: |
73 | make pytest
74 |
--------------------------------------------------------------------------------
/.github/workflows/trained_agents.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run check on trained agents
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Check-Trained-Agents
5 |
6 | on:
7 | push:
8 | branches: [master]
9 | pull_request:
10 | branches: [master]
11 |
12 | jobs:
13 | build:
14 | env:
15 | TERM: xterm-256color
16 | FORCE_COLOR: 1
17 | HF_TOKEN: ${{ secrets.HF_TOKEN }}
18 | # Skip CI if [ci skip] in the commit message
19 | if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')"
20 | runs-on: ubuntu-latest
21 | strategy:
22 | matrix:
23 | python-version: ["3.9", "3.10", "3.11", "3.12"]
24 | include:
25 | # Default version
26 | - gymnasium-version: "1.0.0"
27 | # Add a new config to test gym<1.0
28 | - python-version: "3.10"
29 | gymnasium-version: "0.29.1"
30 | steps:
31 | - uses: actions/checkout@v3
32 | with:
33 | submodules: true
34 | - name: Set up Python ${{ matrix.python-version }}
35 | uses: actions/setup-python@v4
36 | with:
37 | python-version: ${{ matrix.python-version }}
38 | - name: Install dependencies
39 | run: |
40 | python -m pip install --upgrade pip
41 |
42 | # Use uv for faster downloads
43 | pip install uv
44 | # cpu version of pytorch
45 | # See https://github.com/astral-sh/uv/issues/1497
46 | uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
47 | # Install full requirements (for additional envs and test tools)
48 | uv pip install --system -r requirements.txt
49 | # Use headless version
50 | uv pip install --system opencv-python-headless
51 | uv pip install --system -e .[plots,tests]
52 | # Downgrade numpy to run pybullet agents
53 | # See https://github.com/bulletphysics/bullet3/issues/4649
54 | uv pip install --system "numpy<2"
55 |
56 | - name: Install specific version of gym
57 | run: |
58 | uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
59 | uv pip install --system "numpy<2"
60 | # Only run for python 3.10, downgrade gym to 0.29.1
61 | if: matrix.gymnasium-version != '1.0.0'
62 |
63 | - name: Check trained agents
64 | run: |
65 | make check-trained-agents
66 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | logs
3 | .pytest_cache/
4 | .coverage
5 | .coverage.*
6 | .idea/
7 | cluster_sbatch.sh
8 | cluster_sbatch_mpi.sh
9 | cluster_torchy.sh
10 | logs/
11 | rl-trained_agents/
12 | .pytype/
13 | htmlcov/
14 | git_rewrite_commit_history.sh
15 | .vscode/
16 | wandb
17 | runs
18 | hub
19 | *.mp4
20 | *.json
21 | _build/
22 |
23 | tests/dummy_env/build/
24 |
25 |
26 | # Setuptools distribution and build folders.
27 | /dist/
28 | /build
29 | keys/
30 | *.egg-info
31 | .cache
32 | *.lprof
33 | *.prof
34 |
35 | # Environments
36 | .env
37 | .venv
38 | env/
39 | venv/
40 | ENV/
41 | env.bak/
42 | venv.bak/
43 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "rl-trained-agents"]
2 | path = rl-trained-agents
3 | url = https://github.com/DLR-RM/rl-trained-agents
4 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Build documentation in the docs/ directory with Sphinx
8 | sphinx:
9 | configuration: docs/conf.py
10 |
11 | # Optionally build your docs in additional formats such as PDF and ePub
12 | formats: all
13 |
14 | # Set requirements using conda env
15 | conda:
16 | environment: docs/conda_env.yml
17 |
18 | build:
19 | os: ubuntu-24.04
20 | tools:
21 | python: "mambaforge-23.11"
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Antonin RAFFIN
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | LINT_PATHS = *.py tests/ scripts/ rl_zoo3/ hyperparams/python/*.py docs/conf.py
2 |
3 | # Run pytest and coverage report
4 | pytest:
5 | ./scripts/run_tests.sh
6 |
7 | # check all trained agents (slow)
8 | check-trained-agents:
9 | python -m pytest -v tests/test_enjoy.py -k trained_agent --color=yes
10 |
11 | mypy:
12 | mypy ${LINT_PATHS} --install-types --non-interactive
13 |
14 | type: mypy
15 |
16 | lint:
17 | # stop the build if there are Python syntax errors or undefined names
18 | # see https://www.flake8rules.com/
19 | ruff check ${LINT_PATHS} --select=E9,F63,F7,F82 --output-format=full
20 | # exit-zero treats all errors as warnings.
21 | ruff check ${LINT_PATHS} --exit-zero --output-format=concise
22 |
23 | format:
24 | # Sort imports
25 | ruff check --select I ${LINT_PATHS} --fix
26 | # Reformat using black
27 | black ${LINT_PATHS}
28 |
29 | check-codestyle:
30 | # Check import ordering (report only, nothing is rewritten)
31 | ruff check --select I ${LINT_PATHS}
32 | # Check formatting using black (report only, nothing is rewritten)
33 | black --check ${LINT_PATHS}
34 |
35 | commit-checks: format type lint
36 |
37 | doc:
38 | cd docs && make html
39 |
40 | spelling:
41 | cd docs && make spelling
42 |
43 | clean:
44 | cd docs && make clean
45 |
46 | docker: docker-cpu docker-gpu
47 |
48 | docker-cpu:
49 | ./scripts/build_docker.sh
50 |
51 | docker-gpu:
52 | USE_GPU=True ./scripts/build_docker.sh
53 |
54 | # PyPi package release
55 | release:
56 | # rm -r build/* dist/*
57 | python -m build -s
58 | python -m build -w
59 | twine upload dist/*
60 |
61 | # Test PyPi package release
62 | test-release:
63 | # rm -r build/* dist/*
64 | python -m build -s
65 | python -m build -w
66 | twine upload --repository-url https://test.pypi.org/legacy/ dist/*
67 |
68 | .PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest
69 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG PARENT_IMAGE=stablebaselines/stable-baselines3
2 | FROM $PARENT_IMAGE
3 |
4 | USER root
5 |
6 | RUN apt-get -y update \
7 | && apt-get -y install \
8 | ffmpeg \
9 | swig \
10 | # For building box2d
11 | build-essential \
12 | && apt-get clean \
13 | && rm -rf /var/lib/apt/lists/*
14 |
15 | USER $MAMBA_USER
16 |
17 | ENV CODE_DIR=/home/$MAMBA_USER/code
18 | COPY requirements.txt /tmp/
19 |
20 | # Copy setup file only to install dependencies
21 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./setup.py ${CODE_DIR}/rl_zoo3/setup.py
22 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./rl_zoo3/version.txt ${CODE_DIR}/rl_zoo3/rl_zoo3/version.txt
23 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./hyperparams ${CODE_DIR}/rl_zoo3/hyperparams
24 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./rl_zoo3/plots ${CODE_DIR}/rl_zoo3/rl_zoo3/plots
25 |
26 |
27 | RUN \
28 | eval "$(micromamba shell hook --shell bash)" && \
29 | micromamba activate && \
30 | cd ${CODE_DIR}/rl_zoo3 && \
31 | uv pip uninstall stable-baselines3 && \
32 | uv pip install --system -r /tmp/requirements.txt && \
33 | uv pip install --system -e ".[plots,tests]" && \
34 | uv cache clean
35 |
36 | CMD /bin/bash
37 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | # For debug: SPHINXOPTS = -nWT --keep-going -vvv
6 | SPHINXOPTS = -W # make warnings fatal (disabled because of gym in the wrappers)
7 | SPHINXBUILD = sphinx-build
8 | SPHINXPROJ = RLZoo
9 | SOURCEDIR = .
10 | BUILDDIR = _build
11 |
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 |
16 | .PHONY: help Makefile
17 |
18 | # Catch-all target: route all unknown targets to Sphinx using the new
19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
20 | %: Makefile
21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ## RL Zoo3 Documentation
2 |
3 | This folder contains documentation for the RL Zoo.
4 |
5 |
6 | ### Build the Documentation
7 |
8 | #### Install Sphinx and Theme
9 | Execute this command in the project root:
10 | ```
11 | pip install stable_baselines3[docs]
12 | pip install -e .
13 | ```
14 |
15 | #### Building the Docs
16 |
17 | In the `docs/` folder:
18 | ```
19 | make html
20 | ```
21 |
22 | If you want to rebuild each time a file is changed:
23 |
24 | ```
25 | sphinx-autobuild . _build/html
26 | ```
27 |
--------------------------------------------------------------------------------
/docs/_static/css/baselines_theme.css:
--------------------------------------------------------------------------------
1 | /* Main colors adapted from pytorch doc */
2 | :root{
3 | --main-bg-color: #343A40;
4 | --link-color: #FD7E14;
5 | }
6 |
7 | /* Header fonts */
8 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
9 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
10 | }
11 |
12 |
13 | /* Docs background */
14 | .wy-side-nav-search{
15 | background-color: var(--main-bg-color);
16 | }
17 |
18 | /* Mobile version */
19 | .wy-nav-top{
20 | background-color: var(--main-bg-color);
21 | }
22 |
23 | /* Change link colors (except for the menu) */
24 | a {
25 | color: var(--link-color);
26 | }
27 |
28 | a:hover {
29 | color: #4F778F;
30 | }
31 |
32 | .wy-menu a {
33 | color: #b3b3b3;
34 | }
35 |
36 | .wy-menu a:hover {
37 | color: #b3b3b3;
38 | }
39 |
40 | a.icon.icon-home {
41 | color: #b3b3b3;
42 | }
43 |
44 | .version{
45 | color: var(--link-color) !important;
46 | }
47 |
48 |
49 | /* Make code blocks have a background */
50 | .codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] {
51 | background: #f8f8f8;;
52 | }
53 |
54 | /* Change style of types in the docstrings .rst-content .field-list */
55 | .field-list .xref.py.docutils, .field-list code.docutils, .field-list .docutils.literal.notranslate
56 | {
57 | border: None;
58 | padding-left: 0;
59 | padding-right: 0;
60 | color: #404040;
61 | }
62 |
--------------------------------------------------------------------------------
/docs/_static/img/colab-badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/_static/img/colab.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/docs/conda_env.yml:
--------------------------------------------------------------------------------
1 | name: root
2 | channels:
3 | - pytorch
4 | - conda-forge
5 | dependencies:
6 | - cpuonly=1.0=0
7 | - pip=24.2
8 | - python=3.11
9 | - pytorch=2.5.0=py3.11_cpu_0
10 | - pip:
11 | - gymnasium>=0.28.1,<0.30
12 | - cloudpickle
13 | - opencv-python-headless
14 | - pandas
15 | - numpy
16 | - matplotlib
17 | - sphinx>=5,<8
18 | - sphinx_rtd_theme>=1.3.0
19 | - sphinx_copybutton
20 | - sb3-contrib>=2.4.0a10,<3.0
21 | - optuna>=3.0,<5.0
22 | - huggingface_sb3>=3.0
23 | - rich
24 | - tqdm
25 | - pyyaml>=5.1
26 | - pytablewriter==1.2.0
27 |
--------------------------------------------------------------------------------
/docs/guide/config.rst:
--------------------------------------------------------------------------------
1 | .. _config:
2 |
3 | =============
4 | Configuration
5 | =============
6 |
7 | Hyperparameter yaml syntax
8 | --------------------------
9 |
10 | The syntax used in ``hyperparams/algo_name.yml`` for setting
11 | hyperparameters (likewise the syntax to `overwrite
12 | hyperparameters `__
13 | on the cli) may be specialized if the argument is a function. See
14 | examples in the ``hyperparams/`` directory. For example:
15 |
16 | - Specify a linear schedule for the learning rate:
17 |
18 | .. code:: yaml
19 |
20 | learning_rate: lin_0.012486195510232303
21 |
22 | Specify a different activation function for the network:
23 |
24 | .. code:: yaml
25 |
26 | policy_kwargs: "dict(activation_fn=nn.ReLU)"
27 |
28 | For a custom policy:
29 |
30 | .. code:: yaml
31 |
32 | policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy
33 |
34 | Env Normalization
35 | -----------------
36 |
37 | In the hyperparameter file, ``normalize: True`` means that the training
38 | environment will be wrapped in a
39 | `VecNormalize `__
40 | wrapper.
41 |
42 | `Normalization
43 | uses `__ the
44 | default parameters of ``VecNormalize``, with the exception of ``gamma``
45 | which is set to match that of the agent. This can be
46 | `overridden `__
47 | using the appropriate ``hyperparams/algo_name.yml``, e.g.
48 |
49 | .. code:: yaml
50 |
51 | normalize: "{'norm_obs': True, 'norm_reward': False}"
52 |
53 | Env Wrappers
54 | ------------
55 |
56 | You can specify in the hyperparameter config one or more wrappers to use
57 | around the environment:
58 |
59 | for one wrapper:
60 |
61 | .. code:: yaml
62 |
63 | env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
64 |
65 | for multiple, specify a list:
66 |
67 | .. code:: yaml
68 |
69 | env_wrapper:
70 | - rl_zoo3.wrappers.TruncatedOnSuccessWrapper:
71 | reward_offset: 1.0
72 | - sb3_contrib.common.wrappers.TimeFeatureWrapper
73 |
74 | Note that you can easily specify parameters too.
75 |
76 | By default, the environment is wrapped with a ``Monitor`` wrapper to
77 | record episode statistics. You can specify arguments to it using
78 | ``monitor_kwargs`` parameter to log additional data. That data *must* be
79 | present in the info dictionary at the last step of each episode.
80 |
81 | For instance, for recording success with goal envs
82 | (e.g. ``FetchReach-v1``):
83 |
84 | .. code:: yaml
85 |
86 | monitor_kwargs: dict(info_keywords=('is_success',))
87 |
88 | or recording final x position with ``Ant-v3``:
89 |
90 | .. code:: yaml
91 |
92 | monitor_kwargs: dict(info_keywords=('x_position',))
93 |
94 | Note: for known ``GoalEnv`` like ``FetchReach``,
95 | ``info_keywords=('is_success',)`` is actually the default.
96 |
97 | VecEnvWrapper
98 | -------------
99 |
100 | You can specify which ``VecEnvWrapper`` to use in the config, the same
101 | way as for env wrappers (see above), using the ``vec_env_wrapper`` key:
102 |
103 | For instance:
104 |
105 | .. code:: yaml
106 |
107 | vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor
108 |
109 | Note: ``VecNormalize`` is supported separately using ``normalize``
110 | keyword, and ``VecFrameStack`` has a dedicated keyword ``frame_stack``.
111 |
112 | Callbacks
113 | ---------
114 |
115 | Following the same syntax as env wrappers, you can also add custom
116 | callbacks to use during training.
117 |
118 | .. code:: yaml
119 |
120 | callback:
121 | - rl_zoo3.callbacks.ParallelTrainCallback:
122 | gradient_steps: 256
123 |
--------------------------------------------------------------------------------
/docs/guide/custom_env.rst:
--------------------------------------------------------------------------------
1 | .. _custom:
2 |
3 | ==================
4 | Custom Environment
5 | ==================
6 |
7 | The easiest way to add support for a custom environment is to edit
8 | ``rl_zoo3/import_envs.py`` and register your environment here. Then, you
9 | need to add a section for it in the hyperparameters file
10 | (``hyperparams/algo.yml`` or a custom yaml file that you can specify
11 | using ``--conf-file`` argument).
12 |
--------------------------------------------------------------------------------
/docs/guide/enjoy.rst:
--------------------------------------------------------------------------------
1 | .. _enjoy:
2 |
3 | =====================
4 | Enjoy a Trained Agent
5 | =====================
6 |
7 | .. note::
8 |
9 | To download the repo with the trained agents, you must use
10 | ``git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo``
11 | in order to clone the submodule too.
12 |
13 |
14 | Enjoy a trained agent
15 | ---------------------
16 |
17 | If the trained agent exists, then you can see it in action using:
18 |
19 | ::
20 |
21 | python enjoy.py --algo algo_name --env env_id
22 |
23 | For example, enjoy A2C on Breakout during 5000 timesteps:
24 |
25 | ::
26 |
27 | python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
28 |
29 | If you have trained an agent yourself, you need to do:
30 |
31 | ::
32 |
33 | # exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID
34 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0
35 |
36 | Load Checkpoints, Best Model
37 | -----------------------------
38 |
39 | To load the best model (when using evaluation environment):
40 |
41 | ::
42 |
43 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best
44 |
45 | To load a checkpoint (here the checkpoint name is
46 | ``rl_model_10000_steps.zip``):
47 |
48 | ::
49 |
50 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000
51 |
52 | To load the latest checkpoint:
53 |
54 | ::
55 |
56 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint
57 |
58 |
59 | Record a Video of a Trained Agent
60 | ---------------------------------
61 |
62 | Record 1000 steps with the latest saved model:
63 |
64 | ::
65 |
66 | python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000
67 |
68 | Use the best saved model instead:
69 |
70 | ::
71 |
72 | python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best
73 |
74 | Record a video of a checkpoint saved during training (here the
75 | checkpoint name is ``rl_model_10000_steps.zip``):
76 |
77 | ::
78 |
79 | python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000
80 |
81 |
82 | Record a Video of a Training Experiment
83 | ---------------------------------------
84 |
85 | Apart from recording videos of specific saved models, it is also
86 | possible to record a video of a training experiment where checkpoints
87 | have been saved.
88 |
89 | Record 1000 steps for each checkpoint, latest and best saved models:
90 |
91 | ::
92 |
93 | python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic
94 |
95 | The previous command will create a ``mp4`` file. To convert this file to
96 | ``gif`` format as well:
97 |
98 | ::
99 |
100 | python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif
101 |
--------------------------------------------------------------------------------
/docs/guide/install.rst:
--------------------------------------------------------------------------------
1 | .. _install:
2 |
3 | Installation
4 | ============
5 |
6 | Prerequisites
7 | -------------
8 |
9 | RL Zoo requires python 3.8+ and PyTorch >= 1.13
10 |
11 |
12 | Minimal Installation
13 | --------------------
14 |
15 | To install RL Zoo with pip, execute:
16 |
17 | .. code-block:: bash
18 |
19 | pip install rl_zoo3
20 |
21 | From source:
22 |
23 | .. code-block:: bash
24 |
25 | git clone https://github.com/DLR-RM/rl-baselines3-zoo
26 | cd rl-baselines3-zoo/
27 | pip install -e .
28 |
29 | .. note::
30 |
31 | You can do ``python -m rl_zoo3.train`` from any folder and you have access to ``rl_zoo3`` command line interface, for instance, ``rl_zoo3 train`` is equivalent to ``python train.py``
32 |
33 |
34 |
35 | Full installation
36 | -----------------
37 |
38 | With extra envs and test dependencies:
39 |
40 |
41 | .. note::
42 |
43 | If you want to use Atari games, you will need to do ``pip install "autorom[accept-rom-license]"``
44 | additionally to download the ROMs
45 |
46 |
47 | .. code-block:: bash
48 |
49 | apt-get install swig cmake ffmpeg
50 | pip install -r requirements.txt
51 | pip install -e .[plots,tests]
52 |
53 |
54 | Please see `Stable Baselines3 documentation `_ for alternatives to install stable baselines3.
55 |
56 |
57 | Docker Images
58 | -------------
59 |
60 | Build docker image (CPU):
61 |
62 | ::
63 |
64 | make docker-cpu
65 |
66 | GPU:
67 |
68 | ::
69 |
70 | USE_GPU=True make docker-gpu
71 |
72 | Pull built docker image (CPU):
73 |
74 | ::
75 |
76 | docker pull stablebaselines/rl-baselines3-zoo-cpu
77 |
78 | GPU image:
79 |
80 | ::
81 |
82 | docker pull stablebaselines/rl-baselines3-zoo
83 |
84 | Run script in the docker image:
85 |
86 | ::
87 |
88 | ./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1
89 |
--------------------------------------------------------------------------------
/docs/guide/integrations.rst:
--------------------------------------------------------------------------------
1 | .. _integrations:
2 |
3 | ============
4 | Integrations
5 | ============
6 |
7 | Huggingface Hub Integration
8 | ---------------------------
9 |
10 | List and videos of trained agents can be found on our Huggingface page: https://huggingface.co/sb3
11 |
12 |
13 | Upload model to hub (same syntax as for ``enjoy.py``):
14 |
15 | ::
16 |
17 | python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit"
18 |
19 | You can choose a custom ``repo-name`` (default: ``{algo}-{env_id}``) by
20 | passing a ``--repo-name`` argument.
21 |
22 | Download model from hub:
23 |
24 | ::
25 |
26 | python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3
27 |
28 |
29 | Experiment tracking
30 | -------------------
31 |
32 | We support tracking experiment data such as learning curves and
33 | hyperparameters via `Weights and Biases `__.
34 |
35 | The following command
36 |
37 | ::
38 |
39 | python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
40 |
41 | yields a tracked experiment at this
42 | `URL `__.
43 |
44 | To add a tag to the run, (e.g. ``optimized``), use the argument
45 | ``--wandb-tags optimized``.
46 |
--------------------------------------------------------------------------------
/docs/guide/plot.rst:
--------------------------------------------------------------------------------
1 | .. _plot:
2 |
3 | ============
4 | Plot Scripts
5 | ============
6 |
7 |
8 | Plot scripts (to be documented, see "Results" sections in SB3
9 | documentation):
10 |
11 | - ``scripts/all_plots.py``/``scripts/plot_from_file.py`` for plotting evaluations
12 |
13 | - ``scripts/plot_train.py`` for plotting training reward/success
14 |
15 |
16 | Examples
17 | --------
18 |
19 | Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving
20 | window of 500 episodes for all the ``Fetch`` environment with ``HER``
21 | algorithm:
22 |
23 | ::
24 |
25 | python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps
26 |
27 | Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and
28 | Ant PyBullet environments:
29 |
30 | ::
31 |
32 | python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/
33 |
34 | Plot with the rliable library
35 | -----------------------------
36 |
37 | The RL zoo integrates some of
38 | `rliable `__ library features. You
39 | can find a visual explanation of the tools used by rliable in this `blog
40 | post `__.
41 |
42 | First, you need to install
43 | `rliable `__.
44 |
45 | Note: Python 3.7+ is required in that case.
46 |
47 | Then export your results to a file using the ``all_plots.py`` script
48 | (see above):
49 |
50 | ::
51 |
52 | python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy
53 |
54 | You can now use the ``plot_from_file.py`` script with ``--rliable``,
55 | ``--versus`` and ``--iqm`` arguments:
56 |
57 | ::
58 |
59 | python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC
60 |
61 | .. note::
62 |
63 | you may need to edit ``plot_from_file.py``, in particular the
64 | ``env_key_to_env_id`` dictionary and the
65 | ``scripts/score_normalization.py`` which stores min and max score for
66 | each environment.
67 |
68 |
69 | Remark: plotting with the ``--rliable`` option is usually slow as
70 | confidence interval need to be computed using bootstrap sampling.
71 |
--------------------------------------------------------------------------------
/docs/guide/quickstart.rst:
--------------------------------------------------------------------------------
1 | .. _quickstart:
2 |
3 | ===============
4 | Getting Started
5 | ===============
6 |
7 | .. note::
8 |
9 | You can try the following examples online using Google Colab |Colab|
10 | notebook: `RL Baselines zoo notebook`_
11 |
12 |
13 | .. _RL Baselines zoo notebook: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/rl-baselines-zoo.ipynb
14 | .. |Colab| image:: ../_static/img/colab.svg
15 |
16 |
17 | The hyperparameters for each environment are defined in
18 | ``hyperparams/algo_name.yml``.
19 |
20 | If the environment exists in this file, then you can train an agent
21 | using:
22 |
23 | ::
24 |
25 | python -m rl_zoo3.train --algo algo_name --env env_id
26 |
27 | Or if you are in the RL Zoo3 folder:
28 |
29 | ::
30 |
31 | python train.py --algo algo_name --env env_id
32 |
33 | For example (with evaluation and checkpoints):
34 |
35 | ::
36 |
37 | python -m rl_zoo3.train --algo ppo --env CartPole-v1 --eval-freq 10000 --save-freq 50000
38 |
39 |
40 |
41 | If the trained agent exists, then you can see it in action using:
42 |
43 | ::
44 |
45 | python -m rl_zoo3.enjoy --algo algo_name --env env_id
46 |
47 | For example, enjoy A2C on Breakout during 5000 timesteps:
48 |
49 | ::
50 |
51 | python -m rl_zoo3.enjoy --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
52 |
--------------------------------------------------------------------------------
/docs/guide/sbx.rst:
--------------------------------------------------------------------------------
1 | .. _sbx:
2 |
3 | ==========================
4 | Stable Baselines Jax (SBX)
5 | ==========================
6 |
7 | `Stable Baselines Jax (SBX) `_ is a proof of concept version of Stable-Baselines3 in Jax.
8 |
9 | It provides a minimal number of features compared to SB3 but can be much faster (up to 20x!): https://twitter.com/araffin2/status/1590714558628253698
10 |
11 |
12 | It is also compatible with the RL Zoo.
13 | For that you will need to create two files.
14 |
15 | ``train_sbx.py``:
16 |
17 | .. code-block:: python
18 |
19 | import rl_zoo3
20 | import rl_zoo3.train
21 | from rl_zoo3.train import train
22 | from sbx import DQN, PPO, SAC, TQC, DroQ
23 |
24 |
25 | rl_zoo3.ALGOS["tqc"] = TQC
26 | rl_zoo3.ALGOS["droq"] = DroQ
27 | rl_zoo3.ALGOS["sac"] = SAC
28 | rl_zoo3.ALGOS["ppo"] = PPO
29 | rl_zoo3.ALGOS["dqn"] = DQN
30 | rl_zoo3.train.ALGOS = rl_zoo3.ALGOS
31 | rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
32 |
33 | if __name__ == "__main__":
34 | train()
35 |
36 | Then you can call ``python train_sbx.py --algo sac --env Pendulum-v1`` and use the RL Zoo CLI.
37 |
38 |
39 | ``enjoy_sbx.py``:
40 |
41 | .. code-block:: python
42 |
43 | import rl_zoo3
44 | import rl_zoo3.enjoy
45 | from rl_zoo3.enjoy import enjoy
46 | from sbx import DQN, PPO, SAC, TQC, DroQ
47 |
48 |
49 | rl_zoo3.ALGOS["tqc"] = TQC
50 | rl_zoo3.ALGOS["droq"] = DroQ
51 | rl_zoo3.ALGOS["sac"] = SAC
52 | rl_zoo3.ALGOS["ppo"] = PPO
53 | rl_zoo3.ALGOS["dqn"] = DQN
54 | rl_zoo3.enjoy.ALGOS = rl_zoo3.ALGOS
55 | rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
56 |
57 | if __name__ == "__main__":
58 | enjoy()
59 |
--------------------------------------------------------------------------------
/docs/guide/train.rst:
--------------------------------------------------------------------------------
1 | .. _train:
2 |
3 | ==============
4 | Train an Agent
5 | ==============
6 |
7 | Basic Usage
8 | -----------
9 |
10 | The hyperparameters for each environment are defined in
11 | ``hyperparams/algo_name.yml``.
12 |
13 |
14 | .. note::
15 |
16 | Once RL Zoo3 is installed, you can do ``python -m rl_zoo3.train`` from any folder, it is equivalent to ``python train.py``
17 |
18 |
19 | If the environment exists in this file, then you can train an agent using:
20 |
21 | ::
22 |
23 | python train.py --algo algo_name --env env_id
24 |
25 |
26 | .. note::
27 |
28 | You can use ``-P`` (``--progress``) option to display a progress bar.
29 |
30 |
31 | Custom Config File
32 | ------------------
33 |
34 | Using a custom config file, when it is a yaml file which contains an ``env_id`` entry:
35 |
36 | ::
37 |
38 | python train.py --algo algo_name --env env_id --conf-file my_yaml.yml
39 |
40 |
41 | You can also use a python file that contains a dictionary called `hyperparams` with an entry for each ``env_id``.
42 | (see ``hyperparams/python/ppo_config_example.py`` for an example)
43 |
44 | ::
45 |
46 | # You can pass a path to a python file
47 | python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py
48 | # Or pass a path to a file from a module (for instance my_package.my_file)
49 | python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example
50 |
51 | The advantage of this approach is that you can specify arbitrary python dictionaries
52 | and ensure that all their dependencies are imported in the config file itself.
53 |
54 | Tensorboard, Checkpoints, Evaluation
55 | ------------------------------------
56 |
57 | For example (with tensorboard support):
58 |
59 | ::
60 |
61 | python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/
62 |
63 |
64 | Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env):
65 |
66 | ::
67 |
68 | python train.py --algo sac --env AntBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1
69 |
70 |
71 | Save a checkpoint of the agent every 100000 steps:
72 |
73 | ::
74 |
75 | python train.py --algo td3 --env AntBulletEnv-v0 --save-freq 100000
76 |
77 | Resume Training
78 | ---------------
79 |
80 | Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps):
81 |
82 | ::
83 |
84 | python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000
85 |
86 | Save Replay Buffer
87 | ------------------
88 |
89 | When using off-policy algorithms, you can also **save the replay buffer** after training:
90 |
91 | ::
92 |
93 | python train.py --algo sac --env Pendulum-v1 --save-replay-buffer
94 |
95 | It will be automatically loaded if present when continuing training.
96 |
97 |
98 | Env keyword arguments
99 | ---------------------
100 |
101 | You can specify keyword arguments to pass to the env constructor in the
102 | command line, using ``--env-kwargs``:
103 |
104 | ::
105 |
106 | python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10
107 |
108 |
109 | Overwrite hyperparameters
110 | -------------------------
111 |
112 | You can easily overwrite hyperparameters in the command line, using
113 | ``--hyperparams``:
114 |
115 | ::
116 |
117 | python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])"
118 |
119 | Note: if you want to pass a string, you need to escape it like that:
120 | ``my_string:"'value'"``
121 |
--------------------------------------------------------------------------------
/docs/guide/tuning.rst:
--------------------------------------------------------------------------------
1 | .. _tuning:
2 |
3 | =====================
4 | Hyperparameter Tuning
5 | =====================
6 |
7 | Automated hyperparameter optimization
8 | -------------------------------------
9 |
10 | Blog post: `Automatic Hyperparameter Tuning - A Visual Guide `_
11 |
12 | Video: https://www.youtube.com/watch?v=AidFTOdGNFQ
13 |
14 | We use `Optuna `__ for optimizing the
15 | hyperparameters. Not all hyperparameters are tuned, and tuning enforces
16 | certain default hyperparameter settings that may be different from the
17 | official defaults. See
18 | `rl_zoo3/hyperparams_opt.py `__
19 | for the current settings for each agent.
20 |
21 | Hyperparameters not specified in
22 | `rl_zoo3/hyperparams_opt.py `__
23 | are taken from the associated YAML file and fall back to the default
24 | values of SB3 if not present.
25 |
26 | Note: when using SuccessiveHalvingPruner (“halving”), you must specify
27 | ``--n-jobs > 1``
28 |
29 | Budget of 1000 trials with a maximum of 50000 steps:
30 |
31 | ::
32 |
33 | python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
34 | --sampler tpe --pruner median
35 |
36 | Distributed optimization using a shared database is also possible (see
37 | the corresponding `Optuna
38 | documentation `__):
39 |
40 | ::
41 |
42 | python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage logs/demo.log
43 |
44 |
45 |
46 | Visualize live using `optuna-dashboard `__
47 |
48 | .. code:: bash
49 |
50 | optuna-dashboard logs/demo.log
51 |
52 | Load hyperparameters from trial number 21 and train an agent with it:
53 |
54 | .. code:: bash
55 |
56 | python train.py --algo ppo --env MountainCar-v0 --study-name test --storage logs/demo.log --trial-id 21
57 |
58 |
59 | The default budget for hyperparameter tuning is 500 trials and there is
60 | one intermediate evaluation for pruning/early stopping per 100k time
61 | steps.
62 |
63 | Hyperparameters search space
64 | ----------------------------
65 |
66 | Note that the default hyperparameters used in the zoo when tuning are
67 | not always the same as the defaults provided in
68 | `stable-baselines3 `__.
69 | Consult the latest source code to be sure of these settings. For
70 | example:
71 |
72 | - PPO tuning assumes a network architecture with ``ortho_init = False``
73 | when tuning, though it is ``True`` by
74 | `default `__.
75 | You can change that by updating
76 | `rl_zoo3/hyperparams_opt.py `__.
77 |
78 | - Non-episodic rollout in TD3 and DDPG assumes
79 | ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to
80 | reduce the search space.
81 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | RL Baselines3 Zoo Docs - A Training Framework for Stable Baselines3
2 | ===================================================================
3 |
4 | `RL Baselines3 Zoo `_ is a training framework for Reinforcement Learning (RL), using `Stable Baselines3 (SB3) `_,
5 | reliable implementations of reinforcement learning algorithms in PyTorch.
6 |
7 | Github repository: https://github.com/DLR-RM/rl-baselines3-zoo
8 |
9 | It provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.
10 |
11 | In addition, it includes a collection of tuned hyperparameters for common environments and RL algorithms, and agents trained with those settings.
12 |
13 | .. toctree::
14 | :maxdepth: 2
15 | :caption: User Guide
16 |
17 | guide/install
18 | guide/quickstart
19 | guide/train
20 | guide/plot
21 | guide/enjoy
22 | guide/custom_env
23 | guide/config
24 | guide/integrations
25 | guide/tuning
26 | guide/sbx
27 |
28 |
29 | .. toctree::
30 | :maxdepth: 1
31 | :caption: RL Zoo API
32 |
33 | modules/exp_manager
34 | modules/wrappers
35 | modules/callbacks
36 | modules/utils
37 |
38 | .. toctree::
39 | :maxdepth: 1
40 | :caption: Misc
41 |
42 | misc/changelog
43 |
44 |
45 | Citing RL Baselines3 Zoo
46 | ------------------------
47 | To cite this project in publications:
48 |
49 | .. code-block:: bibtex
50 |
51 | @misc{rl-zoo3,
52 | author = {Raffin, Antonin},
53 | title = {RL Baselines3 Zoo},
54 | year = {2020},
55 | publisher = {GitHub},
56 | journal = {GitHub repository},
57 | howpublished = {\url{https://github.com/DLR-RM/rl-baselines3-zoo}},
58 | }
59 |
60 | Contributing
61 | ------------
62 |
63 | To anyone interested in making the rl baselines better, there are still some improvements
64 | that need to be done.
65 | You can check issues in the `repo `_.
66 |
67 | If you want to contribute, please read `CONTRIBUTING.md `_ first.
68 |
69 | Indices and tables
70 | -------------------
71 |
72 | * :ref:`genindex`
73 | * :ref:`search`
74 | * :ref:`modindex`
75 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=StableBaselines
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/docs/misc/changelog.rst:
--------------------------------------------------------------------------------
1 | .. _changelog:
2 |
3 | Changelog
4 | ==========
5 |
6 |
7 | See https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md
8 |
--------------------------------------------------------------------------------
/docs/modules/callbacks.rst:
--------------------------------------------------------------------------------
1 | .. _callbacks:
2 |
3 |
4 | Callbacks
5 | =========
6 |
7 | .. automodule:: rl_zoo3.callbacks
8 | :members:
9 |
--------------------------------------------------------------------------------
/docs/modules/exp_manager.rst:
--------------------------------------------------------------------------------
1 | .. _manager:
2 |
3 | .. automodule:: rl_zoo3.exp_manager
4 |
5 |
6 | Experiment Manager
7 | ==================
8 |
9 |
10 | Parameters
11 | ----------
12 |
13 | .. autoclass:: ExperimentManager
14 | :members:
15 | :inherited-members:
16 |
--------------------------------------------------------------------------------
/docs/modules/utils.rst:
--------------------------------------------------------------------------------
1 | .. _utils:
2 |
3 |
4 | Utils
5 | =====
6 |
7 | .. automodule:: rl_zoo3.utils
8 | :members:
9 |
--------------------------------------------------------------------------------
/docs/modules/wrappers.rst:
--------------------------------------------------------------------------------
1 | .. _wrappers:
2 |
3 |
4 | Wrappers
5 | ========
6 |
7 | .. automodule:: rl_zoo3.wrappers
8 | :members:
9 |
--------------------------------------------------------------------------------
/docs/spelling_wordlist.txt:
--------------------------------------------------------------------------------
1 | py
2 | env
3 | atari
4 | argparse
5 | Argparse
6 | TensorFlow
7 | feedforward
8 | envs
9 | VecEnv
10 | pretrain
11 | petrained
12 | tf
13 | th
14 | nn
15 | np
16 | str
17 | mujoco
18 | cpu
19 | ndarray
20 | ndarrays
21 | timestep
22 | timesteps
23 | stepsize
24 | dataset
25 | adam
26 | fn
27 | normalisation
28 | Kullback
29 | Leibler
30 | boolean
31 | deserialized
32 | pretrained
33 | minibatch
34 | subprocesses
35 | ArgumentParser
36 | Tensorflow
37 | Gaussian
38 | approximator
39 | minibatches
40 | hyperparameters
41 | hyperparameter
42 | vectorized
43 | rl
44 | colab
45 | dataloader
46 | npz
47 | datasets
48 | vf
49 | logits
50 | num
51 | Utils
52 | backpropagate
53 | prepend
54 | NaN
55 | preprocessing
56 | Cloudpickle
57 | async
58 | multiprocess
59 | tensorflow
60 | mlp
61 | cnn
62 | neglogp
63 | tanh
64 | coef
65 | repo
66 | Huber
67 | params
68 | ppo
69 | arxiv
70 | Arxiv
71 | func
72 | DQN
73 | Uhlenbeck
74 | Ornstein
75 | multithread
76 | cancelled
77 | Tensorboard
78 | parallelize
79 | customising
80 | serializable
81 | Multiprocessed
82 | cartpole
83 | toolset
84 | lstm
85 | rescale
86 | ffmpeg
87 | avconv
88 | unnormalized
89 | Github
90 | pre
91 | preprocess
92 | backend
93 | attr
94 | preprocess
95 | Antonin
96 | Raffin
97 | araffin
98 | Homebrew
99 | Numpy
100 | Theano
101 | rollout
102 | kfac
103 | Piecewise
104 | csv
105 | nvidia
106 | visdom
107 | tensorboard
108 | preprocessed
109 | namespace
110 | sklearn
111 | GoalEnv
112 | Torchy
113 | pytorch
114 | dicts
115 | optimizers
116 | Deprecations
117 | forkserver
118 | cuda
119 | Polyak
120 | gSDE
121 | rollouts
122 | Pyro
123 | softmax
124 | stdout
125 | Contrib
126 | Quantile
127 | Huggingface
128 | Jax
129 | Optuna
130 |
--------------------------------------------------------------------------------
/enjoy.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.enjoy import enjoy
2 |
3 | if __name__ == "__main__":
4 | enjoy()
5 |
--------------------------------------------------------------------------------
/hyperparams/a2c.yml:
--------------------------------------------------------------------------------
1 | atari:
2 | env_wrapper:
3 | - stable_baselines3.common.atari_wrappers.AtariWrapper
4 | # Equivalent to
5 | # vec_env_wrapper:
6 | # - stable_baselines3.common.vec_env.VecFrameStack:
7 | # n_stack: 4
8 | frame_stack: 4
9 | policy: 'CnnPolicy'
10 | n_envs: 16
11 | n_timesteps: !!float 1e7
12 | ent_coef: 0.01
13 | vf_coef: 0.25
14 | policy_kwargs: "dict(optimizer_class=RMSpropTFLike, optimizer_kwargs=dict(eps=1e-5))"
15 |
16 | CartPole-v1:
17 | n_envs: 8
18 | n_timesteps: !!float 5e5
19 | policy: 'MlpPolicy'
20 | ent_coef: 0.0
21 |
22 | LunarLander-v2:
23 | n_envs: 8
24 | n_timesteps: !!float 2e5
25 | policy: 'MlpPolicy'
26 | gamma: 0.995
27 | n_steps: 5
28 | learning_rate: lin_0.00083
29 | ent_coef: 0.00001
30 |
31 | MountainCar-v0:
32 | normalize: true
33 | n_envs: 16
34 | n_timesteps: !!float 1e6
35 | policy: 'MlpPolicy'
36 | ent_coef: .0
37 |
38 | Acrobot-v1:
39 | normalize: true
40 | n_envs: 16
41 | n_timesteps: !!float 5e5
42 | policy: 'MlpPolicy'
43 | ent_coef: .0
44 |
45 | # Tuned
46 | Pendulum-v1:
47 | normalize: True
48 | n_envs: 8
49 | n_timesteps: !!float 1e6
50 | policy: 'MlpPolicy'
51 | ent_coef: 0.0
52 | max_grad_norm: 0.5
53 | n_steps: 8
54 | gae_lambda: 0.9
55 | vf_coef: 0.4
56 | gamma: 0.9
57 | use_rms_prop: True
58 | normalize_advantage: False
59 | learning_rate: lin_7e-4
60 | use_sde: True
61 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
62 |
63 | # Tuned
64 | LunarLanderContinuous-v3:
65 | normalize: true
66 | n_envs: 4
67 | n_timesteps: !!float 5e6
68 | policy: 'MlpPolicy'
69 | ent_coef: 0.0
70 | max_grad_norm: 0.5
71 | n_steps: 8
72 | gae_lambda: 0.9
73 | vf_coef: 0.4
74 | gamma: 0.99
75 | use_rms_prop: True
76 | normalize_advantage: False
77 | learning_rate: lin_7e-4
78 | use_sde: True
79 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
80 |
81 | # Tuned
82 | MountainCarContinuous-v0:
83 | normalize: true
84 | n_envs: 4
85 | n_steps: 100
86 | n_timesteps: !!float 1e5
87 | policy: 'MlpPolicy'
88 | ent_coef: 0.0
89 | use_sde: True
90 | sde_sample_freq: 16
91 | policy_kwargs: "dict(log_std_init=0.0, ortho_init=False)"
92 |
93 | # Tuned
94 | BipedalWalker-v3:
95 | normalize: true
96 | n_envs: 16
97 | n_timesteps: !!float 5e6
98 | policy: 'MlpPolicy'
99 | ent_coef: 0.0
100 | max_grad_norm: 0.5
101 | n_steps: 8
102 | gae_lambda: 0.9
103 | vf_coef: 0.4
104 | gamma: 0.99
105 | use_rms_prop: True
106 | normalize_advantage: False
107 | learning_rate: lin_0.00096
108 | use_sde: True
109 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
110 |
111 | # Tuned
112 | BipedalWalkerHardcore-v3:
113 | normalize: true
114 | n_envs: 32
115 | n_timesteps: !!float 20e7
116 | policy: 'MlpPolicy'
117 | ent_coef: 0.001
118 | max_grad_norm: 0.5
119 | n_steps: 8
120 | gae_lambda: 0.9
121 | vf_coef: 0.4
122 | gamma: 0.99
123 | use_rms_prop: True
124 | normalize_advantage: False
125 | learning_rate: lin_0.0008
126 | use_sde: True
127 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
128 |
129 | # Tuned
130 | HalfCheetahBulletEnv-v0: &pybullet-defaults
131 | normalize: true
132 | n_envs: 4
133 | n_timesteps: !!float 2e6
134 | policy: 'MlpPolicy'
135 | ent_coef: 0.0
136 | max_grad_norm: 0.5
137 | n_steps: 8
138 | gae_lambda: 0.9
139 | vf_coef: 0.4
140 | gamma: 0.99
141 | use_rms_prop: True
142 | normalize_advantage: False
143 | # Both work
144 | learning_rate: lin_0.00096
145 | # learning_rate: !!float 3e-4
146 | use_sde: True
147 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
148 |
149 | Walker2DBulletEnv-v0:
150 | <<: *pybullet-defaults
151 |
152 | # Tuned
153 | AntBulletEnv-v0:
154 | <<: *pybullet-defaults
155 |
156 | # Tuned
157 | HopperBulletEnv-v0:
158 | <<: *pybullet-defaults
159 |
160 | # Tuned but unstable
161 | # Not working without SDE?
162 | ReacherBulletEnv-v0:
163 | <<: *pybullet-defaults
164 | learning_rate: lin_0.0008
165 |
166 | # === Mujoco Envs ===
167 |
168 | HalfCheetah-v4: &mujoco-defaults
169 | normalize: true
170 | n_timesteps: !!float 1e6
171 | policy: 'MlpPolicy'
172 |
173 | Ant-v4:
174 | <<: *mujoco-defaults
175 |
176 | Hopper-v4:
177 | <<: *mujoco-defaults
178 |
179 | Walker2d-v4:
180 | <<: *mujoco-defaults
181 |
182 | Humanoid-v4:
183 | <<: *mujoco-defaults
184 | n_timesteps: !!float 2e6
185 |
186 | Swimmer-v4:
187 | <<: *mujoco-defaults
188 | gamma: 0.9999
189 |
--------------------------------------------------------------------------------
/hyperparams/crossq.yml:
--------------------------------------------------------------------------------
1 | MountainCarContinuous-v0:
2 | n_timesteps: !!float 50000
3 | policy: 'MlpPolicy'
4 | learning_rate: !!float 7e-4
5 | buffer_size: 50000
6 | train_freq: 32
7 | gradient_steps: 32
8 | gamma: 0.9999
9 | learning_starts: 100
10 | use_sde: True
11 | policy_delay: 2
12 | policy_kwargs: "dict(use_expln=True, log_std_init=-1, net_arch=[64, 64])"
13 |
14 | Pendulum-v1:
15 | n_timesteps: 20000
16 | policy: 'MlpPolicy'
17 | policy_delay: 2
18 | policy_kwargs: "dict(net_arch=[256, 256])"
19 |
20 |
21 | LunarLanderContinuous-v3:
22 | n_timesteps: !!float 2e5
23 | policy: 'MlpPolicy'
24 | buffer_size: 1000000
25 | learning_starts: 10000
26 |
27 |
28 | BipedalWalker-v3:
29 | n_timesteps: !!float 2e5
30 | policy: 'MlpPolicy'
31 | buffer_size: 300000
32 | gamma: 0.98
33 | learning_starts: 10000
34 | policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[1024, 1024]))"
35 |
36 | # === Mujoco Envs ===
37 |
38 | HalfCheetah-v4: &mujoco-defaults
39 | buffer_size: 1_000_000
40 | learning_rate: !!float 1e-3
41 | learning_starts: 5000
42 | n_timesteps: !!float 5e6
43 | policy: 'MlpPolicy'
44 | policy_delay: 3
45 | policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[2048, 2048]))"
46 |
47 | Ant-v4:
48 | <<: *mujoco-defaults
49 |
50 | Hopper-v4:
51 | <<: *mujoco-defaults
52 |
53 | Walker2d-v4:
54 | <<: *mujoco-defaults
55 |
56 | Humanoid-v4:
57 | <<: *mujoco-defaults
58 |
59 | HumanoidStandup-v4:
60 | <<: *mujoco-defaults
61 |
62 | Swimmer-v4:
63 | <<: *mujoco-defaults
64 | gamma: 0.999
65 |
66 | # Tuned for SAC, need to check with CrossQ
67 | HalfCheetahBulletEnv-v0: &pybullet-defaults
68 | n_timesteps: !!float 1e6
69 | policy: 'MlpPolicy'
70 | learning_rate: !!float 7.3e-4
71 | buffer_size: 300000
72 | batch_size: 256
73 | ent_coef: 'auto'
74 | gamma: 0.98
75 | train_freq: 8
76 | gradient_steps: 8
77 | learning_starts: 10000
78 | use_sde: True
79 | policy_kwargs: "dict(use_expln=True, log_std_init=-3)"
80 |
81 | # Tuned
82 | AntBulletEnv-v0:
83 | <<: *pybullet-defaults
84 |
85 | HopperBulletEnv-v0:
86 | <<: *pybullet-defaults
87 | learning_rate: lin_7.3e-4
88 |
89 | Walker2DBulletEnv-v0:
90 | <<: *pybullet-defaults
91 | learning_rate: lin_7.3e-4
92 |
--------------------------------------------------------------------------------
/hyperparams/ddpg.yml:
--------------------------------------------------------------------------------
1 | # Tuned
2 | MountainCarContinuous-v0:
3 | n_timesteps: 300000
4 | policy: 'MlpPolicy'
5 | noise_type: 'ornstein-uhlenbeck'
6 | noise_std: 0.5
7 | gradient_steps: 1
8 | train_freq: 1
9 | learning_rate: !!float 1e-3
10 | batch_size: 256
11 | policy_kwargs: "dict(net_arch=[400, 300])"
12 |
13 | Pendulum-v1:
14 | n_timesteps: 20000
15 | policy: 'MlpPolicy'
16 | gamma: 0.98
17 | buffer_size: 200000
18 | learning_starts: 10000
19 | noise_type: 'normal'
20 | noise_std: 0.1
21 | gradient_steps: 1
22 | train_freq: 1
23 | learning_rate: !!float 1e-3
24 | policy_kwargs: "dict(net_arch=[400, 300])"
25 |
26 | LunarLanderContinuous-v3:
27 | n_timesteps: !!float 3e5
28 | policy: 'MlpPolicy'
29 | gamma: 0.98
30 | buffer_size: 200000
31 | learning_starts: 10000
32 | noise_type: 'normal'
33 | noise_std: 0.1
34 | gradient_steps: 1
35 | train_freq: 1
36 | learning_rate: !!float 1e-3
37 | policy_kwargs: "dict(net_arch=[400, 300])"
38 |
39 | BipedalWalker-v3:
40 | n_timesteps: !!float 1e6
41 | policy: 'MlpPolicy'
42 | gamma: 0.98
43 | buffer_size: 200000
44 | learning_starts: 10000
45 | noise_type: 'normal'
46 | noise_std: 0.1
47 | gradient_steps: 1
48 | train_freq: 1
49 | learning_rate: !!float 1e-3
50 | policy_kwargs: "dict(net_arch=[400, 300])"
51 |
52 | # To be tuned
53 | BipedalWalkerHardcore-v3:
54 | n_timesteps: !!float 1e7
55 | policy: 'MlpPolicy'
56 | gamma: 0.99
57 | buffer_size: 1000000
58 | learning_starts: 10000
59 | noise_type: 'normal'
60 | noise_std: 0.1
61 | batch_size: 256
62 | train_freq: 1
63 | learning_rate: lin_7e-4
64 | policy_kwargs: "dict(net_arch=[400, 300])"
65 |
66 | # Tuned
67 | HalfCheetahBulletEnv-v0: &pybullet-defaults
68 | n_timesteps: !!float 1e6
69 | policy: 'MlpPolicy'
70 | gamma: 0.98
71 | buffer_size: 200000
72 | learning_starts: 10000
73 | noise_type: 'normal'
74 | noise_std: 0.1
75 | gradient_steps: 1
76 | train_freq: 1
77 | batch_size: 256
78 | learning_rate: !!float 7e-4
79 | policy_kwargs: "dict(net_arch=[400, 300])"
80 |
81 | # Tuned
82 | AntBulletEnv-v0:
83 | <<: *pybullet-defaults
84 |
85 | # Tuned
86 | HopperBulletEnv-v0:
87 | <<: *pybullet-defaults
88 |
89 | # Tuned
90 | Walker2DBulletEnv-v0:
91 | <<: *pybullet-defaults
92 |
93 | # To be tested
94 | HumanoidBulletEnv-v0:
95 | n_timesteps: !!float 2e6
96 | policy: 'MlpPolicy'
97 | gamma: 0.98
98 | buffer_size: 200000
99 | learning_starts: 10000
100 | noise_type: 'normal'
101 | noise_std: 0.1
102 | gradient_steps: -1
103 | train_freq: [1, "episode"]
104 | learning_rate: !!float 1e-3
105 | policy_kwargs: "dict(net_arch=[400, 300])"
106 |
107 | # Tuned
108 | ReacherBulletEnv-v0:
109 | <<: *pybullet-defaults
110 | n_timesteps: !!float 3e5
111 |
112 |
113 | # To be tuned
114 | InvertedDoublePendulumBulletEnv-v0:
115 | <<: *pybullet-defaults
116 | n_timesteps: !!float 1e6
117 |
118 | # To be tuned
119 | InvertedPendulumSwingupBulletEnv-v0:
120 | <<: *pybullet-defaults
121 | n_timesteps: !!float 3e5
122 |
123 | # === Mujoco Envs ===
124 | HalfCheetah-v4: &mujoco-defaults
125 | n_timesteps: !!float 1e6
126 | policy: 'MlpPolicy'
127 | learning_starts: 10000
128 | noise_type: 'normal'
129 | noise_std: 0.1
130 | train_freq: 1
131 | gradient_steps: 1
132 | learning_rate: !!float 1e-3
133 | batch_size: 256
134 | policy_kwargs: "dict(net_arch=[400, 300])"
135 |
136 | Ant-v4:
137 | <<: *mujoco-defaults
138 |
139 | Hopper-v4:
140 | <<: *mujoco-defaults
141 |
142 | Walker2d-v4:
143 | <<: *mujoco-defaults
144 |
145 | Humanoid-v4:
146 | <<: *mujoco-defaults
147 | n_timesteps: !!float 2e6
148 |
149 | Swimmer-v4:
150 | <<: *mujoco-defaults
151 | gamma: 0.9999
152 |
--------------------------------------------------------------------------------
/hyperparams/dqn.yml:
--------------------------------------------------------------------------------
1 | atari:
2 | env_wrapper:
3 | - stable_baselines3.common.atari_wrappers.AtariWrapper
4 | frame_stack: 4
5 | policy: 'CnnPolicy'
6 | n_timesteps: !!float 1e7
7 | buffer_size: 100000
8 | learning_rate: !!float 1e-4
9 | batch_size: 32
10 | learning_starts: 100000
11 | target_update_interval: 1000
12 | train_freq: 4
13 | gradient_steps: 1
14 | exploration_fraction: 0.1
15 | exploration_final_eps: 0.01
16 | # If True, you need to deactivate handle_timeout_termination
17 | # in the replay_buffer_kwargs
18 | optimize_memory_usage: False
19 |
20 | # Almost Tuned
21 | CartPole-v1:
22 | n_timesteps: !!float 5e4
23 | policy: 'MlpPolicy'
24 | learning_rate: !!float 2.3e-3
25 | batch_size: 64
26 | buffer_size: 100000
27 | learning_starts: 1000
28 | gamma: 0.99
29 | target_update_interval: 10
30 | train_freq: 256
31 | gradient_steps: 128
32 | exploration_fraction: 0.16
33 | exploration_final_eps: 0.04
34 | policy_kwargs: "dict(net_arch=[256, 256])"
35 |
36 | # Tuned
37 | MountainCar-v0:
38 | n_timesteps: !!float 1.2e5
39 | policy: 'MlpPolicy'
40 | learning_rate: !!float 4e-3
41 | batch_size: 128
42 | buffer_size: 10000
43 | learning_starts: 1000
44 | gamma: 0.98
45 | target_update_interval: 600
46 | train_freq: 16
47 | gradient_steps: 8
48 | exploration_fraction: 0.2
49 | exploration_final_eps: 0.07
50 | policy_kwargs: "dict(net_arch=[256, 256])"
51 |
52 | # Tuned
53 | LunarLander-v2:
54 | n_timesteps: !!float 1e5
55 | policy: 'MlpPolicy'
56 | learning_rate: !!float 6.3e-4
57 | batch_size: 128
58 | buffer_size: 50000
59 | learning_starts: 0
60 | gamma: 0.99
61 | target_update_interval: 250
62 | train_freq: 4
63 | gradient_steps: -1
64 | exploration_fraction: 0.12
65 | exploration_final_eps: 0.1
66 | policy_kwargs: "dict(net_arch=[256, 256])"
67 |
68 | # Tuned
69 | Acrobot-v1:
70 | n_timesteps: !!float 1e5
71 | policy: 'MlpPolicy'
72 | learning_rate: !!float 6.3e-4
73 | batch_size: 128
74 | buffer_size: 50000
75 | learning_starts: 0
76 | gamma: 0.99
77 | target_update_interval: 250
78 | train_freq: 4
79 | gradient_steps: -1
80 | exploration_fraction: 0.12
81 | exploration_final_eps: 0.1
82 | policy_kwargs: "dict(net_arch=[256, 256])"
83 |
--------------------------------------------------------------------------------
/hyperparams/her.yml:
--------------------------------------------------------------------------------
1 | ############################################################
2 | # NOTE: Starting with SB3 >= 1.1.0, HER is implemented as a replay buffer (HerReplayBuffer),
3 | # so this file is no longer used.
4 | # It is only kept here as a reference.
5 | #############################################################
6 |
7 | parking-v0:
8 | n_timesteps: !!float 1e5
9 | policy: 'MlpPolicy'
10 | model_class: 'tqc'
11 | n_sampled_goal: 4
12 | goal_selection_strategy: 'future'
13 | buffer_size: 1000000
14 | batch_size: 1024
15 | gamma: 0.95
16 | learning_rate: !!float 1e-3
17 | tau: 0.05
18 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
19 | online_sampling: True
20 | max_episode_length: 100
21 | # normalize: True
22 |
23 | # Mujoco Robotic Env
24 |
25 | FetchPush-v1:
26 | env_wrapper:
27 | - sb3_contrib.common.wrappers.TimeFeatureWrapper
28 | n_timesteps: !!float 1e6
29 | policy: 'MlpPolicy'
30 | model_class: 'tqc'
31 | n_sampled_goal: 4
32 | goal_selection_strategy: 'future'
33 | buffer_size: 1000000
34 | batch_size: 2048
35 | gamma: 0.95
36 | learning_rate: !!float 1e-3
37 | tau: 0.05
38 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
39 | online_sampling: True
40 |
41 | FetchSlide-v1:
42 | env_wrapper:
43 | - sb3_contrib.common.wrappers.TimeFeatureWrapper
44 | n_timesteps: !!float 2.5e6
45 | policy: 'MlpPolicy'
46 | model_class: 'tqc'
47 | n_sampled_goal: 4
48 | goal_selection_strategy: 'future'
49 | buffer_size: 1000000
50 | batch_size: 2048
51 | gamma: 0.95
52 | learning_rate: !!float 1e-3
53 | tau: 0.05
54 | # ent_coef: 0.01
55 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
56 | online_sampling: True
57 |
58 |
59 | FetchPickAndPlace-v1:
60 | env_wrapper:
61 | - sb3_contrib.common.wrappers.TimeFeatureWrapper
62 | # - rl_zoo3.wrappers.TruncatedOnSuccessWrapper:
63 | # reward_offset: 0
64 | # n_successes: 4
65 | # - stable_baselines3.common.monitor.Monitor
66 | n_timesteps: !!float 1e6
67 | policy: 'MlpPolicy'
68 | model_class: 'tqc'
69 | n_sampled_goal: 4
70 | goal_selection_strategy: 'future'
71 | buffer_size: 1000000
72 | batch_size: 1024
73 | gamma: 0.95
74 | learning_rate: !!float 1e-3
75 | tau: 0.05
76 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
77 | online_sampling: True
78 |
79 | # SAC hyperparams
80 | FetchReach-v1:
81 | n_timesteps: !!float 20000
82 | policy: 'MlpPolicy'
83 | model_class: 'sac'
84 | n_sampled_goal: 4
85 | goal_selection_strategy: 'future'
86 | buffer_size: 1000000
87 | ent_coef: 'auto'
88 | batch_size: 256
89 | gamma: 0.95
90 | learning_rate: 0.001
91 | learning_starts: 1000
92 | online_sampling: True
93 | normalize: True
94 |
95 | # === Real Robot envs
96 | NeckGoalEnvRelativeSparse-v2:
97 | model_class: 'sac'
98 | # env_wrapper:
99 | # - rl_zoo3.wrappers.HistoryWrapper:
100 | # horizon: 2
101 | # - sb3_contrib.common.wrappers.TimeFeatureWrapper
102 | n_timesteps: !!float 1e6
103 | policy: 'MlpPolicy'
104 | learning_rate: !!float 7.3e-4
105 | buffer_size: 100000
106 | batch_size: 256
107 | ent_coef: 'auto'
108 | gamma: 0.99
109 | tau: 0.02
110 | train_freq: [1, "episode"]
111 | gradient_steps: -1
112 | # 10 episodes of warm-up
113 | learning_starts: 1500
114 | use_sde_at_warmup: True
115 | use_sde: True
116 | sde_sample_freq: 64
117 | policy_kwargs: "dict(log_std_init=-2, net_arch=[256, 256])"
118 | n_sampled_goal: 4
119 | goal_selection_strategy: 'future'
120 | online_sampling: False
121 |
122 | NeckGoalEnvRelativeDense-v2:
123 | model_class: 'sac'
124 | env_wrapper:
125 | - rl_zoo3.wrappers.HistoryWrapperObsDict:
126 | horizon: 2
127 | # - sb3_contrib.common.wrappers.TimeFeatureWrapper
128 | n_timesteps: !!float 1e6
129 | policy: 'MlpPolicy'
130 | learning_rate: !!float 7.3e-4
131 | buffer_size: 200000
132 | batch_size: 256
133 | ent_coef: 'auto'
134 | gamma: 0.99
135 | tau: 0.02
136 | train_freq: [1, "episode"]
137 | gradient_steps: -1
138 | # 10 episodes of warm-up
139 | learning_starts: 1500
140 | use_sde_at_warmup: True
141 | use_sde: True
142 | sde_sample_freq: 64
143 | policy_kwargs: "dict(log_std_init=-2, net_arch=[256, 256])"
144 | n_sampled_goal: 4
145 | goal_selection_strategy: 'future'
146 | online_sampling: False
147 |
--------------------------------------------------------------------------------
/hyperparams/python/ppo_config_example.py:
--------------------------------------------------------------------------------
1 | """This file just serves as an example on how to configure the zoo
2 | using python scripts instead of yaml files."""
3 |
4 | import torch
5 |
6 | hyperparams = {
7 | "MountainCarContinuous-v0": dict(
8 | env_wrapper=[{"gymnasium.wrappers.TimeLimit": {"max_episode_steps": 100}}],
9 | normalize=True,
10 | n_envs=1,
11 | n_timesteps=20000.0,
12 | policy="MlpPolicy",
13 | batch_size=8,
14 | n_steps=8,
15 | gamma=0.9999,
16 | learning_rate=7.77e-05,
17 | ent_coef=0.00429,
18 | clip_range=0.1,
19 | n_epochs=2,
20 | gae_lambda=0.9,
21 | max_grad_norm=5,
22 | vf_coef=0.19,
23 | use_sde=True,
24 | policy_kwargs=dict(
25 | log_std_init=-3.29,
26 | ortho_init=False,
27 | activation_fn=torch.nn.ReLU,
28 | ),
29 | )
30 | }
31 |
--------------------------------------------------------------------------------
/hyperparams/qrdqn.yml:
--------------------------------------------------------------------------------
1 | atari:
2 | env_wrapper:
3 | - stable_baselines3.common.atari_wrappers.AtariWrapper
4 | frame_stack: 4
5 | policy: 'CnnPolicy'
6 | n_timesteps: !!float 1e7
7 | learning_starts: 50000
8 | exploration_fraction: 0.025 # explore 250k steps = 10M * 0.025
9 | # If True, you need to deactivate handle_timeout_termination
10 | # in the replay_buffer_kwargs
11 | optimize_memory_usage: False
12 |
13 | # Tuned
14 | CartPole-v1:
15 | n_timesteps: !!float 5e4
16 | policy: 'MlpPolicy'
17 | learning_rate: !!float 2.3e-3
18 | batch_size: 64
19 | buffer_size: 100000
20 | learning_starts: 1000
21 | gamma: 0.99
22 | target_update_interval: 10
23 | train_freq: 256
24 | gradient_steps: 128
25 | exploration_fraction: 0.16
26 | exploration_final_eps: 0.04
27 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=10)"
28 |
29 | # Tuned
30 | MountainCar-v0:
31 | n_timesteps: !!float 1.2e5
32 | policy: 'MlpPolicy'
33 | learning_rate: !!float 4e-3
34 | batch_size: 128
35 | buffer_size: 10000
36 | learning_starts: 1000
37 | gamma: 0.98
38 | target_update_interval: 600
39 | train_freq: 16
40 | gradient_steps: 8
41 | exploration_fraction: 0.2
42 | exploration_final_eps: 0.07
43 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)"
44 |
45 | # Tuned
46 | LunarLander-v2:
47 | n_timesteps: !!float 1e5
48 | policy: 'MlpPolicy'
49 | learning_rate: lin_1.5e-3
50 | batch_size: 128
51 | buffer_size: 100000
52 | learning_starts: 10000
53 | gamma: 0.995
54 | target_update_interval: 1
55 | train_freq: 256
56 | gradient_steps: -1
57 | exploration_fraction: 0.24
58 | exploration_final_eps: 0.18
59 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=170)"
60 |
61 | # Tuned
62 | Acrobot-v1:
63 | n_timesteps: !!float 1e5
64 | policy: 'MlpPolicy'
65 | learning_rate: !!float 6.3e-4
66 | batch_size: 128
67 | buffer_size: 50000
68 | learning_starts: 0
69 | gamma: 0.99
70 | target_update_interval: 250
71 | train_freq: 4
72 | gradient_steps: -1
73 | exploration_fraction: 0.12
74 | exploration_final_eps: 0.1
75 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)"
76 |
--------------------------------------------------------------------------------
/hyperparams/td3.yml:
--------------------------------------------------------------------------------
1 | # Tuned
2 | MountainCarContinuous-v0:
3 | n_timesteps: 300000
4 | policy: 'MlpPolicy'
5 | noise_type: 'ornstein-uhlenbeck'
6 | noise_std: 0.5
7 | gradient_steps: 1
8 | train_freq: 1
9 | learning_rate: !!float 1e-3
10 | batch_size: 256
11 | policy_kwargs: "dict(net_arch=[400, 300])"
12 |
13 | Pendulum-v1:
14 | n_timesteps: 20000
15 | policy: 'MlpPolicy'
16 | gamma: 0.98
17 | buffer_size: 200000
18 | learning_starts: 10000
19 | noise_type: 'normal'
20 | noise_std: 0.1
21 | gradient_steps: 1
22 | train_freq: 1
23 | learning_rate: !!float 1e-3
24 | policy_kwargs: "dict(net_arch=[400, 300])"
25 |
26 | LunarLanderContinuous-v3:
27 | n_timesteps: !!float 3e5
28 | policy: 'MlpPolicy'
29 | gamma: 0.98
30 | buffer_size: 200000
31 | learning_starts: 10000
32 | noise_type: 'normal'
33 | noise_std: 0.1
34 | gradient_steps: 1
35 | train_freq: 1
36 | learning_rate: !!float 1e-3
37 | policy_kwargs: "dict(net_arch=[400, 300])"
38 |
39 | BipedalWalker-v3:
40 | n_timesteps: !!float 1e6
41 | policy: 'MlpPolicy'
42 | gamma: 0.98
43 | buffer_size: 200000
44 | learning_starts: 10000
45 | noise_type: 'normal'
46 | noise_std: 0.1
47 | gradient_steps: 1
48 | train_freq: 1
49 | learning_rate: !!float 1e-3
50 | policy_kwargs: "dict(net_arch=[400, 300])"
51 |
52 | # To be tuned
53 | BipedalWalkerHardcore-v3:
54 | n_timesteps: !!float 1e7
55 | policy: 'MlpPolicy'
56 | gamma: 0.99
57 | buffer_size: 1000000
58 | learning_starts: 10000
59 | noise_type: 'normal'
60 | noise_std: 0.1
61 | batch_size: 256
62 | train_freq: 1
63 | learning_rate: lin_7e-4
64 | policy_kwargs: "dict(net_arch=[400, 300])"
65 |
66 | # Tuned
67 | HalfCheetahBulletEnv-v0: &pybullet-defaults
68 | n_timesteps: !!float 1e6
69 | policy: 'MlpPolicy'
70 | gamma: 0.98
71 | buffer_size: 200000
72 | learning_starts: 10000
73 | noise_type: 'normal'
74 | noise_std: 0.1
75 | gradient_steps: 1
76 | train_freq: 1
77 | batch_size: 256
78 | learning_rate: !!float 7e-4
79 | policy_kwargs: "dict(net_arch=[400, 300])"
80 |
81 | AntBulletEnv-v0:
82 | <<: *pybullet-defaults
83 |
84 | HopperBulletEnv-v0:
85 | <<: *pybullet-defaults
86 |
87 | Walker2DBulletEnv-v0:
88 | <<: *pybullet-defaults
89 |
90 |
91 | # To be tested
92 | HumanoidBulletEnv-v0:
93 | n_timesteps: !!float 2e6
94 | policy: 'MlpPolicy'
95 | gamma: 0.98
96 | buffer_size: 200000
97 | learning_starts: 10000
98 | noise_type: 'normal'
99 | noise_std: 0.1
100 | train_freq: 1
101 | learning_rate: !!float 1e-3
102 | policy_kwargs: "dict(net_arch=[400, 300])"
103 |
104 | # Tuned
105 | ReacherBulletEnv-v0:
106 | <<: *pybullet-defaults
107 | n_timesteps: !!float 3e5
108 |
109 | # Tuned
110 | InvertedDoublePendulumBulletEnv-v0:
111 | <<: *pybullet-defaults
112 |
113 | # Tuned
114 | InvertedPendulumSwingupBulletEnv-v0:
115 | <<: *pybullet-defaults
116 | n_timesteps: !!float 3e5
117 |
118 |
119 | MinitaurBulletEnv-v0:
120 | n_timesteps: !!float 1e6
121 | policy: 'MlpPolicy'
122 | gamma: 0.99
123 | buffer_size: 1000000
124 | noise_type: 'normal'
125 | noise_std: 0.1
126 | learning_starts: 10000
127 | batch_size: 256
128 | learning_rate: !!float 1e-3
129 | train_freq: 1
130 | gradient_steps: 1
131 | policy_kwargs: "dict(net_arch=[400, 300])"
132 |
133 | # === Mujoco Envs ===
134 | HalfCheetah-v4: &mujoco-defaults
135 | n_timesteps: !!float 1e6
136 | policy: 'MlpPolicy'
137 | learning_starts: 10000
138 | noise_type: 'normal'
139 | noise_std: 0.1
140 | train_freq: 1
141 | gradient_steps: 1
142 | learning_rate: !!float 1e-3
143 | batch_size: 256
144 | policy_kwargs: "dict(net_arch=[400, 300])"
145 |
146 | Ant-v4:
147 | <<: *mujoco-defaults
148 |
149 | Hopper-v4:
150 | <<: *mujoco-defaults
151 |
152 | Walker2d-v4:
153 | <<: *mujoco-defaults
154 |
155 | Humanoid-v4:
156 | <<: *mujoco-defaults
157 | n_timesteps: !!float 2e6
158 |
159 | # Tuned
160 | Swimmer-v4:
161 | <<: *mujoco-defaults
162 | gamma: 0.9999
163 |
--------------------------------------------------------------------------------
/images/car.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/images/car.jpg
--------------------------------------------------------------------------------
/logs/benchmark/a2c-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614768288.4174142, "env_id": "BeamRiderNoFrameskip-v4"}
2 | r,l,t
3 | 5310.0,19931,11.331775
4 | 5278.0,18863,19.145522
5 | 4412.0,18499,26.784143
6 | 2160.0,10025,30.907018
7 | 1380.0,7261,33.878756
8 | 1848.0,9623,37.852282
9 | 6372.0,22457,47.104666
10 | 2160.0,10896,51.553509
11 | 5470.0,17871,58.882525
12 | 1380.0,7366,61.998566
13 | 1380.0,7597,65.10874
14 | 1380.0,7625,68.216608
15 | 1188.0,6035,70.682402
16 | 2496.0,12475,75.791831
17 | 3000.0,13264,81.213101
18 | 3840.0,14565,87.340508
19 | 1588.0,9303,91.146296
20 | 5024.0,18179,98.595311
21 | 4734.0,15926,105.128943
22 | 2496.0,11991,110.020293
23 | 3000.0,12733,115.234923
24 | 2160.0,10681,119.611882
25 | 1284.0,9009,123.288294
26 | 2160.0,10005,127.381307
27 | 1536.0,7641,130.503966
28 | 3964.0,16999,137.472344
29 | 1796.0,9815,141.513283
30 | 2776.0,12343,146.616668
31 | 1188.0,7561,149.761088
32 | 1952.0,9335,153.61017
33 | 4028.0,17717,160.872984
34 | 2160.0,11651,165.693949
35 | 1692.0,9569,169.702119
36 | 2160.0,10413,173.962517
37 | 3420.0,13499,179.485842
38 | 1484.0,7885,182.708011
39 | 3360.0,14125,188.490419
40 | 2056.0,11479,193.165845
41 | 3000.0,14509,199.107143
42 | 2832.0,11739,203.909528
43 | 4832.0,17885,211.235447
44 | 4576.0,18013,218.621224
45 | 4348.0,17821,226.003222
46 | 4384.0,15790,232.468659
47 | 2160.0,11752,237.280512
48 | 1640.0,8824,241.053055
49 | 3000.0,12559,246.359317
50 |
--------------------------------------------------------------------------------
/logs/benchmark/a2c-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615193378.914093, "env_id": "BipedalWalkerHardcore-v3"}
2 | r,l,t
3 | 267.45342,1121,3.428861
4 | 52.082271,706,4.11655
5 | 96.57381,881,4.968083
6 | -54.718438,2000,6.907327
7 | -40.642382,356,7.254022
8 | -74.03119,183,7.43183
9 | 92.052367,897,8.295187
10 | 131.774118,959,9.223105
11 | 42.129698,788,9.97768
12 | -13.796958,2000,11.907291
13 | 258.424171,1285,13.191344
14 | 270.596795,1127,14.412434
15 | 266.009622,1171,15.68599
16 | -37.48283,307,16.011555
17 | 5.6025,498,16.489811
18 | 71.054019,668,17.134797
19 | 226.553959,2000,19.053179
20 | -29.138281,320,19.361876
21 | 114.494889,2000,21.290722
22 | 14.069756,475,21.746166
23 | -33.844138,2000,23.699775
24 | -71.932055,140,23.835918
25 | 43.003696,663,24.471339
26 | 266.815498,1151,25.580017
27 | -50.527918,2000,27.503519
28 | -43.358147,305,27.799032
29 | 64.183689,2000,29.724874
30 | 47.123,664,30.363194
31 | 267.460295,1167,31.487743
32 | 211.121624,2000,33.415691
33 | -5.723699,2000,35.34649
34 | 45.352271,2000,37.356237
35 | 31.473474,2000,39.551819
36 | -22.38959,2000,41.54504
37 | 262.385989,1233,42.732931
38 | -56.927479,2000,44.666323
39 | -35.414777,311,44.971309
40 | 37.318868,2000,46.90783
41 | 216.945029,2000,48.835921
42 | 268.85666,1135,49.923489
43 | 258.979918,1245,51.121087
44 | -53.220669,280,51.395679
45 | -52.96222,2000,53.395831
46 | -30.573014,2000,55.571042
47 | 86.642791,823,56.416
48 | 262.444665,1200,57.577227
49 | 264.190806,1200,58.728613
50 | -16.522844,490,59.203664
51 | 29.094346,2000,61.154416
52 | -7.494107,484,61.623292
53 | 146.257517,1037,62.624309
54 | 87.786025,705,63.30837
55 | 244.124279,1425,64.674047
56 | -5.306314,392,65.052497
57 | 206.864062,2000,66.985329
58 | 68.390227,2000,68.913324
59 | -78.383474,141,69.049458
60 | 32.96207,601,69.629996
61 | 266.996667,1162,70.747888
62 | 146.95878,967,71.681916
63 | -20.563405,2000,73.619046
64 | -41.638012,2000,75.55533
65 | -35.748264,2000,77.584659
66 | 262.590096,1215,78.910465
67 | 266.482528,1177,80.171633
68 | 261.938973,1184,81.313759
69 | -17.894561,2000,83.252325
70 | 256.634081,1282,84.484862
71 | 264.271622,1192,85.628885
72 | -56.296586,2000,87.590536
73 | 79.150195,2000,89.522783
74 | 260.159147,1227,90.70997
75 | 267.130668,1162,91.83126
76 | 125.648535,2000,93.906298
77 | -70.775179,2000,96.076391
78 | 267.10438,1163,97.195431
79 | 266.611174,1175,98.323065
80 | -35.588589,2000,100.260114
81 | 166.139694,2000,102.199227
82 | 265.671085,1187,103.346298
83 | 78.281103,717,104.035931
84 | -30.56892,2000,105.959747
85 | 104.053373,2000,107.905378
86 | 110.668196,2000,109.837001
87 | 27.119705,706,110.521208
88 | 250.962943,1363,111.834788
89 | 259.842996,1249,113.04013
90 | 116.491483,2000,114.972856
91 | 110.491584,2000,116.926345
92 | 137.124841,2000,119.112893
93 | 44.207205,2000,121.151075
94 | 97.941463,1188,122.29184
95 | 13.754588,438,122.717643
96 | -35.686061,2000,124.655047
97 | 268.467145,1161,125.767297
98 | -85.521298,95,125.862401
99 | -15.321094,2000,127.812872
100 | 262.569942,1170,128.940263
101 | 37.488241,2000,130.87561
102 | -60.510632,2000,132.82389
103 | -67.947069,2000,135.042222
104 | 267.845441,1157,136.268686
105 | 265.24081,1146,137.37269
106 | 71.176076,2000,139.299905
107 | 19.659466,556,139.834481
108 | 157.192219,2000,141.771152
109 | -45.485992,297,142.060811
110 | 94.526833,953,142.978194
111 | 275.8062,1055,143.992682
112 | 147.267272,2000,145.937913
113 | -6.264808,2000,147.862182
114 | 46.27028,592,148.43445
115 | 260.913623,1234,149.621845
116 |
--------------------------------------------------------------------------------
/logs/benchmark/a2c-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614767776.374316, "env_id": "BreakoutNoFrameskip-v4"}
2 | r,l,t
3 | 375.0,7606,6.076401
4 | 86.0,5504,8.302102
5 | 421.0,9923,12.304659
6 | 270.0,7092,15.171841
7 | 416.0,10123,19.272449
8 | 375.0,7936,22.472544
9 | 66.0,4884,24.44387
10 | 155.0,3572,25.881367
11 | 353.0,8509,29.312325
12 | 206.0,5237,31.436018
13 | 152.0,6099,33.923915
14 | 261.0,4678,35.802962
15 | 420.0,10346,39.934088
16 | 414.0,9589,43.782337
17 | 76.0,4809,45.771475
18 | 369.0,7685,48.882821
19 | 378.0,9284,52.609996
20 | 422.0,9561,56.527488
21 | 360.0,9243,60.541219
22 | 285.0,7841,63.847421
23 | 388.0,9060,67.705209
24 | 424.0,11123,72.290951
25 | 398.0,11786,77.135814
26 | 381.0,8441,80.522488
27 | 306.0,6006,82.938207
28 | 97.0,5661,85.224353
29 | 217.0,7602,88.45842
30 | 424.0,10106,92.498425
31 | 408.0,9388,96.425963
32 | 388.0,9039,100.346374
33 | 365.0,7323,103.434267
34 | 254.0,4257,105.377463
35 | 377.0,6523,108.253703
36 | 172.0,4913,110.310678
37 | 54.0,4115,111.961039
38 | 427.0,9623,115.799037
39 | 290.0,8287,119.121087
40 | 321.0,6662,121.808789
41 | 341.0,8114,125.064535
42 | 299.0,7691,128.199606
43 | 101.0,5482,130.39826
44 | 25.0,3313,131.728176
45 | 149.0,7167,134.684085
46 | 396.0,6986,137.652418
47 | 20.0,3088,138.91907
48 | 367.0,6303,141.544503
49 | 60.0,5134,143.608451
50 | 272.0,7200,146.649598
51 | 241.0,8170,150.087444
52 | 323.0,7407,153.176795
53 | 60.0,5605,155.588469
54 | 357.0,7098,158.436126
55 | 382.0,11383,162.98275
56 | 346.0,7662,166.189121
57 | 433.0,11569,170.862319
58 | 322.0,8534,174.628042
59 | 364.0,9157,178.331138
60 | 393.0,11215,182.997787
61 | 33.0,3490,184.547825
62 | 184.0,6600,187.359139
63 | 116.0,6307,189.997411
64 | 389.0,7875,193.394374
65 | 162.0,6677,196.233643
66 | 227.0,5508,198.446902
67 | 349.0,8482,201.844003
68 | 232.0,5904,204.213293
69 | 322.0,7344,207.185952
70 | 98.0,5867,209.543781
71 | 409.0,7671,212.636367
72 | 256.0,5574,214.934022
73 | 78.0,3914,216.506063
74 | 232.0,5971,219.082668
75 | 277.0,7206,222.017627
76 | 345.0,8122,225.279926
77 | 258.0,8608,228.830755
78 | 134.0,6329,231.487061
79 | 224.0,5912,234.179806
80 | 377.0,7698,237.315182
81 | 412.0,9315,241.053151
82 | 304.0,6834,243.793879
83 | 363.0,9009,247.567084
84 | 360.0,10184,251.635881
85 |
--------------------------------------------------------------------------------
/logs/benchmark/a2c-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614793054.584203, "env_id": "EnduroNoFrameskip-v4"}
2 | r,l,t
3 | 0.0,13312,8.52731
4 | 0.0,13312,14.248429
5 | 0.0,13312,19.970835
6 | 0.0,13312,25.683224
7 | 0.0,13312,31.395868
8 | 0.0,13312,37.1207
9 | 0.0,13312,42.844106
10 | 0.0,13312,48.554223
11 | 0.0,13312,54.268349
12 | 0.0,13312,59.978596
13 | 0.0,13312,65.714733
14 | 0.0,13312,71.429556
15 | 0.0,13312,77.140308
16 | 0.0,13312,82.848562
17 | 0.0,13312,88.577056
18 | 0.0,13312,94.29743
19 | 0.0,13312,100.011838
20 | 0.0,13312,105.724653
21 | 0.0,13312,111.4436
22 | 0.0,13312,117.16673
23 | 0.0,13312,122.898437
24 | 0.0,13312,128.624969
25 | 0.0,13312,134.347584
26 | 0.0,13312,140.069764
27 | 0.0,13312,145.793799
28 | 0.0,13312,151.52352
29 | 0.0,13312,157.250938
30 | 0.0,13312,162.968618
31 | 0.0,13312,168.684224
32 | 0.0,13312,174.38758
33 | 0.0,13312,180.092482
34 | 0.0,13312,185.795563
35 | 0.0,13312,191.487232
36 | 0.0,13312,197.166874
37 | 0.0,13312,202.842334
38 | 0.0,13312,208.507896
39 | 0.0,13312,214.19725
40 | 0.0,13312,219.878992
41 | 0.0,13312,225.556518
42 | 0.0,13312,231.221534
43 | 0.0,13312,236.901492
44 | 0.0,13312,242.571724
45 | 0.0,13312,248.255306
46 | 0.0,13312,253.931271
47 | 0.0,13312,259.607796
48 |
--------------------------------------------------------------------------------
/logs/benchmark/a2c-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614937367.290064, "env_id": "PongNoFrameskip-v4"}
2 | r,l,t
3 | 17.0,9410,6.490385
4 | 20.0,6460,9.053506
5 | 21.0,6612,11.682447
6 | 16.0,10462,15.840014
7 | 16.0,9991,19.805953
8 | 18.0,9430,23.54848
9 | 14.0,12478,28.513371
10 | 21.0,8217,31.778995
11 | 13.0,10952,36.132016
12 | 15.0,9796,40.019487
13 | 20.0,8360,43.334407
14 | 18.0,8692,46.782233
15 | 19.0,8696,50.229097
16 | 15.0,9513,54.008494
17 | 18.0,8654,57.441068
18 | 18.0,9672,61.291382
19 | 16.0,9992,65.258582
20 | 18.0,8518,68.636641
21 | 21.0,7975,71.803567
22 | 14.0,11213,76.254159
23 | 13.0,11658,80.887247
24 | 19.0,8659,84.324623
25 | 19.0,8699,87.776711
26 | 17.0,8714,91.239745
27 | 14.0,10644,95.463618
28 | 20.0,6268,97.947606
29 | 16.0,8232,101.212081
30 | 17.0,8342,104.524828
31 | 13.0,10497,108.687028
32 | 21.0,7977,111.853881
33 | 9.0,16950,118.590817
34 | 19.0,8345,121.907908
35 | 17.0,8330,125.210845
36 | 19.0,8607,128.627715
37 | 21.0,7977,131.794741
38 | 16.0,10361,135.957308
39 | 19.0,7406,138.89799
40 | 14.0,9998,142.860846
41 | 19.0,8425,146.197888
42 | 9.0,13049,151.363693
43 | 18.0,7094,154.169493
44 | 13.0,10650,158.380911
45 | 18.0,9850,162.280141
46 | 20.0,8748,165.737029
47 | 11.0,14626,171.512476
48 | 20.0,7334,174.406643
49 | 19.0,7206,177.238544
50 | 17.0,7696,180.272341
51 | 20.0,8487,183.609726
52 | 18.0,9257,187.252612
53 | 20.0,6972,189.992589
54 | 20.0,6478,192.620501
55 | 19.0,7444,195.968758
56 | 19.0,9188,199.581835
57 | 15.0,10271,203.622099
58 | 19.0,8481,206.963809
59 | 18.0,9057,210.536548
60 | 21.0,7905,213.649117
61 | 20.0,7008,216.409976
62 | 6.0,14798,222.241111
63 | 21.0,7975,225.376031
64 | 15.0,10766,229.613505
65 | 20.0,6676,232.238959
66 | 17.0,8737,235.679444
67 | 21.0,7975,238.823486
68 |
--------------------------------------------------------------------------------
/logs/benchmark/a2c-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615281451.6329484, "env_id": "SeaquestNoFrameskip-v4"}
2 | r,l,t
3 | 1720.0,9082,7.009356
4 | 1720.0,9082,11.020819
5 | 1800.0,9081,15.104989
6 | 1720.0,9081,19.505034
7 | 1780.0,9082,23.829598
8 | 1680.0,9082,28.13943
9 | 1720.0,9081,32.502165
10 | 1720.0,9082,36.898275
11 | 1780.0,9082,41.240505
12 | 1720.0,8826,45.521239
13 | 1760.0,9082,49.81492
14 | 1780.0,9081,54.232201
15 | 1760.0,9082,58.573261
16 | 1760.0,9082,62.944622
17 | 1460.0,7674,66.655156
18 | 1740.0,9082,71.012287
19 | 1760.0,9082,75.39344
20 | 1600.0,8538,79.487796
21 | 1760.0,9082,83.874407
22 | 1760.0,9081,88.244735
23 | 1800.0,9081,92.608476
24 | 1620.0,8449,96.688586
25 | 1680.0,8985,100.902476
26 | 1760.0,9081,105.253726
27 | 1760.0,9082,109.753073
28 | 1660.0,9082,114.145087
29 | 1660.0,9082,118.488832
30 | 1760.0,9081,122.930631
31 | 1760.0,9082,127.255234
32 | 1760.0,9082,131.570187
33 | 1740.0,9082,135.983795
34 | 1540.0,8262,140.112782
35 | 1560.0,8458,144.235981
36 | 1740.0,9082,148.655545
37 | 1820.0,9082,153.08726
38 | 1780.0,9081,157.483775
39 | 1760.0,9082,161.925575
40 | 1800.0,9081,166.24076
41 | 1780.0,9082,170.670106
42 | 1780.0,9082,175.08615
43 | 1620.0,8890,179.489985
44 | 1760.0,9082,183.932905
45 | 1720.0,9082,188.377881
46 | 1740.0,9082,192.827981
47 | 1660.0,9082,197.229679
48 | 1720.0,9082,201.669629
49 | 1740.0,9082,206.061735
50 | 1760.0,9081,210.484065
51 | 1800.0,9082,214.923074
52 | 1820.0,9082,219.321101
53 | 1760.0,9082,223.750549
54 | 1680.0,8746,227.91953
55 | 1760.0,9081,232.40319
56 | 1760.0,9081,236.780692
57 | 1780.0,9082,241.235071
58 | 1560.0,8409,245.320963
59 | 1740.0,9081,249.75418
60 | 1780.0,9082,254.156047
61 | 1800.0,9082,258.546376
62 | 1820.0,9081,263.008498
63 | 1660.0,8570,267.151795
64 | 1780.0,9082,271.580838
65 | 1760.0,9082,275.890209
66 | 1760.0,9082,280.313025
67 | 1080.0,6389,283.3866
68 | 1740.0,9082,287.816725
69 | 1760.0,9082,292.247788
70 |
--------------------------------------------------------------------------------
/logs/benchmark/a2c-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614768543.9226692, "env_id": "SpaceInvadersNoFrameskip-v4"}
2 | r,l,t
3 | 495.0,3553,4.361108
4 | 550.0,3429,5.705929
5 | 550.0,3287,7.173348
6 | 730.0,3349,8.490565
7 | 545.0,3337,9.800351
8 | 525.0,3355,11.111516
9 | 545.0,2675,12.16402
10 | 570.0,2978,13.33383
11 | 570.0,3676,14.939109
12 | 545.0,3137,16.191074
13 | 800.0,3321,17.490357
14 | 515.0,3126,18.717305
15 | 580.0,3390,20.041372
16 | 575.0,3362,21.372456
17 | 345.0,2841,22.496135
18 | 515.0,3585,23.907067
19 | 575.0,2803,25.019226
20 | 520.0,3360,26.338413
21 | 545.0,3477,27.708291
22 | 925.0,5786,29.996137
23 | 835.0,5663,32.236963
24 | 525.0,3168,33.48305
25 | 1450.0,7057,36.259171
26 | 600.0,3823,37.755884
27 | 520.0,3563,39.157691
28 | 575.0,3715,40.611313
29 | 570.0,3322,41.912127
30 | 550.0,3481,43.280879
31 | 570.0,3459,44.628411
32 | 515.0,3459,46.029524
33 | 575.0,3795,47.527183
34 | 525.0,3062,48.729837
35 | 545.0,3865,50.2381
36 | 745.0,2843,51.355055
37 | 575.0,3545,52.73239
38 | 600.0,3691,54.164446
39 | 485.0,4321,55.862061
40 | 1315.0,7361,58.748237
41 | 515.0,3484,60.111922
42 | 560.0,2739,61.186349
43 | 555.0,3289,62.582255
44 | 525.0,3659,64.016016
45 | 570.0,3440,65.357781
46 | 1155.0,6121,67.756124
47 | 545.0,3315,69.061701
48 | 570.0,3365,70.485821
49 | 570.0,3858,72.024043
50 | 540.0,2799,73.125923
51 | 775.0,4102,74.728046
52 | 1130.0,7383,77.617019
53 | 545.0,4152,79.233018
54 | 545.0,2633,80.268746
55 | 575.0,3038,81.466269
56 | 570.0,3209,82.723093
57 | 545.0,4051,84.292974
58 | 600.0,3437,85.638784
59 | 390.0,3047,87.014406
60 | 975.0,6881,89.72367
61 | 515.0,3819,91.222727
62 | 725.0,3221,92.494269
63 | 745.0,3249,93.762644
64 | 540.0,3393,95.268128
65 | 515.0,3435,96.621554
66 | 600.0,4159,98.248236
67 | 1115.0,7433,101.158546
68 | 370.0,2565,102.171789
69 | 570.0,2896,103.312471
70 | 540.0,3486,104.682461
71 | 525.0,3669,106.128078
72 | 1160.0,7370,109.019551
73 | 510.0,3912,110.559911
74 | 520.0,3469,111.917484
75 | 420.0,3453,113.272565
76 | 545.0,3116,114.497209
77 | 550.0,3450,115.850051
78 | 745.0,3147,117.086844
79 | 520.0,3287,118.369515
80 | 570.0,4413,120.086121
81 | 575.0,4237,121.742209
82 | 520.0,3777,123.223256
83 | 545.0,3377,124.550506
84 | 575.0,3467,125.921681
85 | 520.0,3360,127.257644
86 | 545.0,2761,128.345333
87 | 575.0,3862,129.863054
88 | 490.0,3505,131.242345
89 | 415.0,2915,132.388172
90 | 575.0,3159,133.637486
91 | 545.0,3223,134.904184
92 | 515.0,3273,136.193307
93 | 600.0,3703,137.632767
94 | 770.0,3621,139.062721
95 | 540.0,3294,140.35698
96 | 545.0,2845,141.470494
97 | 515.0,3457,142.824692
98 | 600.0,4255,144.484802
99 | 600.0,3891,145.999692
100 | 460.0,3043,147.201257
101 | 525.0,3496,148.572112
102 | 745.0,3746,150.075762
103 | 600.0,3229,151.412536
104 | 800.0,3625,152.829677
105 | 505.0,3329,154.141243
106 | 1495.0,6859,156.837327
107 | 395.0,3243,158.115792
108 | 750.0,3166,159.360218
109 | 570.0,3679,160.795473
110 | 1010.0,4911,162.731656
111 | 570.0,3457,164.089787
112 | 605.0,2957,165.261341
113 | 1050.0,4771,167.193539
114 | 575.0,3417,168.530889
115 | 775.0,3331,169.843351
116 | 545.0,3855,171.345333
117 | 550.0,3275,172.629834
118 | 500.0,3263,173.936338
119 | 750.0,4023,175.632566
120 | 520.0,3051,176.833315
121 | 740.0,3550,178.231961
122 | 525.0,3157,179.469186
123 | 980.0,5809,181.75283
124 | 545.0,3373,183.07182
125 | 680.0,3205,184.332636
126 | 600.0,3309,185.631534
127 | 490.0,3827,187.135568
128 | 550.0,3344,188.445078
129 | 725.0,4961,190.382123
130 | 950.0,3156,191.617991
131 | 570.0,3672,193.044935
132 | 520.0,3355,194.369309
133 | 540.0,3834,195.876062
134 | 545.0,3689,197.317266
135 | 545.0,2803,198.413652
136 | 515.0,3243,199.68673
137 | 575.0,3441,201.035745
138 | 555.0,3152,202.270614
139 | 715.0,3957,203.821104
140 | 520.0,3490,205.186715
141 | 550.0,3545,206.592009
142 | 580.0,3537,207.970124
143 | 1255.0,7445,210.877233
144 | 605.0,3449,212.216876
145 | 520.0,3051,213.408286
146 | 1625.0,9443,217.101115
147 | 570.0,3159,218.331664
148 | 605.0,3249,219.598299
149 | 545.0,3231,220.860407
150 | 575.0,2931,221.999535
151 | 510.0,2737,223.074782
152 | 600.0,3699,224.509123
153 | 575.0,4609,226.294702
154 | 600.0,4095,227.885526
155 | 600.0,3505,229.250235
156 | 730.0,3457,230.704194
157 | 745.0,3130,231.928799
158 | 575.0,3216,233.187673
159 | 600.0,3293,234.468616
160 | 545.0,3218,235.712941
161 | 500.0,3685,237.147581
162 | 545.0,3973,238.775266
163 | 600.0,3801,240.252894
164 | 550.0,3391,241.570879
165 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-AsteroidsNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615283610.9370732, "env_id": "AsteroidsNoFrameskip-v4"}
2 | r,l,t
3 | 630.0,2948,3.806405
4 | 880.0,4636,5.560121
5 | 1080.0,3886,7.029938
6 | 630.0,5936,9.272279
7 | 780.0,4538,10.979955
8 | 1080.0,5056,12.878166
9 | 730.0,2726,13.92655
10 | 530.0,1782,14.599309
11 | 830.0,3484,15.916041
12 | 780.0,4220,17.516311
13 | 1320.0,21476,25.599429
14 | 680.0,2820,26.653483
15 | 580.0,5602,28.763651
16 | 630.0,3204,29.963536
17 | 830.0,3230,31.171206
18 | 580.0,4414,32.818263
19 | 1350.0,13594,37.866124
20 | 530.0,2182,38.679443
21 | 980.0,6624,41.277205
22 | 380.0,2260,42.171792
23 | 530.0,2246,43.034077
24 | 680.0,2662,44.036025
25 | 830.0,3606,45.39094
26 | 980.0,3120,46.550376
27 | 730.0,3518,47.880018
28 | 380.0,2260,48.734687
29 | 660.0,3296,49.979011
30 | 830.0,4678,51.755557
31 | 580.0,2652,52.757058
32 | 630.0,2612,53.747996
33 | 380.0,1268,54.225423
34 | 780.0,3730,55.635921
35 | 1180.0,8152,58.709851
36 | 430.0,1970,59.45361
37 | 930.0,9838,63.172857
38 | 930.0,14486,68.64684
39 | 680.0,4178,70.234863
40 | 1180.0,22736,78.745257
41 | 630.0,1780,79.413591
42 | 730.0,3642,80.78565
43 | 380.0,1938,81.516054
44 | 230.0,1494,82.075747
45 | 930.0,4740,83.871758
46 | 530.0,4052,85.406782
47 | 930.0,2948,86.517116
48 | 930.0,3048,87.663416
49 | 1340.0,16218,93.72735
50 | 360.0,1496,94.288277
51 | 630.0,2744,95.325491
52 | 630.0,1780,95.993038
53 | 780.0,3612,97.35601
54 | 880.0,3692,98.752235
55 | 730.0,2452,99.674829
56 | 780.0,2676,100.681265
57 | 780.0,2324,101.55319
58 | 780.0,2678,102.566934
59 | 630.0,2612,103.553079
60 | 1080.0,4050,105.075956
61 | 1460.0,10328,108.963586
62 | 630.0,2438,109.88712
63 | 1180.0,16976,116.303472
64 | 1080.0,4614,118.073532
65 | 1080.0,4690,119.866369
66 | 780.0,3664,121.273588
67 | 830.0,4382,122.946313
68 | 830.0,4578,124.703727
69 | 730.0,2838,125.787912
70 | 430.0,2502,126.745888
71 | 610.0,3276,127.999948
72 | 780.0,2678,129.026407
73 | 730.0,4440,130.724924
74 | 1080.0,8960,134.155202
75 | 1080.0,5556,136.260477
76 | 630.0,3438,137.511145
77 | 1080.0,6520,139.76433
78 | 630.0,1780,140.379913
79 | 410.0,1252,140.832459
80 | 780.0,2678,141.805494
81 | 380.0,2260,142.623271
82 | 630.0,1780,143.278926
83 | 1030.0,4518,145.009336
84 | 980.0,4404,146.690564
85 | 630.0,1780,147.365815
86 | 830.0,4266,148.99672
87 | 430.0,3342,150.270021
88 | 680.0,3476,151.585912
89 | 1080.0,10406,155.475321
90 | 730.0,4966,157.342425
91 | 930.0,4308,158.954403
92 | 880.0,4486,160.639949
93 | 1400.0,11686,165.001423
94 | 630.0,2992,166.126216
95 | 580.0,2652,167.116391
96 | 980.0,3866,168.562401
97 | 930.0,5498,170.620965
98 | 630.0,1780,171.284593
99 | 980.0,5008,173.16315
100 | 830.0,3448,174.455275
101 | 830.0,4424,176.110061
102 | 1030.0,4918,177.942581
103 | 230.0,1494,178.497264
104 | 830.0,4946,180.345697
105 | 930.0,6104,182.637209
106 | 780.0,4566,184.351494
107 | 1080.0,7800,187.232992
108 | 1320.0,8124,190.256808
109 | 430.0,1760,190.907736
110 | 880.0,6200,193.223942
111 | 880.0,2678,194.225293
112 | 680.0,3120,195.386348
113 | 780.0,2676,196.381358
114 | 530.0,3896,197.835135
115 | 1080.0,5948,200.057237
116 | 680.0,3930,201.528874
117 | 1180.0,4240,203.113463
118 | 630.0,3372,204.369068
119 | 430.0,2502,205.304568
120 | 830.0,4266,206.894893
121 | 630.0,3306,208.131841
122 | 730.0,4428,209.786296
123 | 830.0,5548,211.859851
124 | 830.0,4014,213.35282
125 | 430.0,2502,214.288179
126 | 680.0,4510,215.979968
127 | 930.0,4202,217.558226
128 | 780.0,5600,219.657292
129 | 1410.0,6062,221.910441
130 | 780.0,4486,223.582816
131 | 430.0,2502,224.521364
132 | 330.0,2164,225.330174
133 | 380.0,2010,226.078029
134 | 530.0,2628,227.055925
135 | 1280.0,7440,229.784199
136 | 1030.0,3484,231.088481
137 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614793581.2582045, "env_id": "BeamRiderNoFrameskip-v4"}
2 | r,l,t
3 | 6576.0,20983,10.683172
4 | 2720.0,13592,15.828768
5 | 7740.0,23813,24.863982
6 | 4320.0,18685,31.91907
7 | 2108.0,8766,35.233023
8 | 1380.0,7675,38.103883
9 | 5696.0,19085,45.328726
10 | 5768.0,17864,52.062259
11 | 4320.0,13583,57.184806
12 | 4606.0,18828,64.292317
13 | 6478.0,19078,71.502656
14 | 2160.0,10273,75.362682
15 | 4260.0,16581,81.626119
16 | 6742.0,22736,90.241258
17 | 1952.0,9649,93.879019
18 | 3480.0,16215,99.980142
19 | 2160.0,12859,104.814137
20 | 3360.0,15497,110.641296
21 | 5376.0,19580,118.032608
22 | 6440.0,20723,125.866222
23 | 4734.0,17357,132.415582
24 | 4350.0,17837,139.141831
25 | 1432.0,8217,142.219953
26 | 4804.0,17582,148.852138
27 | 3420.0,14283,154.229172
28 | 4448.0,16951,160.630883
29 | 7140.0,21519,168.752505
30 | 6810.0,21377,176.88184
31 | 5182.0,20176,184.515731
32 | 3780.0,15807,190.476166
33 | 4796.0,20191,198.123916
34 | 1380.0,7373,200.899911
35 | 6292.0,22057,209.207382
36 | 1744.0,9035,212.610699
37 | 2160.0,9577,216.214398
38 | 4680.0,16599,222.475892
39 | 4156.0,18829,229.597445
40 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614778785.8173542, "env_id": "BreakoutNoFrameskip-v4"}
2 | r,l,t
3 | 390.0,14936,8.298947
4 | 310.0,8119,11.281157
5 | 137.0,5823,13.430059
6 | 308.0,8376,16.514826
7 | 405.0,9718,20.091962
8 | 375.0,10544,23.962725
9 | 396.0,11052,28.012314
10 | 399.0,10183,31.745516
11 | 77.0,4572,33.434542
12 | 332.0,10465,37.285213
13 | 377.0,11866,41.624256
14 | 261.0,6623,44.058955
15 | 402.0,6847,46.571992
16 | 387.0,9037,49.892307
17 | 303.0,9094,53.333262
18 | 319.0,22476,61.599988
19 | 372.0,8259,64.668601
20 | 396.0,26450,74.652882
21 | 318.0,6843,77.411975
22 | 372.0,8167,80.415497
23 | 400.0,21998,88.477855
24 | 351.0,20941,96.400516
25 | 397.0,14151,101.604712
26 | 403.0,8723,104.806063
27 | 370.0,9713,108.366861
28 | 386.0,7965,111.298332
29 | 381.0,10702,115.23716
30 | 408.0,10803,119.190315
31 | 387.0,8583,122.349447
32 | 419.0,8411,125.439177
33 | 374.0,8879,128.696841
34 | 403.0,11577,133.006629
35 | 405.0,14521,138.325462
36 | 354.0,8603,141.489714
37 | 355.0,8642,144.662911
38 | 390.0,15271,150.377894
39 | 393.0,9215,153.764675
40 | 401.0,8586,157.064263
41 | 369.0,9011,160.368164
42 | 405.0,8090,163.333655
43 | 306.0,13302,168.210222
44 | 399.0,23832,177.095047
45 | 320.0,8164,180.124154
46 | 404.0,10433,184.107888
47 | 379.0,9634,187.624143
48 | 399.0,8575,190.755317
49 | 336.0,13848,195.785423
50 | 383.0,7562,198.536605
51 | 368.0,8137,201.509571
52 | 387.0,8579,204.624538
53 | 292.0,7483,207.352157
54 | 407.0,9572,210.8472
55 | 342.0,25215,220.13302
56 | 304.0,6047,222.341196
57 | 295.0,7243,224.988355
58 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614937878.9643338, "env_id": "EnduroNoFrameskip-v4"}
2 | r,l,t
3 | 773.0,39936,18.466804
4 | 1076.0,53248,39.507428
5 | 1022.0,53248,60.545804
6 | 1049.0,53248,81.489567
7 | 792.0,39936,97.197674
8 | 1012.0,53248,118.132145
9 | 771.0,39936,133.855354
10 | 785.0,39936,149.558964
11 | 484.0,26624,160.020037
12 | 788.0,39936,175.723603
13 | 792.0,39936,191.440947
14 | 784.0,39936,207.150612
15 | 433.0,26624,217.608288
16 | 1072.0,53248,238.547646
17 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1659728618.0716512, "env_id": "MsPacmanNoFrameskip-v4"}
2 | r,l,t
3 | 3240.0,4610,4.565343
4 | 2380.0,3650,6.179767
5 | 3240.0,4722,8.242586
6 | 2950.0,4218,9.901634
7 | 2440.0,4450,11.644655
8 | 1600.0,3226,12.901034
9 | 3140.0,3970,14.512564
10 | 2570.0,4658,16.381635
11 | 2720.0,4242,18.09169
12 | 2540.0,4274,19.819639
13 | 2450.0,4098,21.46781
14 | 2450.0,4330,23.219339
15 | 2980.0,4578,25.034832
16 | 2750.0,4714,26.924132
17 | 3050.0,4386,28.647907
18 | 2840.0,4490,30.418036
19 | 2300.0,4402,32.153116
20 | 2290.0,3586,33.554561
21 | 1750.0,3802,35.044998
22 | 2400.0,4074,36.637052
23 | 3240.0,4546,38.421748
24 | 1760.0,4346,40.121146
25 | 2650.0,4402,41.842615
26 | 2500.0,3530,43.217673
27 | 2450.0,4570,45.000941
28 | 3140.0,4618,46.799562
29 | 2880.0,3730,48.256968
30 | 2240.0,3842,49.760671
31 | 3050.0,5538,51.925182
32 | 2730.0,4466,53.678259
33 | 3630.0,4698,55.515136
34 | 3150.0,4578,57.304975
35 | 3290.0,4794,59.179325
36 | 2210.0,3146,60.410003
37 | 2850.0,4450,62.155477
38 | 2480.0,4154,63.773322
39 | 3240.0,4458,65.523309
40 | 2850.0,4450,67.262537
41 | 2640.0,4066,68.852192
42 | 2440.0,4226,70.500446
43 | 3240.0,4626,72.309346
44 | 2830.0,4498,74.066068
45 | 2040.0,3810,75.559353
46 | 2850.0,4938,77.493622
47 | 2180.0,3202,78.742667
48 | 2960.0,4474,80.49608
49 | 2640.0,4714,82.340948
50 | 2240.0,4426,84.070252
51 | 2020.0,4474,85.819671
52 | 2550.0,4234,87.476478
53 | 2300.0,3754,88.943698
54 | 2740.0,3970,90.497502
55 | 1760.0,3698,91.945864
56 | 3110.0,4266,93.617598
57 | 2060.0,3546,94.994061
58 | 2440.0,4258,96.665241
59 | 2850.0,4530,98.440153
60 | 2850.0,4450,100.183625
61 | 2430.0,5162,102.198829
62 | 3030.0,4178,103.835441
63 | 2580.0,3618,105.243359
64 | 2850.0,4450,106.991979
65 | 2720.0,4242,108.651297
66 | 3030.0,4562,110.433888
67 | 2450.0,4354,112.137468
68 | 2700.0,4162,113.759301
69 | 3640.0,4898,115.678251
70 | 2850.0,3986,117.23745
71 | 3030.0,3946,118.78445
72 | 2890.0,4994,120.743981
73 | 2580.0,4786,122.618189
74 | 2640.0,5162,124.632866
75 | 2290.0,4098,126.237313
76 | 2280.0,4842,128.13284
77 | 2640.0,4650,129.952747
78 | 2440.0,4258,131.618814
79 | 2430.0,4226,133.269789
80 | 2640.0,4642,135.086737
81 | 2230.0,3522,136.464286
82 | 2200.0,3146,137.690324
83 | 2530.0,4778,139.554016
84 | 2960.0,4066,141.136904
85 | 2850.0,4450,142.878844
86 | 2840.0,4386,144.598397
87 | 3240.0,4626,146.411167
88 | 2180.0,3202,147.667167
89 | 2440.0,4338,149.359808
90 | 2230.0,3330,150.663123
91 | 2450.0,4810,152.543828
92 | 2840.0,4218,154.185746
93 | 2280.0,4458,155.923587
94 | 3640.0,4418,157.645611
95 | 2350.0,6634,160.237373
96 | 2450.0,4450,161.976687
97 | 2050.0,4258,163.636643
98 | 2740.0,4322,165.323966
99 | 2570.0,4162,166.970684
100 | 2740.0,4378,168.688692
101 | 2850.0,4130,170.294853
102 | 2320.0,3474,171.649862
103 | 3040.0,4186,173.286216
104 | 2730.0,4274,174.960623
105 | 2850.0,4322,176.654881
106 | 3040.0,4394,178.378233
107 | 4230.0,4306,180.056643
108 | 1840.0,4114,181.660494
109 | 3240.0,4842,183.554174
110 | 2570.0,3658,184.979727
111 | 2220.0,4146,186.601411
112 | 1830.0,3378,187.914714
113 | 1670.0,3226,189.174523
114 | 2300.0,3850,190.687076
115 | 2710.0,3842,192.195436
116 | 2850.0,4450,193.952946
117 | 3080.0,5314,196.122141
118 | 2880.0,4922,198.095347
119 | 2240.0,4066,199.693413
120 | 2450.0,4578,201.546361
121 | 2440.0,4066,203.163933
122 | 5040.0,4266,204.833798
123 | 1830.0,4090,206.447688
124 | 2230.0,3650,207.992231
125 | 3240.0,5170,210.199356
126 | 4040.0,4970,212.319058
127 | 3230.0,5090,214.496098
128 | 2740.0,4338,216.337857
129 | 2740.0,4386,218.197287
130 | 3640.0,4378,220.113769
131 | 2270.0,3418,221.690941
132 | 3050.0,5098,223.746212
133 | 2450.0,3482,225.097706
134 | 3240.0,4578,226.889236
135 | 2450.0,3906,228.403416
136 | 3030.0,3970,230.008759
137 | 2310.0,3906,231.596919
138 | 2640.0,4850,233.561483
139 | 2730.0,4274,235.296346
140 | 2640.0,4290,237.03278
141 | 3040.0,4402,238.817874
142 | 2540.0,3778,240.350946
143 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614779237.7516618, "env_id": "PongNoFrameskip-v4"}
2 | r,l,t
3 | 20.0,6948,5.213674
4 | 21.0,6683,7.638885
5 | 20.0,6939,10.154375
6 | 20.0,7144,12.737012
7 | 21.0,6533,15.094103
8 | 21.0,6527,17.458776
9 | 21.0,6692,19.88431
10 | 20.0,7397,22.561709
11 | 20.0,7379,25.2273
12 | 21.0,6755,27.670537
13 | 21.0,6535,30.035142
14 | 19.0,7189,32.634809
15 | 21.0,6533,34.991524
16 | 21.0,6527,37.351326
17 | 21.0,6527,39.702946
18 | 21.0,6687,42.109237
19 | 21.0,6533,44.463506
20 | 21.0,6620,46.849023
21 | 21.0,6533,49.212942
22 | 21.0,6995,51.728969
23 | 21.0,6860,54.200454
24 | 21.0,6533,56.553215
25 | 20.0,7258,59.159745
26 | 20.0,6752,61.596981
27 | 19.0,7025,64.160838
28 | 21.0,6687,66.576899
29 | 21.0,6927,69.062689
30 | 21.0,6692,71.471189
31 | 19.0,7170,74.047396
32 | 20.0,7392,76.718277
33 | 21.0,6687,79.130105
34 | 20.0,6853,81.594396
35 | 20.0,6992,84.121735
36 | 21.0,6533,86.475407
37 | 19.0,7365,89.140565
38 | 21.0,6527,91.490788
39 | 21.0,6692,93.91832
40 | 20.0,7269,96.531502
41 | 21.0,6527,98.888406
42 | 20.0,7273,101.498355
43 | 20.0,6681,103.895792
44 | 20.0,7136,106.459496
45 | 21.0,6620,108.843947
46 | 21.0,6533,111.20154
47 | 21.0,6692,113.604377
48 | 21.0,6692,116.014685
49 | 21.0,7000,118.542409
50 | 21.0,6687,120.945755
51 | 21.0,7258,123.563952
52 | 19.0,7286,126.18596
53 | 21.0,6763,128.66123
54 | 21.0,6690,131.142843
55 | 21.0,6687,133.553499
56 | 21.0,6687,135.956716
57 | 21.0,6671,138.370338
58 | 21.0,6527,140.729741
59 | 21.0,6527,143.078396
60 | 21.0,6527,145.443633
61 | 21.0,6683,147.850354
62 | 21.0,6692,150.267021
63 | 20.0,6922,152.779045
64 | 21.0,6527,155.125277
65 | 20.0,7446,157.806427
66 | 20.0,6737,160.240098
67 | 21.0,6533,162.594134
68 | 21.0,6906,165.074382
69 | 21.0,6533,167.429593
70 | 20.0,6745,169.864267
71 | 21.0,6999,172.378367
72 | 21.0,6508,174.721212
73 | 21.0,6915,177.212817
74 | 20.0,7224,179.820872
75 | 20.0,7279,182.435939
76 | 21.0,6692,184.85815
77 | 20.0,6942,187.353981
78 | 21.0,6527,189.70991
79 | 21.0,6533,192.060815
80 | 21.0,6527,194.421159
81 | 21.0,6755,196.848697
82 | 21.0,6595,199.219058
83 | 20.0,7042,201.748603
84 | 21.0,6692,204.171969
85 | 21.0,6527,206.523586
86 | 20.0,6853,208.986539
87 | 19.0,7552,211.70457
88 | 21.0,6692,214.113817
89 | 21.0,6533,216.462623
90 | 21.0,6533,218.815706
91 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-QbertNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614779461.8854597, "env_id": "QbertNoFrameskip-v4"}
2 | r,l,t
3 | 500.0,2147,3.450365
4 | 675.0,2280,4.281284
5 | 850.0,3327,5.504148
6 | 11350.0,6037,7.714523
7 | 3900.0,2954,8.786828
8 | 15300.0,6627,11.213221
9 | 4225.0,4297,12.786068
10 | 14600.0,6943,15.321182
11 | 425.0,1954,16.032893
12 | 775.0,2362,16.890738
13 | 14800.0,6422,19.253953
14 | 4250.0,3370,20.482237
15 | 11525.0,5167,22.36452
16 | 11425.0,4544,24.030699
17 | 11350.0,4444,25.65287
18 | 8575.0,5000,27.47626
19 | 14725.0,6593,29.891513
20 | 4100.0,2769,30.893262
21 | 14800.0,6422,33.239472
22 | 650.0,2632,34.196923
23 | 11700.0,7037,36.776742
24 | 11625.0,6226,39.061646
25 | 11525.0,5167,40.944191
26 | 3925.0,2408,41.811353
27 | 11675.0,7397,44.505452
28 | 11950.0,4852,46.267992
29 | 700.0,2432,47.143335
30 | 11625.0,6157,49.401633
31 | 11425.0,4604,51.083146
32 | 11425.0,4604,52.764815
33 | 875.0,2887,53.816217
34 | 14600.0,5348,55.76363
35 | 4000.0,2792,56.77684
36 | 14650.0,6588,59.178909
37 | 11425.0,4604,60.859552
38 | 14650.0,5609,63.0423
39 | 11625.0,6157,65.393578
40 | 725.0,2217,66.189359
41 | 14725.0,6612,68.602587
42 | 14800.0,6687,71.035443
43 | 7775.0,4892,72.811937
44 | 14625.0,5478,74.801383
45 | 14600.0,5088,76.658427
46 | 11100.0,4034,78.122569
47 | 50.0,1199,78.547241
48 | 15150.0,6012,80.753348
49 | 11575.0,6872,83.254765
50 | 4425.0,3952,84.685961
51 | 4250.0,2785,85.690472
52 | 7800.0,3592,86.996258
53 | 700.0,2424,87.872761
54 | 7875.0,5553,89.891262
55 | 650.0,2722,90.881686
56 | 475.0,1785,91.521566
57 | 14600.0,5678,93.58827
58 | 14650.0,5558,95.610656
59 | 11425.0,4604,97.288862
60 | 11425.0,4604,98.97228
61 | 650.0,2592,99.915128
62 | 11400.0,5527,101.934255
63 | 4925.0,4992,103.74385
64 | 14750.0,7337,106.41443
65 | 750.0,2639,107.370049
66 | 11875.0,5802,109.488307
67 | 14800.0,6422,112.072564
68 | 11950.0,5387,114.030024
69 | 15050.0,7730,116.853713
70 | 14600.0,5798,118.965109
71 | 875.0,3017,120.060684
72 | 11425.0,4604,121.752643
73 | 4325.0,3712,123.10552
74 | 4025.0,3307,124.304083
75 | 14600.0,6413,126.636068
76 | 14800.0,6157,128.87671
77 | 750.0,2373,129.735242
78 | 14650.0,6723,132.529886
79 | 11975.0,6208,134.864778
80 | 11425.0,4664,136.570588
81 | 775.0,2362,137.425285
82 | 4000.0,2717,138.410109
83 | 11750.0,6157,140.658257
84 | 11625.0,6157,143.046972
85 | 9225.0,3942,144.577907
86 | 14600.0,5403,146.544769
87 | 14875.0,6683,148.984606
88 | 15075.0,6292,151.476602
89 | 11250.0,3950,152.95514
90 | 14700.0,7993,155.873396
91 | 14725.0,6352,158.215972
92 | 14700.0,7312,160.962415
93 | 14700.0,6803,163.442979
94 | 3675.0,2443,164.332046
95 | 14600.0,6743,166.793701
96 | 3700.0,3134,167.926943
97 | 4150.0,4097,169.420132
98 | 11550.0,5930,171.578741
99 | 11425.0,4604,173.259727
100 | 8075.0,3280,174.520545
101 | 14800.0,6422,176.900329
102 | 8025.0,3705,178.240064
103 | 11575.0,7623,181.017266
104 | 750.0,2803,182.034169
105 | 11425.0,4604,183.710549
106 | 11625.0,6157,185.954851
107 | 14800.0,6432,188.295226
108 | 14600.0,6813,190.778485
109 | 14850.0,6704,193.219888
110 | 650.0,2722,194.206065
111 | 15100.0,6267,196.488646
112 | 15025.0,5802,198.600253
113 | 750.0,2803,199.618398
114 | 775.0,2397,200.487827
115 | 14600.0,5838,202.614424
116 | 11100.0,4049,204.087964
117 | 3625.0,2323,204.925828
118 | 14800.0,6432,207.265786
119 | 14725.0,6812,209.751004
120 | 14600.0,6068,212.2734
121 | 12250.0,7827,215.238562
122 | 14800.0,6422,217.575494
123 | 825.0,3072,218.685553
124 | 11525.0,5064,220.524356
125 | 14650.0,5578,222.557985
126 | 14975.0,7839,225.419132
127 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-RoadRunnerNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615283352.8105607, "env_id": "RoadRunnerNoFrameskip-v4"}
2 | r,l,t
3 | 45600.0,4331,4.578996
4 | 35200.0,4359,6.388526
5 | 49100.0,4577,8.294341
6 | 46300.0,4812,10.291539
7 | 30400.0,2996,11.530835
8 | 40600.0,4622,13.434747
9 | 37400.0,3984,15.079498
10 | 46600.0,4608,16.980999
11 | 41300.0,4311,18.765128
12 | 41500.0,4272,20.535463
13 | 38300.0,4694,22.473144
14 | 26400.0,4265,24.230634
15 | 28700.0,4342,26.017114
16 | 28800.0,4423,27.848736
17 | 41000.0,4308,29.627312
18 | 47100.0,4669,31.560422
19 | 37300.0,4405,33.359143
20 | 36700.0,4353,35.152945
21 | 34900.0,4492,37.008729
22 | 41400.0,4212,38.757955
23 | 32400.0,4408,40.58818
24 | 28500.0,4744,42.547467
25 | 35700.0,4446,44.38607
26 | 52500.0,4346,46.184409
27 | 33100.0,4171,47.908489
28 | 38000.0,4278,49.682371
29 | 44100.0,4193,51.412412
30 | 30800.0,4582,53.31371
31 | 43600.0,4589,55.214548
32 | 36600.0,4183,56.942748
33 | 44600.0,4574,58.847455
34 | 53600.0,4590,60.767837
35 | 44600.0,4267,62.550773
36 | 30900.0,4176,64.288576
37 | 35600.0,4288,66.056793
38 | 56000.0,4673,67.991279
39 | 49000.0,4516,69.869656
40 | 46800.0,4460,71.706389
41 | 36600.0,4630,73.619184
42 | 50000.0,4389,75.435945
43 | 42600.0,4499,77.273676
44 | 38500.0,4665,79.194321
45 | 34300.0,4593,81.097265
46 | 45300.0,4486,82.948642
47 | 50100.0,4341,84.745933
48 | 35700.0,4318,86.536117
49 | 39600.0,4860,88.540414
50 | 33300.0,4382,90.352031
51 | 43400.0,4286,92.123165
52 | 48500.0,4734,94.088329
53 | 22900.0,3440,95.512706
54 | 37700.0,4222,97.262531
55 | 39200.0,4558,99.151146
56 | 51700.0,4582,101.045657
57 | 30400.0,4121,102.746956
58 | 41500.0,4542,104.62781
59 | 36400.0,4351,106.431224
60 | 35100.0,4365,108.237686
61 | 42900.0,4423,110.065588
62 | 52600.0,4707,112.015754
63 | 52000.0,4405,113.839923
64 | 32100.0,4448,115.679077
65 | 34000.0,4537,117.548685
66 | 43800.0,4494,119.412248
67 | 39900.0,4682,121.348915
68 | 33300.0,4531,123.198588
69 | 35700.0,4454,125.039904
70 | 43600.0,4270,126.80856
71 | 37400.0,4224,128.562729
72 | 45000.0,4304,130.349417
73 | 48000.0,4372,132.164388
74 | 40500.0,4486,134.016303
75 | 46300.0,4519,135.889534
76 | 59500.0,4447,137.728427
77 | 46100.0,4420,139.553422
78 | 38500.0,4342,141.348007
79 | 40200.0,4308,143.126172
80 | 31600.0,4513,144.993962
81 | 40900.0,4414,146.819435
82 | 38300.0,4590,148.714608
83 | 38400.0,4326,150.503732
84 | 48000.0,4393,152.340948
85 | 46600.0,4398,154.171558
86 | 34900.0,4412,156.00668
87 | 52500.0,4694,157.958101
88 | 43100.0,4529,159.842753
89 | 39800.0,4345,161.651971
90 | 42600.0,4415,163.485021
91 | 47100.0,4774,165.471995
92 | 41500.0,4421,167.306136
93 | 41400.0,4230,169.038306
94 | 32700.0,4277,170.820947
95 | 50000.0,4406,172.647852
96 | 25600.0,4351,174.455871
97 | 37400.0,4655,176.389951
98 | 42100.0,4412,178.224521
99 | 44500.0,4539,180.112359
100 | 42700.0,4422,181.948988
101 | 37700.0,4614,183.867699
102 | 43900.0,4190,185.605365
103 | 37700.0,4233,187.360487
104 | 45000.0,3984,189.032385
105 | 47700.0,4382,190.859018
106 | 45100.0,4139,192.587108
107 | 41800.0,4318,194.378205
108 | 45000.0,4390,196.197077
109 | 46800.0,4318,197.987917
110 | 44100.0,4251,199.751307
111 | 40300.0,4446,201.604496
112 | 33900.0,4309,203.388354
113 | 49600.0,4560,205.283296
114 | 32000.0,4601,207.192033
115 | 31000.0,4477,209.046702
116 | 44300.0,4318,210.844528
117 | 39700.0,4454,212.696337
118 | 45700.0,4409,214.494696
119 | 26800.0,4208,216.242845
120 | 26600.0,4133,217.952397
121 | 43600.0,4781,219.946394
122 | 34900.0,4468,221.796999
123 | 47100.0,4464,223.652724
124 | 48100.0,4216,225.403106
125 | 31900.0,4136,227.11711
126 | 36100.0,4235,228.873448
127 | 30300.0,3572,230.364285
128 | 27000.0,4702,232.3186
129 | 50000.0,4601,234.225036
130 | 50100.0,4604,236.139172
131 | 33900.0,4382,237.965039
132 | 33600.0,4544,239.851667
133 | 40600.0,4619,241.768723
134 | 35000.0,4284,243.543154
135 | 41800.0,4520,245.416899
136 | 48600.0,4593,247.330962
137 | 33600.0,4376,249.147818
138 | 41000.0,4128,250.868949
139 | 45500.0,4526,252.75774
140 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615283104.8211045, "env_id": "SeaquestNoFrameskip-v4"}
2 | r,l,t
3 | 760.0,4674,4.580214
4 | 1080.0,6074,6.95292
5 | 1900.0,9529,10.720694
6 | 1840.0,9065,14.293803
7 | 1840.0,9082,17.876858
8 | 1840.0,9114,21.572851
9 | 1660.0,7806,24.662641
10 | 2320.0,8890,28.260154
11 | 2000.0,7674,31.37266
12 | 1320.0,6921,34.148088
13 | 2600.0,9818,38.122417
14 | 1940.0,9530,41.912591
15 | 2880.0,10146,45.971469
16 | 2600.0,9529,49.750961
17 | 1340.0,6297,52.203349
18 | 2020.0,8334,55.703941
19 | 2200.0,10841,60.11372
20 | 1060.0,5982,62.467764
21 | 1880.0,9321,66.280149
22 | 2500.0,9450,70.017334
23 | 2460.0,9274,73.811825
24 | 2660.0,9526,77.645413
25 | 1840.0,7705,80.782437
26 | 940.0,5402,82.964944
27 | 2080.0,10182,87.057083
28 | 1840.0,7298,90.006956
29 | 1600.0,7890,93.164201
30 | 1660.0,8666,96.604625
31 | 2960.0,10586,100.910327
32 | 1420.0,7834,103.996339
33 | 2380.0,9149,107.735687
34 | 1440.0,7401,110.651457
35 | 1860.0,9114,114.266929
36 | 2920.0,10269,118.386817
37 | 2000.0,9786,122.288618
38 | 1880.0,9082,125.961307
39 | 2580.0,9690,129.830575
40 | 1480.0,7609,132.819636
41 | 3560.0,11865,137.549402
42 | 3040.0,10390,141.719084
43 | 1160.0,6614,144.408233
44 | 1240.0,6746,147.14411
45 | 2200.0,9546,150.988787
46 | 1760.0,9329,154.759344
47 | 1920.0,9273,158.51737
48 | 1960.0,8138,161.755239
49 | 3500.0,12137,166.541739
50 | 1660.0,7074,169.483862
51 | 1880.0,9466,173.323722
52 | 1400.0,6690,175.960091
53 | 1400.0,7418,178.906495
54 | 2880.0,10650,183.116402
55 | 1960.0,9450,186.79492
56 | 2200.0,8982,190.068956
57 | 1900.0,7634,193.011389
58 | 1940.0,7545,196.121619
59 | 2120.0,8689,199.655051
60 | 2820.0,10073,203.763259
61 | 1020.0,5914,206.09671
62 | 2200.0,10458,210.370167
63 | 3000.0,11034,214.772371
64 | 1940.0,9786,218.751435
65 | 760.0,4710,220.653248
66 | 1680.0,8433,224.079637
67 | 1840.0,9137,227.823316
68 | 2400.0,8550,231.249566
69 | 2180.0,10842,235.681875
70 | 2400.0,9022,239.407329
71 | 2520.0,9370,243.204571
72 |
--------------------------------------------------------------------------------
/logs/benchmark/dqn-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614779016.280488, "env_id": "SpaceInvadersNoFrameskip-v4"}
2 | r,l,t
3 | 775.0,3344,3.886279
4 | 455.0,2929,4.941654
5 | 550.0,3315,6.127118
6 | 570.0,3923,7.515705
7 | 800.0,4259,9.018671
8 | 395.0,2559,9.939902
9 | 530.0,3307,11.118369
10 | 710.0,3229,12.272132
11 | 515.0,3663,13.577838
12 | 1120.0,3797,14.927365
13 | 900.0,4037,16.356874
14 | 570.0,3796,17.697156
15 | 515.0,3819,19.042766
16 | 630.0,3821,20.39839
17 | 720.0,3329,21.582514
18 | 875.0,6020,23.740323
19 | 545.0,3898,25.119765
20 | 395.0,2387,25.980374
21 | 545.0,3995,27.394111
22 | 730.0,3886,28.770375
23 | 525.0,3565,30.034888
24 | 600.0,3905,31.412884
25 | 600.0,4225,32.896876
26 | 600.0,4113,34.346998
27 | 605.0,4073,35.774542
28 | 555.0,3933,37.161738
29 | 575.0,3301,38.332035
30 | 520.0,3607,39.609969
31 | 375.0,2529,40.513565
32 | 525.0,2848,41.528867
33 | 600.0,4647,43.174772
34 | 770.0,4485,44.771373
35 | 485.0,2689,45.728803
36 | 605.0,3853,47.0837
37 | 480.0,3251,48.242046
38 | 395.0,2559,49.15564
39 | 550.0,3486,50.391175
40 | 395.0,2559,51.305544
41 | 470.0,3189,52.442224
42 | 395.0,2559,53.357437
43 | 1205.0,6295,55.603836
44 | 605.0,3041,56.676831
45 | 600.0,3563,57.931883
46 | 775.0,4513,59.52345
47 | 485.0,3935,60.921994
48 | 600.0,3349,62.106138
49 | 570.0,3127,63.205454
50 | 570.0,3536,64.455716
51 | 465.0,3911,65.845823
52 | 610.0,4385,67.388742
53 | 545.0,3415,68.59359
54 | 525.0,3900,69.970141
55 | 480.0,3377,71.175281
56 | 570.0,3422,72.396504
57 | 570.0,4113,73.83675
58 | 445.0,2557,74.757191
59 | 530.0,2628,75.687501
60 | 395.0,2559,76.603561
61 | 450.0,4293,78.111617
62 | 550.0,3052,79.200124
63 | 800.0,4829,80.877832
64 | 540.0,3731,82.18373
65 | 1200.0,7364,84.811868
66 | 525.0,3062,85.919444
67 | 600.0,3921,87.297699
68 | 600.0,3675,88.588161
69 | 900.0,6521,90.903198
70 | 775.0,2945,91.956404
71 | 525.0,2912,92.994804
72 | 395.0,2559,93.915735
73 | 580.0,3116,95.02866
74 | 1130.0,5943,97.1432
75 | 775.0,6133,99.324602
76 | 575.0,3140,100.432
77 | 405.0,2711,101.396831
78 | 545.0,3581,102.666251
79 | 570.0,3307,103.841793
80 | 455.0,3815,105.192782
81 | 570.0,3029,106.258779
82 | 600.0,3971,107.646472
83 | 770.0,3308,108.822203
84 | 485.0,2625,109.757926
85 | 755.0,5625,111.753243
86 | 395.0,2559,112.669612
87 | 555.0,3192,113.804427
88 | 705.0,3009,114.880053
89 | 570.0,3796,116.211087
90 | 575.0,3643,117.496128
91 | 545.0,3017,118.564473
92 | 600.0,4729,120.210074
93 | 535.0,3305,121.376765
94 | 570.0,4695,123.022804
95 | 515.0,3699,124.317297
96 | 770.0,3763,125.638394
97 | 605.0,3841,126.9832
98 | 515.0,2848,127.989394
99 | 600.0,4197,129.467359
100 | 600.0,3801,130.805654
101 | 570.0,3796,132.143114
102 | 915.0,6533,134.45919
103 | 545.0,3844,135.80943
104 | 1085.0,7745,138.561323
105 | 405.0,3527,139.808429
106 | 520.0,2923,140.851297
107 | 550.0,3647,142.133963
108 | 630.0,4517,143.718867
109 | 515.0,3697,145.023435
110 | 600.0,4089,146.449435
111 | 405.0,3109,147.56174
112 | 520.0,3488,148.786294
113 | 700.0,3435,149.999451
114 | 580.0,3116,151.111986
115 | 660.0,4089,152.550673
116 | 395.0,2559,153.464829
117 | 570.0,3628,154.739216
118 | 975.0,7079,157.248444
119 | 575.0,3952,158.639437
120 | 1445.0,9755,162.092638
121 | 700.0,2947,163.148235
122 | 925.0,5439,165.057395
123 | 480.0,2897,166.090477
124 | 575.0,3414,167.296689
125 | 1120.0,7605,169.986964
126 | 515.0,3697,171.291006
127 | 350.0,2758,172.281944
128 | 515.0,2987,173.335978
129 | 550.0,3742,174.657949
130 | 1345.0,6667,177.009458
131 | 575.0,3514,178.250407
132 | 655.0,3745,179.576672
133 | 510.0,3217,180.717338
134 | 485.0,3217,181.861857
135 | 570.0,3796,183.193002
136 | 495.0,3169,184.315878
137 | 745.0,3692,185.618728
138 | 1010.0,7547,188.308126
139 | 575.0,3406,189.514064
140 | 395.0,2559,190.430743
141 | 520.0,3545,191.687078
142 | 500.0,3345,192.894131
143 | 715.0,3435,194.115859
144 | 520.0,3289,195.27842
145 | 600.0,4399,196.819674
146 | 1225.0,7181,199.368081
147 | 645.0,4529,200.957543
148 | 420.0,3121,202.073241
149 | 1180.0,6825,204.4855
150 | 545.0,3841,205.829714
151 | 1110.0,7663,208.533523
152 | 680.0,5273,210.396127
153 | 545.0,2817,211.400691
154 | 520.0,4191,212.877653
155 | 535.0,3305,214.050907
156 | 670.0,3757,215.378709
157 | 600.0,3675,216.67445
158 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-AsteroidsNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615282346.0657709, "env_id": "AsteroidsNoFrameskip-v4"}
2 | r,l,t
3 | 2100.0,3140,4.222781
4 | 2150.0,5416,6.706464
5 | 3100.0,4628,8.867571
6 | 1880.0,2628,10.130192
7 | 2430.0,2560,11.29486
8 | 2700.0,5986,14.025688
9 | 1540.0,2844,15.321319
10 | 1350.0,3142,16.800679
11 | 2800.0,5302,19.29634
12 | 1300.0,2948,20.63113
13 | 2400.0,2892,21.965651
14 | 2030.0,3832,23.704556
15 | 2180.0,3728,25.185869
16 | 1950.0,4020,26.805397
17 | 2530.0,2228,27.717258
18 | 1780.0,3134,29.029559
19 | 1630.0,3020,30.50439
20 | 1470.0,2142,31.474842
21 | 2500.0,3398,33.044588
22 | 3330.0,5052,35.419518
23 | 1830.0,3190,36.876289
24 | 2750.0,5234,39.277096
25 | 1390.0,3112,40.718593
26 | 1280.0,2428,41.852868
27 | 1610.0,3018,43.288413
28 | 1760.0,2242,44.305527
29 | 2350.0,6784,47.503645
30 | 2400.0,4238,49.467506
31 | 2080.0,2898,50.785973
32 | 1950.0,2936,52.131212
33 | 3000.0,4662,54.34914
34 | 2130.0,2802,55.651604
35 | 1980.0,3042,57.03654
36 | 1780.0,3176,58.489574
37 | 2850.0,4772,60.665724
38 | 1710.0,3336,62.201884
39 | 2200.0,4294,64.166347
40 | 4770.0,6812,67.312451
41 | 1590.0,5984,70.017936
42 | 930.0,1372,70.625459
43 | 2950.0,5328,73.043853
44 | 1800.0,3476,74.64306
45 | 1300.0,3418,76.182155
46 | 1080.0,1678,76.929373
47 | 1800.0,2962,78.262535
48 | 1650.0,3306,79.759297
49 | 2800.0,5370,82.260684
50 | 1910.0,2682,83.461249
51 | 1930.0,3740,85.154804
52 | 2150.0,4872,87.459444
53 | 2300.0,4200,89.39806
54 | 1300.0,3444,90.949188
55 | 2150.0,5580,93.4792
56 | 1490.0,2158,94.444278
57 | 1530.0,3430,96.019706
58 | 2500.0,5000,98.31589
59 | 2180.0,4054,100.149905
60 | 3000.0,6512,103.194933
61 | 1800.0,3770,104.926376
62 | 1280.0,2132,105.897955
63 | 1850.0,7126,109.109429
64 | 1930.0,4274,111.140121
65 | 2180.0,3110,112.584334
66 | 3480.0,7728,116.169647
67 | 2230.0,3396,117.747863
68 | 980.0,2250,118.789643
69 | 1950.0,4102,120.655639
70 | 880.0,1532,121.345101
71 | 1320.0,2418,122.527914
72 | 4820.0,7680,126.119789
73 | 2350.0,3574,127.748252
74 | 1610.0,4640,129.888136
75 | 2350.0,3484,131.468878
76 | 1280.0,2376,132.534037
77 | 1300.0,3206,133.973154
78 | 2400.0,6146,136.792883
79 | 1390.0,4482,138.916353
80 | 1300.0,3828,140.728047
81 | 1280.0,4782,142.936024
82 | 2400.0,4924,145.211154
83 | 4470.0,6594,148.311394
84 | 3510.0,6092,151.181304
85 | 2900.0,3804,152.932375
86 | 1730.0,3090,154.344851
87 | 2600.0,4410,156.450595
88 | 2450.0,3180,157.89357
89 | 1280.0,3298,159.373876
90 | 3100.0,7616,162.852006
91 | 2450.0,3494,164.390613
92 | 2130.0,5722,167.096026
93 | 2600.0,4060,168.987177
94 | 1280.0,2130,170.001644
95 | 2000.0,3638,171.701467
96 | 2950.0,4134,173.57986
97 | 780.0,1532,174.299056
98 | 2150.0,2936,175.649798
99 | 2750.0,2776,176.935084
100 | 2900.0,5568,179.56799
101 | 630.0,1532,180.246592
102 | 3410.0,5974,183.051007
103 | 1830.0,5428,185.546254
104 | 1930.0,6056,188.33652
105 | 1930.0,3252,189.853228
106 | 3000.0,5410,192.396713
107 | 1490.0,3532,193.986223
108 | 2700.0,5472,196.570051
109 | 1030.0,2348,197.62768
110 | 3000.0,4760,199.796481
111 | 3100.0,5448,202.300959
112 | 2700.0,4180,204.198598
113 | 3220.0,7278,207.60019
114 | 1950.0,4532,209.646224
115 | 1180.0,2006,210.611366
116 | 2050.0,3532,212.217238
117 | 2010.0,4226,214.121241
118 | 1730.0,4034,215.952861
119 | 2400.0,3260,217.4612
120 | 1080.0,1516,218.132377
121 | 1750.0,2932,219.454946
122 | 1420.0,4148,221.331489
123 | 2100.0,3928,223.107199
124 | 1300.0,3846,224.866148
125 | 2500.0,3884,226.630986
126 | 2200.0,3220,228.092993
127 | 2900.0,5000,230.362537
128 | 2550.0,5290,232.791035
129 | 1650.0,3432,234.413234
130 | 1440.0,2356,235.4701
131 | 3260.0,7632,239.029046
132 | 2550.0,3822,240.787463
133 | 2500.0,4572,242.955574
134 | 4020.0,6168,245.781019
135 | 1300.0,2792,247.0935
136 | 2150.0,4436,249.139655
137 | 1950.0,4634,251.331867
138 | 2200.0,4092,253.184912
139 | 2250.0,5500,255.666379
140 | 3200.0,5960,258.392801
141 | 2330.0,4604,260.466561
142 | 2500.0,4170,262.367958
143 | 2900.0,3674,264.053509
144 | 2450.0,3360,265.599392
145 | 3100.0,6872,268.700958
146 | 1860.0,3258,270.17276
147 | 1280.0,3534,271.760526
148 | 1760.0,2262,272.81702
149 | 3000.0,3806,274.614552
150 | 2280.0,4826,276.806577
151 | 2550.0,5690,279.494636
152 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614793322.1209238, "env_id": "BeamRiderNoFrameskip-v4"}
2 | r,l,t
3 | 4080.0,14525,8.79847
4 | 5666.0,18681,16.575911
5 | 6236.0,17193,23.729032
6 | 2160.0,9873,27.818356
7 | 5154.0,17049,34.863509
8 | 1380.0,7291,37.89434
9 | 3300.0,14357,43.820789
10 | 3000.0,13109,49.238173
11 | 4798.0,17485,56.472782
12 | 3420.0,13915,62.209094
13 | 1796.0,8485,65.697914
14 | 2160.0,9735,69.714388
15 | 1332.0,7213,72.670604
16 | 1284.0,6343,75.278639
17 | 996.0,5281,77.437288
18 | 3900.0,14931,83.591233
19 | 3420.0,12495,88.744709
20 | 1380.0,7236,91.72037
21 | 2056.0,9511,95.644122
22 | 4732.0,17507,102.892356
23 | 2160.0,10658,107.295106
24 | 756.0,4491,109.137778
25 | 3308.0,12521,114.306549
26 | 4770.0,15463,120.695994
27 | 3900.0,16307,127.439384
28 | 4476.0,17315,134.610267
29 | 3240.0,15185,140.870411
30 | 4734.0,17433,148.066092
31 | 4110.0,16029,154.67535
32 | 3690.0,12429,159.81653
33 | 1284.0,6317,162.416749
34 | 2916.0,10475,166.734342
35 | 3000.0,13817,172.429865
36 | 3300.0,14313,178.328795
37 | 2328.0,11129,182.941328
38 | 1332.0,6787,185.724657
39 | 4764.0,18671,193.465057
40 | 1900.0,9041,197.198546
41 | 2160.0,10553,201.540245
42 | 2804.0,10999,206.083617
43 | 4380.0,15989,212.692215
44 | 3728.0,13888,218.42536
45 | 8358.0,21151,227.209689
46 | 5310.0,17458,234.420478
47 | 3900.0,15619,240.886745
48 | 7404.0,22673,250.270449
49 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-BipedalWalker-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1654205553.3965414, "env_id": "BipedalWalker-v3"}
2 | r,l,t
3 | 286.498863,1232,3.148969
4 | 290.03395,1193,3.942982
5 | 285.871843,1236,4.766715
6 | 291.850978,1198,5.564058
7 | 288.768871,1210,6.368981
8 | 288.157038,1212,7.176008
9 | 289.361869,1202,7.97352
10 | 287.842606,1225,8.786903
11 | 287.280252,1190,9.578171
12 | 284.324796,1247,10.404098
13 | 288.956237,1213,11.208744
14 | 288.669449,1191,11.998618
15 | 285.979899,1247,12.825689
16 | 289.639171,1190,13.618726
17 | 286.947732,1244,14.446376
18 | 287.606982,1188,15.235259
19 | 286.782178,1223,16.048814
20 | 288.687124,1216,16.856583
21 | 285.721313,1232,17.67472
22 | 287.000542,1216,18.483637
23 | 288.605325,1213,19.286785
24 | 291.318049,1207,20.087602
25 | 289.566242,1193,20.877623
26 | 292.464293,1179,21.661781
27 | 290.672828,1192,22.457295
28 | 290.689305,1182,23.244957
29 | 287.682502,1206,24.045446
30 | 291.957954,1163,24.819003
31 | 282.784247,1263,25.659568
32 | 287.454218,1216,26.470194
33 | 285.451189,1228,27.283888
34 | 287.095007,1272,28.128968
35 | 287.733682,1228,28.945501
36 | 290.772435,1195,29.742402
37 | 288.365004,1236,30.56567
38 | 285.35095,1225,31.380461
39 | 289.413562,1208,32.184891
40 | 288.015387,1211,32.991751
41 | 288.731107,1205,33.791121
42 | 290.727256,1189,34.581942
43 | 289.43828,1207,35.3862
44 | 287.729774,1227,36.204048
45 | 285.253831,1260,37.041651
46 | 289.768125,1185,37.826303
47 | 285.958823,1233,38.646922
48 | 287.200568,1222,39.46325
49 | 290.043164,1185,40.250997
50 | 289.101907,1211,41.05368
51 | 286.462372,1243,41.87645
52 | 285.64948,1221,42.688791
53 | 287.519667,1187,43.476118
54 | 284.971831,1267,44.316454
55 | 288.742228,1214,45.120938
56 | 285.628617,1255,45.954174
57 | 284.757907,1250,46.783232
58 | 284.347205,1245,47.609643
59 | 289.301662,1211,48.409139
60 | 284.519905,1255,49.241136
61 | 283.713437,1242,50.06516
62 | 289.766618,1189,50.851352
63 | 282.590464,1273,51.692035
64 | 289.451637,1175,52.468679
65 | 282.330197,1285,53.320945
66 | 290.373129,1201,54.118149
67 | 285.502483,1222,54.927324
68 | 290.665951,1163,55.698771
69 | 289.852728,1213,56.503432
70 | 287.244561,1205,57.306046
71 | 286.817512,1223,58.116854
72 | 291.118836,1183,58.904925
73 | 289.975692,1205,59.704293
74 | 291.492401,1159,60.475756
75 | 287.211862,1228,61.291739
76 | 284.231949,1244,62.120625
77 | 287.456086,1196,62.914544
78 | 286.782568,1231,63.733352
79 | 290.014788,1182,64.517536
80 | 285.797936,1238,65.341084
81 | 286.128281,1239,66.164153
82 | 291.3156,1168,66.940698
83 | 285.707421,1227,67.7551
84 | 287.444993,1217,68.563281
85 | 287.287142,1235,69.380708
86 | 287.296313,1252,70.212887
87 | 285.059782,1244,71.037529
88 | 285.075845,1250,71.86698
89 | 289.072542,1172,72.643633
90 | 286.401297,1247,73.470394
91 | 289.753727,1171,74.244725
92 | 288.401924,1186,75.030774
93 | 289.74776,1210,75.834194
94 | 290.13916,1177,76.612051
95 | 292.795935,1181,77.396483
96 | 285.884796,1247,78.222464
97 | 288.603669,1212,79.027307
98 | 289.873579,1212,79.832399
99 | 285.787775,1230,80.647568
100 | 290.36039,1202,81.441948
101 | 290.807051,1179,82.224544
102 | 285.749472,1245,83.048675
103 | 290.984798,1179,83.832279
104 | 288.575334,1226,84.645881
105 | 286.358977,1210,85.449784
106 | 287.503688,1221,86.257498
107 | 290.378601,1209,87.059999
108 | 289.137484,1223,87.869361
109 | 288.796009,1224,88.683386
110 | 287.783809,1249,89.514697
111 | 291.227006,1151,90.278997
112 | 280.305489,1277,91.127263
113 | 283.010979,1279,91.975935
114 | 286.370462,1259,92.809959
115 | 294.429234,1149,93.57178
116 | 286.001682,1213,94.377335
117 | 287.589849,1233,95.19614
118 | 290.317775,1200,95.992744
119 | 286.754871,1236,96.815568
120 | 289.788937,1205,97.617576
121 | 286.015599,1251,98.448357
122 | 287.574138,1193,99.242593
123 | 291.136559,1193,100.036958
124 | 291.611669,1221,100.848697
125 | 286.55367,1234,101.670848
126 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615193532.8653934, "env_id": "BipedalWalkerHardcore-v3"}
2 | r,l,t
3 | 193.275095,2000,4.145592
4 | 278.431375,1216,5.233195
5 | -6.798583,356,5.553727
6 | 19.282857,935,6.39281
7 | 250.856491,1620,7.846617
8 | -65.818894,162,7.994552
9 | 221.508256,2000,9.782774
10 | 55.729149,629,10.340956
11 | 27.481974,749,11.012363
12 | -49.307592,2000,12.828135
13 | -53.150271,224,13.030152
14 | 274.377278,1245,14.144573
15 | 273.219748,1289,15.291531
16 | -1.475935,2000,17.097797
17 | 275.173584,1267,18.226109
18 | 268.651572,1333,19.420884
19 | 262.781362,1457,20.719656
20 | -55.50225,241,20.93576
21 | 143.412661,2000,22.721537
22 | 56.459139,2000,24.502594
23 | 16.917754,2000,26.304311
24 | 264.020378,1410,27.561328
25 | -4.766662,458,27.972549
26 | 33.298734,2000,29.746628
27 | 43.781743,774,30.438653
28 | 232.481688,2000,32.22467
29 | 46.904917,2000,34.022865
30 | 260.045822,1422,35.285332
31 | 281.353043,1159,36.317426
32 | 117.040305,1037,37.234354
33 | -25.477002,361,37.557418
34 | 174.199961,2000,39.350821
35 | 200.445063,2000,41.149508
36 | 12.565995,2000,42.934273
37 | 84.100166,2000,44.760193
38 | -74.903722,2000,46.59088
39 | 271.058868,1269,47.739809
40 | 103.401041,979,48.615643
41 | 241.844216,2000,50.409846
42 | -34.435864,334,50.708726
43 | 271.148666,1302,51.86712
44 | 33.819655,2000,53.672669
45 | -9.80039,2000,55.49123
46 | 75.016011,2000,57.289543
47 | 146.260913,2000,59.093938
48 | 274.414427,1274,60.231836
49 | 128.360471,1076,61.19114
50 | 264.920659,1386,62.431464
51 | 9.68412,525,62.900779
52 | -62.625699,173,63.058635
53 | 78.21942,801,63.773142
54 | 192.980267,2000,65.549125
55 | 203.484802,2000,67.37258
56 | 50.604619,629,67.934108
57 | 220.073692,2000,69.73243
58 | 94.093388,2000,71.510735
59 | 105.880485,2000,73.284172
60 | 272.858588,1286,74.432985
61 | 8.159598,2000,76.250862
62 | 159.239759,997,77.138704
63 | 26.746829,2000,78.901894
64 | -62.071558,176,79.060033
65 | 25.428189,2000,80.884813
66 | 155.089809,1392,82.126809
67 | 143.705084,1270,83.257159
68 | 275.999284,1248,84.372922
69 | 276.390156,1222,85.456104
70 | 3.633458,2000,87.313531
71 | 43.690779,2000,89.103813
72 | -39.888648,337,89.406915
73 | 77.930317,756,90.083196
74 | 264.742158,1428,91.350063
75 | 268.022265,1343,92.542526
76 | 104.829176,2000,94.329187
77 | 271.579241,1272,95.462329
78 | 7.061693,437,95.852223
79 | 270.516807,1314,97.021596
80 | 25.466871,2000,98.8095
81 | 199.094501,2000,100.804588
82 | -77.270537,2000,102.760402
83 | 274.325678,1257,103.881253
84 | -47.70453,279,104.128048
85 | 269.782561,1325,105.312254
86 | 252.021391,1595,106.737116
87 | 76.03425,2000,108.53695
88 | 268.948973,1338,109.724009
89 | -10.528409,2000,111.492292
90 | 58.413867,1108,112.472228
91 | 160.320388,2000,114.25012
92 | 91.350898,919,115.072932
93 | 271.164018,1321,116.245728
94 | -11.883781,2000,118.041878
95 | 105.827348,2000,119.822273
96 | 200.431321,2000,121.619765
97 | 265.226968,1407,122.871708
98 | 69.77737,733,123.528858
99 | 270.591673,1306,124.703248
100 | 141.358797,2000,126.497834
101 | 244.774291,1689,128.007964
102 | 21.732235,2000,129.789535
103 | -52.589515,346,130.100523
104 | 268.640395,2000,131.877187
105 | 148.517683,1315,133.04672
106 | 22.670106,528,133.518668
107 | 100.072615,2000,135.297825
108 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614770695.1439214, "env_id": "BreakoutNoFrameskip-v4"}
2 | r,l,t
3 | 417.0,12603,7.91641
4 | 396.0,9007,11.486543
5 | 431.0,12730,16.501665
6 | 420.0,9783,20.353318
7 | 393.0,10255,24.399123
8 | 414.0,35543,38.466367
9 | 409.0,10094,42.434626
10 | 372.0,7074,45.223526
11 | 414.0,9194,48.837086
12 | 418.0,9150,52.421882
13 | 421.0,7848,55.500098
14 | 387.0,7913,58.610305
15 | 397.0,7294,61.475845
16 | 397.0,9214,65.099294
17 | 405.0,9027,68.644321
18 | 391.0,14176,74.201695
19 | 380.0,7386,77.108295
20 | 421.0,8567,80.466578
21 | 389.0,7161,83.282692
22 | 413.0,6880,85.992517
23 | 405.0,8422,89.307504
24 | 411.0,8550,92.671517
25 | 415.0,23746,102.003397
26 | 396.0,9498,105.736333
27 | 415.0,7582,108.703916
28 | 390.0,9083,112.268808
29 | 409.0,8339,115.536329
30 | 288.0,7289,118.406694
31 | 415.0,9489,122.139135
32 | 406.0,9650,125.932637
33 | 401.0,13799,131.324341
34 | 397.0,7327,134.206139
35 | 425.0,11805,138.796122
36 | 412.0,8117,141.983996
37 | 412.0,12291,146.784082
38 | 409.0,8779,150.232458
39 | 421.0,10680,154.413628
40 | 213.0,6196,156.854681
41 | 404.0,9779,160.689395
42 | 413.0,16971,167.34178
43 | 398.0,10057,171.290612
44 | 404.0,9900,175.174741
45 | 423.0,8658,178.559094
46 | 373.0,8765,182.007169
47 | 392.0,8508,185.355729
48 | 372.0,7457,188.290559
49 | 421.0,13768,193.67705
50 | 367.0,7096,196.471112
51 | 398.0,7307,199.343706
52 | 415.0,9821,203.192525
53 | 417.0,8940,206.70469
54 | 396.0,9338,210.384888
55 | 421.0,9029,213.928488
56 | 421.0,10031,217.866087
57 | 419.0,9851,221.736578
58 | 395.0,8815,225.197859
59 | 397.0,8982,228.739918
60 | 389.0,11865,233.428803
61 | 399.0,7052,236.196647
62 | 323.0,6887,238.924023
63 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614937615.306725, "env_id": "EnduroNoFrameskip-v4"}
2 | r,l,t
3 | 797.0,39936,19.760402
4 | 774.0,39936,36.865088
5 | 1259.0,66560,65.256766
6 | 1032.0,53248,87.962597
7 | 984.0,53248,110.666455
8 | 971.0,53248,133.364271
9 | 1093.0,53248,156.086944
10 | 961.0,53248,178.782423
11 | 975.0,53248,201.486932
12 | 772.0,39936,218.450948
13 | 1342.0,66560,246.65753
14 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614770211.8584638, "env_id": "PongNoFrameskip-v4"}
2 | r,l,t
3 | 21.0,6621,5.645768
4 | 21.0,6619,8.278026
5 | 21.0,6619,10.91174
6 | 21.0,6695,13.5699
7 | 21.0,6701,16.227408
8 | 21.0,6695,18.884109
9 | 21.0,6621,21.507497
10 | 21.0,6695,24.158821
11 | 21.0,6619,26.782454
12 | 21.0,6619,29.404617
13 | 21.0,6701,32.060708
14 | 21.0,6701,34.718148
15 | 21.0,6701,37.365183
16 | 21.0,6695,40.013821
17 | 21.0,6695,42.648849
18 | 21.0,6619,45.254462
19 | 21.0,6701,47.898726
20 | 21.0,6621,50.505156
21 | 21.0,6701,53.146831
22 | 21.0,6619,55.753825
23 | 21.0,6621,58.36678
24 | 21.0,6701,61.006375
25 | 21.0,6701,63.655262
26 | 21.0,6621,66.262585
27 | 21.0,6701,68.902122
28 | 21.0,6619,71.509826
29 | 21.0,6619,74.11505
30 | 21.0,6621,76.72574
31 | 21.0,6695,79.360951
32 | 21.0,6695,81.99979
33 | 21.0,6619,84.609866
34 | 21.0,6695,87.247657
35 | 21.0,6621,89.853352
36 | 21.0,6701,92.504749
37 | 21.0,6695,95.142408
38 | 21.0,6695,97.778445
39 | 21.0,6621,100.390172
40 | 21.0,6695,103.025162
41 | 21.0,6695,105.660685
42 | 21.0,6695,108.294654
43 | 21.0,6621,110.901377
44 | 21.0,6621,113.507766
45 | 21.0,6621,116.119072
46 | 21.0,6701,118.759252
47 | 21.0,6621,121.366972
48 | 21.0,6621,123.986761
49 | 21.0,6621,126.592144
50 | 21.0,6619,129.200434
51 | 21.0,6701,131.840272
52 | 21.0,6695,134.475858
53 | 21.0,6619,137.083183
54 | 21.0,6621,139.692369
55 | 21.0,6619,142.300647
56 | 21.0,6619,144.905658
57 | 21.0,6701,147.542496
58 | 21.0,6695,150.183867
59 | 21.0,6695,152.830488
60 | 21.0,6695,155.470538
61 | 21.0,6619,158.079579
62 | 21.0,6621,160.693677
63 | 21.0,6701,163.329427
64 | 21.0,6695,165.966489
65 | 21.0,6619,168.575866
66 | 21.0,6695,171.215574
67 | 21.0,6701,173.854388
68 | 21.0,6619,176.460269
69 | 21.0,6701,179.104645
70 | 21.0,6701,181.746303
71 | 21.0,6619,184.365571
72 | 21.0,6695,187.007439
73 | 21.0,6619,189.617918
74 | 21.0,6701,192.254169
75 | 21.0,6701,194.89255
76 | 20.0,6905,197.61044
77 | 21.0,6619,200.216051
78 | 21.0,6695,202.851995
79 | 21.0,6701,205.491178
80 | 21.0,6695,208.131151
81 | 21.0,6619,210.740392
82 | 21.0,6695,213.388927
83 | 21.0,6619,215.992427
84 | 21.0,6621,218.596376
85 | 21.0,6695,221.233237
86 | 21.0,6695,223.871111
87 | 21.0,6621,226.481205
88 | 21.0,6621,229.09101
89 | 21.0,6701,231.728724
90 | 21.0,6701,234.371095
91 | 21.0,6701,237.008596
92 | 21.0,6701,239.64919
93 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-QbertNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614770940.1790123, "env_id": "QbertNoFrameskip-v4"}
2 | r,l,t
3 | 12175.0,5927,5.415989
4 | 16150.0,7347,8.336347
5 | 15875.0,7725,11.41015
6 | 15525.0,6352,13.925117
7 | 15825.0,7957,17.082229
8 | 16725.0,7377,20.008731
9 | 15675.0,6632,22.6371
10 | 12375.0,5794,24.927961
11 | 12350.0,5852,27.250481
12 | 15550.0,6357,29.76556
13 | 15450.0,6159,32.201469
14 | 15375.0,5834,34.504873
15 | 16150.0,7342,37.405205
16 | 15875.0,7452,40.352528
17 | 15825.0,7452,43.2892
18 | 16100.0,6422,45.815806
19 | 15675.0,6844,48.517201
20 | 15750.0,7117,51.320857
21 | 19400.0,10657,55.528912
22 | 16325.0,7822,58.619442
23 | 19375.0,9303,62.316152
24 | 12000.0,4992,64.281549
25 | 19725.0,10072,68.251343
26 | 20000.0,10267,72.296274
27 | 15725.0,8032,75.460957
28 | 5050.0,3742,76.926301
29 | 15750.0,6782,79.59477
30 | 12125.0,5272,81.666575
31 | 16175.0,7664,84.682323
32 | 5025.0,3429,86.03526
33 | 8675.0,4537,87.817425
34 | 16075.0,7667,90.836693
35 | 4150.0,2947,91.988825
36 | 12300.0,5734,94.24798
37 | 15800.0,6792,96.923041
38 | 12325.0,5497,99.083754
39 | 19400.0,10132,103.085607
40 | 12225.0,5467,105.234322
41 | 16650.0,7732,108.279649
42 | 12000.0,4697,110.122053
43 | 15425.0,6732,112.775185
44 | 16375.0,7764,115.838148
45 | 19650.0,11002,120.187555
46 | 19350.0,10149,124.190546
47 | 19075.0,8989,127.733854
48 | 19450.0,8049,130.904344
49 | 15975.0,7953,134.032731
50 | 19925.0,9842,137.909475
51 | 15425.0,6194,140.345596
52 | 16850.0,8547,143.712478
53 | 16425.0,7277,146.584682
54 | 15675.0,6922,149.312712
55 | 15650.0,6542,151.884868
56 | 15550.0,6427,154.405081
57 | 11700.0,4314,156.093832
58 | 15750.0,6877,158.806255
59 | 19550.0,9387,162.504466
60 | 19925.0,10057,166.469106
61 | 11950.0,5177,168.502842
62 | 12275.0,4624,170.320909
63 | 16250.0,7293,173.19079
64 | 16275.0,6862,175.896776
65 | 15700.0,6622,178.511129
66 | 20200.0,10903,182.807991
67 | 15725.0,7232,185.657379
68 | 15975.0,7397,188.565925
69 | 12375.0,6042,190.947371
70 | 19175.0,8742,194.39362
71 | 15375.0,5537,196.569389
72 | 19325.0,8714,200.009571
73 | 19775.0,9317,203.681354
74 | 15625.0,7582,206.672691
75 | 16700.0,8847,210.160283
76 | 19400.0,9017,213.709782
77 | 15625.0,6912,216.445144
78 | 19125.0,8424,219.868027
79 | 20250.0,10263,223.909721
80 | 19775.0,9034,227.471911
81 | 12425.0,6132,229.885228
82 | 15525.0,6687,232.511261
83 | 15575.0,6562,235.091669
84 | 11900.0,4512,236.867217
85 | 16325.0,7612,239.867593
86 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-RoadRunnerNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615282634.0171642, "env_id": "RoadRunnerNoFrameskip-v4"}
2 | r,l,t
3 | 39500.0,3693,4.607107
4 | 42300.0,4249,6.678031
5 | 38200.0,3762,8.513952
6 | 38400.0,4290,10.56961
7 | 38900.0,3916,12.442074
8 | 31300.0,3952,14.360368
9 | 41500.0,3863,16.242541
10 | 47500.0,4178,18.267558
11 | 37300.0,3890,20.1607
12 | 37500.0,4419,22.307001
13 | 43400.0,3720,24.117626
14 | 48000.0,3867,26.001089
15 | 35600.0,3650,27.768714
16 | 38500.0,3747,29.5914
17 | 47600.0,3980,31.512643
18 | 31300.0,3889,33.384604
19 | 39500.0,3870,35.2572
20 | 37600.0,3865,37.131667
21 | 33600.0,3860,38.998168
22 | 43100.0,4141,40.998706
23 | 35200.0,3828,42.850133
24 | 40500.0,3728,44.656435
25 | 43800.0,4114,46.647759
26 | 29300.0,3889,48.519863
27 | 44500.0,3985,50.438046
28 | 40800.0,3793,52.266009
29 | 40900.0,3967,54.170856
30 | 47100.0,3569,55.890799
31 | 52100.0,4196,57.898864
32 | 26100.0,3795,59.748093
33 | 39000.0,3916,61.674686
34 | 43800.0,3882,63.658678
35 | 36000.0,3988,65.747784
36 | 43400.0,3696,67.774647
37 | 42500.0,3893,69.948568
38 | 44700.0,4360,72.333062
39 | 38500.0,4063,74.596394
40 | 36300.0,3477,76.517315
41 | 53500.0,3976,78.678528
42 | 41500.0,3792,80.803227
43 | 53000.0,3939,82.964355
44 | 34500.0,3861,85.102964
45 | 48000.0,3914,87.266721
46 | 51500.0,4083,89.521037
47 | 49100.0,4784,91.993401
48 | 39700.0,3874,93.879836
49 | 40400.0,4117,95.889661
50 | 46600.0,4149,97.911088
51 | 38000.0,4318,100.008145
52 | 45500.0,3557,101.736813
53 | 37000.0,3897,103.628461
54 | 48500.0,4347,105.697235
55 | 41600.0,3288,107.307808
56 | 37700.0,3728,109.106499
57 | 31800.0,3679,110.892347
58 | 48700.0,4237,112.95045
59 | 37000.0,3764,114.778509
60 | 39600.0,3549,116.505887
61 | 35000.0,3707,118.302071
62 | 48400.0,4000,120.253019
63 | 46000.0,3853,122.118019
64 | 34200.0,3754,123.945879
65 | 49700.0,4783,126.269761
66 | 49700.0,3980,128.216163
67 | 42700.0,4097,130.196395
68 | 42100.0,3925,132.096195
69 | 43100.0,4014,134.051753
70 | 23900.0,3929,135.957948
71 | 37700.0,3763,137.782687
72 | 31100.0,3873,139.667927
73 | 34000.0,3632,141.440248
74 | 40500.0,4260,143.502083
75 | 38100.0,3936,145.421263
76 | 40500.0,3955,147.332393
77 | 37400.0,4160,149.320792
78 | 41500.0,3893,151.196564
79 | 34700.0,3881,153.079257
80 | 40600.0,4371,155.196352
81 | 43100.0,3695,156.991861
82 | 53000.0,3920,158.900923
83 | 40300.0,3983,160.830663
84 | 49000.0,4309,162.918265
85 | 44300.0,3935,164.83192
86 | 40500.0,3844,166.70039
87 | 45500.0,4016,168.653216
88 | 39100.0,4385,170.787611
89 | 49200.0,3573,172.518745
90 | 48500.0,4030,174.477761
91 | 38500.0,3678,176.263532
92 | 43600.0,4257,178.334697
93 | 40500.0,4068,180.297519
94 | 34400.0,3653,182.074754
95 | 25400.0,3632,183.833901
96 | 33600.0,3697,185.627832
97 | 39200.0,3708,187.426724
98 | 29600.0,3637,189.190862
99 | 37400.0,3731,190.998438
100 | 32900.0,3810,192.850824
101 | 40100.0,3997,194.788953
102 | 47300.0,3844,196.605136
103 | 50000.0,4189,198.47715
104 | 36200.0,3759,200.078743
105 | 39800.0,3849,201.763661
106 | 34700.0,3828,203.485893
107 | 41500.0,4269,205.557709
108 | 43500.0,4585,207.78251
109 | 49000.0,4020,209.736035
110 | 33500.0,3650,211.500322
111 | 51500.0,4124,213.506651
112 | 49000.0,3750,215.330003
113 | 33400.0,3792,217.170306
114 | 38100.0,3677,218.956944
115 | 44000.0,4000,220.899899
116 | 42200.0,3680,222.686688
117 | 51800.0,4168,224.711082
118 | 29100.0,3644,226.473148
119 | 27200.0,3710,228.278477
120 | 37400.0,3824,230.125029
121 | 35100.0,3704,231.929292
122 | 31700.0,3988,233.857676
123 | 46400.0,3954,235.780364
124 | 53500.0,3864,237.65512
125 | 26000.0,3665,239.42042
126 | 42700.0,4071,241.389238
127 | 58500.0,3992,243.31737
128 | 50600.0,4053,245.288382
129 | 45600.0,4159,247.281524
130 | 38400.0,3943,249.153082
131 | 31300.0,2612,250.421511
132 | 43700.0,3935,252.326953
133 | 48600.0,3988,254.250969
134 | 47500.0,4095,256.223561
135 | 39500.0,3757,258.030014
136 | 34000.0,3803,259.870713
137 | 36800.0,3904,261.759659
138 | 41400.0,3565,263.491798
139 | 41300.0,3886,265.365845
140 | 44800.0,3611,267.119321
141 | 46000.0,3735,268.930228
142 | 45500.0,3790,270.756583
143 | 36300.0,4301,272.847211
144 | 38600.0,3605,274.5775
145 | 46000.0,3698,276.370366
146 | 55100.0,4186,278.385095
147 | 35200.0,3785,280.205292
148 | 41200.0,4124,282.194699
149 | 31900.0,3867,284.070051
150 | 23900.0,3808,285.909715
151 | 38700.0,3597,287.647842
152 | 44300.0,3903,289.53886
153 | 40100.0,3967,291.45401
154 | 31200.0,3711,293.213691
155 | 41300.0,3908,295.099524
156 | 34900.0,3716,296.898536
157 | 41800.0,4057,298.8676
158 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615282041.2912836, "env_id": "SeaquestNoFrameskip-v4"}
2 | r,l,t
3 | 1780.0,9082,7.165651
4 | 1780.0,9082,11.51325
5 | 1780.0,9081,15.896254
6 | 1780.0,9081,20.358567
7 | 1800.0,9082,24.865566
8 | 1800.0,9082,29.322846
9 | 1800.0,9081,33.796309
10 | 1800.0,9082,38.233089
11 | 1780.0,9082,42.75617
12 | 1760.0,9082,47.221684
13 | 1800.0,9082,51.683366
14 | 1780.0,9082,56.105346
15 | 1780.0,9082,60.600484
16 | 1760.0,9081,65.0473
17 | 1800.0,9082,69.466815
18 | 1800.0,9082,73.904019
19 | 1760.0,9082,78.412824
20 | 1780.0,9082,82.912151
21 | 1800.0,9082,87.340143
22 | 1820.0,9081,91.749042
23 | 1760.0,9082,96.161275
24 | 1800.0,9082,100.642852
25 | 1760.0,9081,105.134592
26 | 1780.0,9081,109.578357
27 | 1820.0,9081,114.028118
28 | 1820.0,9081,118.46495
29 | 1800.0,9082,122.987751
30 | 1820.0,9081,127.326344
31 | 1800.0,9082,131.776265
32 | 1760.0,9081,136.280469
33 | 1760.0,9082,140.79302
34 | 1780.0,9081,145.268835
35 | 1700.0,8762,149.484477
36 | 1800.0,9082,153.923026
37 | 1800.0,9082,158.353423
38 | 1820.0,9082,162.787202
39 | 1820.0,9082,167.212062
40 | 1780.0,9081,171.657153
41 | 1760.0,9081,176.071074
42 | 1780.0,9082,180.550466
43 | 1760.0,9081,185.043164
44 | 1760.0,9081,189.465687
45 | 1760.0,9082,193.88748
46 | 1800.0,9082,198.281321
47 | 1800.0,9082,202.796558
48 | 1780.0,9082,207.248083
49 | 1760.0,9082,211.688853
50 | 1780.0,9082,216.104586
51 | 1780.0,9081,220.443628
52 | 1800.0,9082,224.857749
53 | 1840.0,9082,229.29361
54 | 1760.0,9082,233.74382
55 | 1800.0,9082,238.00834
56 | 1800.0,9082,242.521467
57 | 1760.0,9082,247.000363
58 | 1820.0,9082,251.488832
59 | 1800.0,9082,255.921816
60 | 1760.0,9082,260.386473
61 | 1800.0,9082,264.896535
62 | 1800.0,9082,269.336106
63 | 1780.0,9082,273.768382
64 | 1780.0,9082,278.173506
65 | 1820.0,9082,282.615731
66 | 1800.0,9082,287.072851
67 | 1780.0,9082,291.512813
68 | 1580.0,8250,295.521151
69 |
--------------------------------------------------------------------------------
/logs/benchmark/ppo-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614770456.7153661, "env_id": "SpaceInvadersNoFrameskip-v4"}
2 | r,l,t
3 | 600.0,2405,3.914749
4 | 600.0,2539,4.87522
5 | 570.0,2327,5.757872
6 | 1230.0,5673,7.896288
7 | 600.0,2315,8.769479
8 | 600.0,2857,9.845313
9 | 1590.0,7541,12.730985
10 | 2090.0,8591,15.996526
11 | 1585.0,6093,18.324354
12 | 600.0,2449,19.250391
13 | 540.0,3282,20.471908
14 | 545.0,2405,21.384644
15 | 570.0,2469,22.318601
16 | 1630.0,6275,24.687005
17 | 800.0,3021,25.82658
18 | 600.0,2687,26.835624
19 | 1770.0,8099,29.917496
20 | 800.0,2893,31.008491
21 | 600.0,2843,32.087468
22 | 600.0,2573,33.062917
23 | 1380.0,5372,35.103718
24 | 570.0,2520,36.052045
25 | 1235.0,5669,38.191427
26 | 1175.0,5531,40.29176
27 | 1230.0,6129,42.596846
28 | 970.0,5423,44.657641
29 | 1230.0,5965,46.896973
30 | 600.0,2485,47.833416
31 | 570.0,3545,49.162466
32 | 555.0,2327,50.046039
33 | 575.0,4107,51.585598
34 | 1205.0,7102,54.245788
35 | 600.0,2805,55.292747
36 | 1125.0,5237,57.280273
37 | 600.0,2777,58.334494
38 | 1230.0,5557,60.431282
39 | 575.0,2708,61.452153
40 | 800.0,3143,62.625328
41 | 570.0,2956,63.728158
42 | 1370.0,6813,66.294163
43 | 1370.0,6286,68.644167
44 | 775.0,3116,69.814711
45 | 1330.0,7499,72.653976
46 | 1430.0,6605,75.134255
47 | 575.0,4397,76.772299
48 | 2030.0,10489,80.723449
49 | 510.0,3408,81.992183
50 | 600.0,2173,82.812686
51 | 580.0,2668,83.820999
52 | 1205.0,6060,86.097588
53 | 830.0,3351,87.356693
54 | 1230.0,5519,89.442007
55 | 600.0,2433,90.360981
56 | 570.0,2574,91.325186
57 | 1170.0,6277,93.682385
58 | 570.0,2241,94.533334
59 | 515.0,2731,95.565007
60 | 1435.0,5895,97.790861
61 | 1175.0,5492,99.851393
62 | 545.0,2853,100.930767
63 | 1430.0,5589,103.028106
64 | 740.0,3943,104.515226
65 | 705.0,3711,105.905905
66 | 570.0,2841,106.97071
67 | 1175.0,5545,109.064234
68 | 1175.0,5465,111.119168
69 | 1175.0,6038,113.395968
70 | 600.0,2439,114.312117
71 | 570.0,3541,115.633128
72 | 600.0,2327,116.509693
73 | 570.0,2398,117.417454
74 | 600.0,2861,118.497781
75 | 1370.0,5490,120.557487
76 | 575.0,3375,121.821491
77 | 1210.0,4932,123.689292
78 | 570.0,2206,124.524383
79 | 570.0,2277,125.382965
80 | 1175.0,6184,127.712865
81 | 1945.0,8509,130.918364
82 | 1280.0,5293,132.918554
83 | 950.0,5249,134.904678
84 | 1185.0,5208,136.870933
85 | 1200.0,6206,139.196474
86 | 545.0,3272,140.421652
87 | 600.0,2695,141.433939
88 | 570.0,2334,142.317909
89 | 1435.0,6601,144.789252
90 | 570.0,2305,145.66184
91 | 970.0,4711,147.444192
92 | 575.0,3055,148.5987
93 | 2065.0,9483,152.164299
94 | 1320.0,5597,154.271958
95 | 1230.0,5921,156.488366
96 | 540.0,2938,157.592431
97 | 1120.0,5389,159.620881
98 | 1145.0,5839,161.825858
99 | 600.0,2533,162.775769
100 | 2200.0,8533,166.009311
101 | 515.0,3115,167.179821
102 | 745.0,3351,168.435554
103 | 570.0,2192,169.262837
104 | 1435.0,5819,171.446572
105 | 600.0,2885,172.526852
106 | 1780.0,6768,175.07971
107 | 570.0,2648,176.069265
108 | 600.0,2621,177.049405
109 | 570.0,2092,177.843376
110 | 1380.0,7781,180.783677
111 | 600.0,2345,181.672377
112 | 1360.0,5950,183.912733
113 | 575.0,2639,184.90189
114 | 1230.0,6295,187.263022
115 | 540.0,2561,188.228392
116 | 1705.0,9517,191.814498
117 | 580.0,1978,192.566855
118 | 600.0,2501,193.508208
119 | 1230.0,6693,196.031862
120 | 1205.0,6623,198.51251
121 | 570.0,3149,199.688567
122 | 1400.0,5209,201.64975
123 | 950.0,4733,203.450315
124 | 570.0,2149,204.261615
125 | 1205.0,5603,206.371998
126 | 1385.0,5767,208.560025
127 | 570.0,2170,209.385584
128 | 570.0,2495,210.324781
129 | 1260.0,5779,212.495315
130 | 1230.0,5477,214.555492
131 | 1800.0,8253,217.678085
132 | 1160.0,5875,219.905236
133 | 1200.0,5733,222.082316
134 | 1465.0,7059,224.754595
135 | 600.0,2269,225.608018
136 | 1230.0,5747,227.770797
137 | 605.0,2377,228.674616
138 | 1290.0,5145,230.616475
139 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-AsteroidsNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615284368.635039, "env_id": "AsteroidsNoFrameskip-v4"}
2 | r,l,t
3 | 2030.0,5280,4.917788
4 | 1280.0,7246,7.733405
5 | 1280.0,7744,10.735203
6 | 2110.0,5418,12.840264
7 | 1580.0,9792,16.635882
8 | 2700.0,14442,22.269402
9 | 580.0,2602,23.28045
10 | 1410.0,8022,26.392479
11 | 1470.0,9992,30.243479
12 | 1300.0,4820,32.112589
13 | 880.0,3728,33.556423
14 | 1850.0,6712,36.16707
15 | 2300.0,18852,43.480112
16 | 2000.0,7066,46.231887
17 | 1880.0,6666,48.825108
18 | 3100.0,14344,54.417897
19 | 1300.0,5528,56.570307
20 | 1340.0,9790,60.490703
21 | 2080.0,8110,63.656714
22 | 1660.0,6686,66.265376
23 | 4270.0,15394,72.397557
24 | 1370.0,6100,74.749058
25 | 1470.0,5336,76.823082
26 | 1760.0,13178,81.991044
27 | 6890.0,27672,92.853137
28 | 1760.0,14066,98.439594
29 | 2900.0,8700,101.827892
30 | 1410.0,5754,104.055313
31 | 1320.0,4634,105.837138
32 | 3970.0,12582,110.797729
33 | 2080.0,7168,113.588531
34 | 1080.0,5372,115.669907
35 | 1430.0,10506,119.765551
36 | 1480.0,7942,122.972187
37 | 1440.0,4668,124.785599
38 | 2000.0,6472,127.308509
39 | 3920.0,10786,131.527477
40 | 2750.0,9510,135.345486
41 | 2150.0,6788,137.990966
42 | 2000.0,7614,140.951383
43 | 3220.0,13132,146.187673
44 | 3730.0,13448,151.436091
45 | 1580.0,7324,154.288391
46 | 1370.0,6314,156.804605
47 | 3220.0,11014,161.160282
48 | 2350.0,7398,164.036948
49 | 3000.0,9886,167.901782
50 | 2950.0,9504,171.705262
51 | 830.0,4964,173.63178
52 | 1880.0,6698,176.241872
53 | 3270.0,11406,180.728887
54 | 3220.0,15342,186.772055
55 | 2280.0,9198,190.361094
56 | 2150.0,6422,192.914549
57 | 3310.0,12340,197.784235
58 | 1880.0,8594,201.132567
59 | 1180.0,5834,203.409548
60 | 1130.0,5844,205.782184
61 | 1910.0,6416,208.304251
62 | 5540.0,15882,214.497507
63 | 1590.0,5052,216.464622
64 | 2260.0,12434,221.384965
65 | 3530.0,14708,227.130663
66 | 2050.0,8244,230.450806
67 | 1390.0,8756,233.848895
68 | 1830.0,10548,237.956959
69 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614852096.4742026, "env_id": "BeamRiderNoFrameskip-v4"}
2 | r,l,t
3 | 51584.0,71433,30.11881
4 | 22170.0,44639,48.369793
5 | 30514.0,48741,68.151076
6 | 16020.0,32883,80.800972
7 | 6232.0,17303,87.346454
8 | 4412.0,17059,93.873198
9 | 20790.0,38463,112.188127
10 | 16820.0,36761,128.455022
11 | 16110.0,36277,144.132732
12 | 19380.0,38563,160.682819
13 | 5884.0,18413,168.312466
14 | 6988.0,19351,176.849476
15 | 14294.0,34023,191.423923
16 | 12660.0,31657,205.309299
17 | 12960.0,32987,219.264733
18 | 15690.0,38991,236.003012
19 | 18582.0,38939,252.629304
20 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614779697.375159, "env_id": "BreakoutNoFrameskip-v4"}
2 | r,l,t
3 | 421.0,13759,7.717858
4 | 382.0,8885,11.008912
5 | 368.0,7080,13.641344
6 | 360.0,6621,16.082456
7 | 450.0,46707,33.102802
8 | 287.0,8744,36.332456
9 | 393.0,8224,39.367732
10 | 395.0,8256,42.396542
11 | 432.0,9497,45.875642
12 | 373.0,8425,48.983618
13 | 408.0,13715,54.01905
14 | 400.0,12775,58.683662
15 | 408.0,17495,65.088949
16 | 773.0,21958,73.152582
17 | 423.0,22762,81.513918
18 | 349.0,8182,84.536757
19 | 373.0,21159,92.287416
20 | 320.0,8244,95.330242
21 | 210.0,6587,97.765068
22 | 327.0,9053,101.105664
23 | 438.0,11200,105.217838
24 | 425.0,28962,115.746121
25 | 290.0,5949,117.950181
26 | 407.0,21378,125.79268
27 | 375.0,13241,130.658171
28 | 451.0,16014,136.544134
29 | 423.0,12583,141.160334
30 | 403.0,7056,143.759736
31 | 421.0,20190,151.097538
32 | 252.0,7425,153.834893
33 | 407.0,19564,160.988797
34 | 364.0,28839,171.57312
35 | 383.0,8248,174.622104
36 | 411.0,13522,179.58879
37 | 385.0,10836,183.585816
38 | 410.0,9478,187.070337
39 | 373.0,7982,190.026134
40 | 431.0,28878,200.540685
41 | 428.0,29933,211.338071
42 | 415.0,10305,215.104128
43 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615798755.7603855, "env_id": "EnduroNoFrameskip-v4"}
2 | r,l,t
3 | 4693.0,133120,54.108688
4 | 2582.0,119808,99.796636
5 | 4931.0,146432,155.525393
6 | 1989.0,93184,190.736086
7 | 1961.0,93184,226.100894
8 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614779925.3269277, "env_id": "PongNoFrameskip-v4"}
2 | r,l,t
3 | 21.0,7947,5.600472
4 | 21.0,8442,8.701368
5 | 20.0,9634,12.226275
6 | 20.0,9762,15.790657
7 | 20.0,9609,19.300387
8 | 21.0,10585,23.16411
9 | 21.0,7947,26.067608
10 | 21.0,10205,29.791635
11 | 21.0,7941,32.686685
12 | 21.0,7941,35.583624
13 | 21.0,9895,39.196352
14 | 21.0,9949,42.81496
15 | 21.0,9895,46.414241
16 | 19.0,12324,50.903394
17 | 19.0,11473,55.076255
18 | 21.0,7878,57.940445
19 | 21.0,9895,61.542388
20 | 21.0,8279,64.549082
21 | 20.0,9625,68.059175
22 | 21.0,8678,71.209772
23 | 21.0,7947,74.102408
24 | 21.0,8817,77.327215
25 | 20.0,10751,81.23354
26 | 21.0,7947,84.124312
27 | 21.0,10029,87.772565
28 | 21.0,7941,90.660588
29 | 21.0,8096,93.601612
30 | 21.0,7947,96.499756
31 | 20.0,8959,99.759803
32 | 20.0,12466,104.28981
33 | 20.0,8357,107.329129
34 | 21.0,11809,111.624942
35 | 20.0,8408,114.678773
36 | 19.0,10698,118.573543
37 | 21.0,10572,122.417779
38 | 20.0,12662,127.027967
39 | 20.0,8111,129.980286
40 | 21.0,12685,134.595775
41 | 21.0,10857,138.545573
42 | 19.0,10857,142.492337
43 | 21.0,7947,145.377651
44 | 20.0,8323,148.40299
45 | 21.0,7947,151.294838
46 | 20.0,9177,154.633734
47 | 21.0,7947,157.525611
48 | 19.0,8704,160.690106
49 | 21.0,8279,163.702176
50 | 20.0,8077,166.637348
51 | 21.0,9895,170.237087
52 | 21.0,12037,174.608734
53 | 20.0,8077,177.542576
54 | 21.0,7947,180.432275
55 | 20.0,8486,183.514008
56 | 21.0,7941,186.413429
57 | 21.0,9895,190.02161
58 | 21.0,12037,194.4052
59 | 19.0,12802,199.05999
60 | 19.0,12353,203.547561
61 | 20.0,8618,206.684987
62 | 21.0,7947,209.578497
63 | 20.0,9505,213.026868
64 | 21.0,11741,217.296149
65 | 21.0,7941,220.184218
66 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-QbertNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614780377.3798313, "env_id": "QbertNoFrameskip-v4"}
2 | r,l,t
3 | 15600.0,6377,5.057885
4 | 15550.0,7312,7.795092
5 | 15875.0,8987,11.144577
6 | 15600.0,6372,13.514814
7 | 15400.0,6394,15.885376
8 | 15600.0,6372,18.256008
9 | 16250.0,7487,21.048109
10 | 15600.0,6372,23.41627
11 | 12175.0,5163,25.33254
12 | 7750.0,3302,26.55398
13 | 15400.0,6334,28.906239
14 | 16050.0,6422,31.29228
15 | 4300.0,3159,32.456307
16 | 15825.0,5479,34.494781
17 | 15600.0,6372,36.869116
18 | 15400.0,6339,39.219169
19 | 8400.0,4895,41.03438
20 | 15450.0,6025,43.265694
21 | 15900.0,7189,45.924935
22 | 15400.0,6339,48.269106
23 | 16250.0,7487,51.050018
24 | 14925.0,6852,53.588706
25 | 16325.0,8247,56.64451
26 | 16300.0,8242,59.700643
27 | 15400.0,6334,62.046261
28 | 15400.0,6339,64.393149
29 | 15400.0,6399,66.765665
30 | 16250.0,7059,69.384406
31 | 16000.0,7414,72.127738
32 | 16000.0,7317,74.832424
33 | 16275.0,8557,78.012088
34 | 15650.0,7473,80.778527
35 | 15600.0,6377,83.135277
36 | 16700.0,8209,86.173364
37 | 15600.0,6372,88.537448
38 | 4425.0,3129,89.688693
39 | 16425.0,7340,92.412909
40 | 3975.0,2033,93.155878
41 | 15600.0,6372,95.515862
42 | 16050.0,6442,97.906537
43 | 16250.0,7487,100.682827
44 | 16025.0,6742,103.176655
45 | 16300.0,8042,106.157331
46 | 15475.0,6850,108.689824
47 | 16175.0,7827,111.588518
48 | 8550.0,6072,113.839271
49 | 16475.0,6543,116.257065
50 | 15400.0,6339,118.600999
51 | 16050.0,7100,121.234627
52 | 16150.0,7077,123.855452
53 | 15500.0,6623,126.313986
54 | 16475.0,6357,128.661455
55 | 15400.0,6339,131.005288
56 | 12100.0,5664,133.09717
57 | 15400.0,6334,135.439019
58 | 7750.0,3307,136.661919
59 | 16075.0,7510,139.44304
60 | 15450.0,6194,141.736086
61 | 15400.0,6339,144.08686
62 | 17275.0,9532,147.619866
63 | 15550.0,6025,149.842855
64 | 15600.0,6372,152.203281
65 | 15475.0,6731,154.696078
66 | 15500.0,6002,156.923521
67 | 16475.0,7882,159.837396
68 | 15550.0,6807,162.35231
69 | 16975.0,6733,164.847731
70 | 15400.0,6334,167.194746
71 | 15400.0,6399,169.56363
72 | 15500.0,6814,172.090505
73 | 15400.0,6339,174.44393
74 | 16250.0,7492,177.215289
75 | 15075.0,7007,179.807077
76 | 15650.0,6239,182.109103
77 | 7550.0,3708,183.476477
78 | 15600.0,6372,185.836382
79 | 15725.0,6467,188.226367
80 | 15600.0,6372,190.592282
81 | 15400.0,6339,192.935986
82 | 15650.0,6927,195.50159
83 | 16900.0,6274,197.820424
84 | 16250.0,7487,200.593243
85 | 16075.0,7007,203.18213
86 | 15525.0,6727,205.669727
87 | 16275.0,7587,208.47724
88 | 15600.0,6377,210.842808
89 | 15400.0,6334,213.186167
90 | 15400.0,6334,215.530586
91 | 15400.0,6334,217.876995
92 | 15400.0,6334,220.218556
93 | 4600.0,6289,222.542634
94 | 16075.0,7365,225.269416
95 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-RoadRunnerNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615284101.0187874, "env_id": "RoadRunnerNoFrameskip-v4"}
2 | r,l,t
3 | 45700.0,4195,4.675652
4 | 36400.0,10379,9.093378
5 | 42500.0,4318,10.943592
6 | 40000.0,4409,12.819046
7 | 40100.0,18894,20.841767
8 | 48000.0,16250,27.781744
9 | 31400.0,4342,29.634131
10 | 29100.0,3676,31.203189
11 | 43100.0,7511,34.405609
12 | 29500.0,3953,36.088055
13 | 39700.0,6818,38.997077
14 | 38500.0,5285,41.252269
15 | 49500.0,4064,43.000126
16 | 46600.0,10519,47.530509
17 | 29200.0,4526,49.451559
18 | 39100.0,5894,51.960575
19 | 53500.0,11215,56.743758
20 | 42100.0,4457,58.650866
21 | 30500.0,3748,60.247491
22 | 56500.0,19033,68.358962
23 | 48500.0,25777,79.503964
24 | 33400.0,6918,82.571893
25 | 47600.0,29355,95.159505
26 | 56500.0,14117,101.289444
27 | 54000.0,3714,102.91013
28 | 39900.0,27591,114.183431
29 | 45100.0,9823,118.347341
30 | 31900.0,8528,122.001608
31 | 51000.0,5418,124.284162
32 | 39400.0,3984,125.976956
33 | 49600.0,5012,128.104257
34 | 42500.0,4851,130.154958
35 | 40900.0,4812,132.180582
36 | 49000.0,10090,136.482913
37 | 23900.0,4297,138.380678
38 | 41600.0,26677,149.766945
39 | 37900.0,55625,173.340621
40 | 54500.0,8922,177.154951
41 | 36000.0,4763,179.190263
42 | 40100.0,4745,181.219405
43 | 32200.0,6252,183.88833
44 | 47700.0,5602,186.280746
45 | 34800.0,4502,188.199655
46 | 53100.0,5188,190.433815
47 | 48600.0,7978,193.853717
48 | 44500.0,20618,202.602421
49 | 45100.0,10767,207.15412
50 | 30700.0,3959,208.843217
51 | 42600.0,9351,212.814273
52 | 28100.0,4686,214.808829
53 | 48900.0,5631,217.231784
54 | 56000.0,5127,219.426613
55 | 48000.0,11301,224.260313
56 | 30300.0,7920,227.654343
57 | 42500.0,8656,231.329389
58 | 34600.0,4871,233.404373
59 | 58500.0,34210,247.963593
60 | 49000.0,8831,251.734886
61 | 47700.0,7061,254.753482
62 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615283846.1270072, "env_id": "SeaquestNoFrameskip-v4"}
2 | r,l,t
3 | 2600.0,9082,6.484057
4 | 2580.0,9082,10.181498
5 | 2560.0,9081,13.884617
6 | 2580.0,9081,17.589159
7 | 2560.0,9082,21.302521
8 | 2580.0,9070,25.006688
9 | 2560.0,9081,28.729359
10 | 2560.0,9081,32.440299
11 | 2580.0,9082,36.154031
12 | 2580.0,9082,39.827491
13 | 2600.0,9081,43.539016
14 | 2560.0,9082,47.245576
15 | 2600.0,9082,50.952624
16 | 2580.0,9082,54.671462
17 | 2560.0,9082,58.392135
18 | 2560.0,9082,62.102942
19 | 2560.0,9050,65.804434
20 | 2600.0,9082,69.522391
21 | 2540.0,9114,73.254171
22 | 2460.0,8694,76.806014
23 | 2540.0,9082,80.52209
24 | 2460.0,8665,84.067281
25 | 2540.0,8954,87.693721
26 | 2500.0,8794,91.284982
27 | 2580.0,9082,94.997058
28 | 2600.0,9081,98.713351
29 | 2560.0,9082,102.426905
30 | 2600.0,9081,106.259488
31 | 2580.0,9082,110.101429
32 | 2580.0,9082,113.837194
33 | 2580.0,9082,117.564113
34 | 2580.0,9082,121.291342
35 | 2580.0,9082,125.017416
36 | 2580.0,9082,128.74763
37 | 2560.0,9082,132.486106
38 | 2560.0,9081,136.265387
39 | 2500.0,9081,140.045696
40 | 2560.0,9082,143.820506
41 | 2560.0,9082,147.595112
42 | 2560.0,9113,151.388052
43 | 2560.0,9082,155.165497
44 | 1980.0,7418,158.271334
45 | 2580.0,9081,162.070482
46 | 2560.0,9082,165.861288
47 | 2600.0,9082,169.653452
48 | 2560.0,9081,173.442111
49 | 2560.0,9082,177.22763
50 | 2560.0,9082,180.942908
51 | 2600.0,9082,184.682519
52 | 2540.0,9081,188.42108
53 | 2580.0,9082,192.16452
54 | 2560.0,9081,195.902184
55 | 2580.0,9082,199.642456
56 | 2580.0,9082,203.379417
57 | 2560.0,9082,207.112618
58 | 2600.0,9081,210.860535
59 | 2560.0,9082,214.599277
60 | 2600.0,9082,218.338832
61 | 2580.0,9082,222.068007
62 | 2540.0,8794,225.65806
63 | 2580.0,9082,229.372201
64 | 2580.0,9082,233.098182
65 | 2560.0,9081,236.829339
66 | 2600.0,9082,240.560709
67 | 2560.0,9082,244.280681
68 | 2560.0,9114,248.011547
69 |
--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614780157.951553, "env_id": "SpaceInvadersNoFrameskip-v4"}
2 | r,l,t
3 | 1860.0,9433,6.017921
4 | 1175.0,5897,8.11461
5 | 2030.0,8071,10.982974
6 | 1205.0,5963,13.093197
7 | 545.0,2807,14.085371
8 | 2715.0,11574,18.185369
9 | 2360.0,10915,22.055546
10 | 2695.0,12400,26.42824
11 | 2150.0,10363,30.087275
12 | 1200.0,6444,32.357866
13 | 1230.0,6287,34.576591
14 | 1235.0,5855,36.641079
15 | 1775.0,8365,39.598783
16 | 1235.0,5743,41.63625
17 | 1200.0,5490,43.575229
18 | 2615.0,14253,48.580082
19 | 545.0,3365,49.769976
20 | 1805.0,8265,52.685741
21 | 2030.0,9036,55.880196
22 | 1230.0,6311,58.104826
23 | 2200.0,9673,61.514868
24 | 1200.0,6015,63.640325
25 | 1860.0,8851,66.758578
26 | 1435.0,5937,68.859172
27 | 600.0,3857,70.211838
28 | 2145.0,10022,73.769417
29 | 1375.0,6239,75.980469
30 | 1745.0,8183,78.877634
31 | 1230.0,5879,80.963021
32 | 1400.0,5782,83.007614
33 | 1860.0,8973,86.166276
34 | 2925.0,12519,90.581527
35 | 1355.0,6386,92.840083
36 | 2465.0,12334,97.176396
37 | 1235.0,6153,99.345549
38 | 2910.0,12098,103.606352
39 | 1835.0,8619,106.660892
40 | 575.0,3337,107.830249
41 | 2060.0,9007,111.002099
42 | 2695.0,12860,115.521394
43 | 2490.0,12121,119.778012
44 | 1200.0,6887,122.210271
45 | 1860.0,8317,125.142263
46 | 545.0,3244,126.286159
47 | 1175.0,5886,128.380142
48 | 2810.0,11428,132.413715
49 | 2860.0,11611,136.527394
50 | 3235.0,12789,141.018401
51 | 1145.0,5983,143.135476
52 | 1860.0,8419,146.100608
53 | 3920.0,17237,152.142236
54 | 1860.0,8593,155.161895
55 | 3725.0,14361,160.22655
56 | 2690.0,11517,164.291665
57 | 1435.0,5895,166.378403
58 | 3565.0,13548,171.137103
59 | 2540.0,10619,174.901788
60 | 2435.0,11811,179.077891
61 | 3380.0,14197,184.075026
62 | 570.0,3009,185.147563
63 | 1205.0,6201,187.337598
64 | 2720.0,12506,191.72243
65 | 1170.0,6345,193.966073
66 | 2915.0,11154,197.895742
67 | 1175.0,6241,200.099791
68 | 1805.0,9273,203.379786
69 | 2320.0,10262,206.994567
70 | 1430.0,6411,209.251219
71 | 3115.0,11792,213.408668
72 |
--------------------------------------------------------------------------------
/logs/benchmark/sac-BipedalWalker-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614772839.3377283, "env_id": "BipedalWalker-v3"}
2 | r,l,t
3 | 302.360601,1114,3.36505
4 | 299.982386,1097,4.165927
5 | 301.305306,1118,4.981788
6 | 300.755528,1109,5.789752
7 | 300.41319,1103,6.591837
8 | 300.136167,1112,7.402884
9 | 301.942391,1111,8.212199
10 | 300.705809,1109,9.019499
11 | 302.139217,1109,9.826885
12 | 300.911729,1104,10.629605
13 | 302.272265,1109,11.437745
14 | 299.604268,1104,12.241354
15 | 299.278248,1100,13.040194
16 | 300.414958,1101,13.841523
17 | 300.574915,1113,14.650683
18 | 299.422525,1108,15.457727
19 | 301.058969,1113,16.265585
20 | 300.929906,1111,17.070907
21 | 301.217599,1112,17.876746
22 | 299.808359,1099,18.676308
23 | 301.97646,1103,19.475271
24 | 299.383641,1105,20.276059
25 | 301.048902,1119,21.089404
26 | 300.719316,1104,21.890113
27 | 300.35663,1104,22.689351
28 | 299.585902,1102,23.49109
29 | 300.387684,1101,24.289257
30 | 301.02301,1105,25.091187
31 | 299.283324,1103,25.888853
32 | 298.797811,1101,26.6852
33 | 300.889551,1105,27.484889
34 | 301.312607,1117,28.293692
35 | 298.839066,1104,29.090828
36 | 301.465511,1105,29.88746
37 | 299.999362,1107,30.688471
38 | 300.343653,1119,31.501015
39 | 300.794821,1105,32.298504
40 | 301.566184,1122,33.106802
41 | 300.154061,1104,33.904654
42 | 300.601982,1109,34.707381
43 | 299.783959,1116,35.512661
44 | 299.249841,1103,36.308428
45 | 300.459917,1103,37.104169
46 | 301.997507,1113,37.908109
47 | 299.710068,1101,38.703565
48 | 301.021391,1097,39.493932
49 | 302.55917,1113,40.297005
50 | 300.073168,1107,41.096599
51 | 303.078654,1119,41.897959
52 | 298.265251,1104,42.68975
53 | 300.982828,1104,43.482014
54 | 300.353754,1110,44.280113
55 | 300.748211,1112,45.076373
56 | 300.661172,1118,45.876885
57 | 300.173002,1106,46.671299
58 | 302.059827,1106,47.467817
59 | 302.151632,1115,48.265848
60 | 298.925516,1097,49.052199
61 | 300.456355,1118,49.853958
62 | 300.882569,1105,50.647162
63 | 301.373377,1115,51.445441
64 | 301.108553,1104,52.237467
65 | 298.812307,1104,53.032318
66 | 298.587068,1092,53.822424
67 | 296.778464,1099,54.612579
68 | 299.83619,1094,55.396447
69 | 301.041658,1104,56.188702
70 | 298.44367,1095,56.975703
71 | 301.994168,1104,57.765858
72 | 299.021788,1102,58.555475
73 | 301.347008,1109,59.351158
74 | 300.822325,1101,60.142673
75 | -86.211804,114,60.227066
76 | 302.564616,1114,61.024435
77 | 299.492492,1107,61.81947
78 | 301.447796,1108,62.615735
79 | 302.675531,1117,63.414827
80 | 301.229215,1103,64.204417
81 | 300.875007,1114,65.002628
82 | 300.933166,1094,65.787878
83 | 301.602342,1113,66.584519
84 | 298.375637,1112,67.380561
85 | 299.737012,1103,68.171237
86 | 299.803846,1105,68.962979
87 | 300.758485,1108,69.756849
88 | 301.953926,1109,70.549286
89 | 299.530853,1110,71.343521
90 | 300.314862,1108,72.137488
91 | 299.606724,1106,72.927233
92 | 298.734268,1100,73.712741
93 | 298.709114,1092,74.494166
94 | 298.423564,1095,75.279155
95 | 302.788395,1110,76.072666
96 | 298.820547,1098,76.859971
97 | 298.282516,1108,77.653742
98 | 298.552512,1109,78.449211
99 | 300.992863,1104,79.239589
100 | 299.893245,1100,80.026569
101 | 299.645259,1095,80.809745
102 | 300.726465,1106,81.601357
103 | 301.481949,1110,82.394461
104 | 300.000249,1109,83.191714
105 | 300.958623,1128,84.001832
106 | 300.926692,1123,84.806707
107 | 301.136796,1103,85.595889
108 | 300.421307,1104,86.386729
109 | 301.304972,1102,87.177112
110 | 299.196795,1107,87.971708
111 | 300.282078,1105,88.7635
112 | 298.923465,1110,89.557497
113 | 301.749845,1121,90.359199
114 | 300.179531,1102,91.150525
115 | 301.675692,1115,91.949993
116 | 298.664999,1098,92.738292
117 | 302.259756,1104,93.530223
118 | 300.335136,1109,94.327485
119 | 302.262681,1108,95.121866
120 | 301.742746,1102,95.910671
121 | 301.690168,1118,96.714285
122 | 299.771082,1106,97.508253
123 | 300.023118,1103,98.298843
124 | 297.838527,1092,99.080485
125 | 300.998053,1116,99.881017
126 | 300.246662,1101,100.672354
127 | 300.242191,1105,101.463338
128 | 300.060263,1099,102.250881
129 | 301.302396,1107,103.04414
130 | 301.973059,1114,103.8426
131 | 301.272366,1108,104.634917
132 | 302.632714,1108,105.426627
133 | 300.551611,1100,106.214734
134 | 302.824584,1116,107.015751
135 | 300.550722,1109,107.810142
136 | 300.636237,1107,108.603951
137 | 300.656654,1114,109.403951
138 | 299.35787,1095,110.189639
139 |
--------------------------------------------------------------------------------
/logs/benchmark/sac-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615282937.308309, "env_id": "BipedalWalkerHardcore-v3"}
2 | r,l,t
3 | 41.778326,953,3.662442
4 | 180.467412,2000,5.824946
5 | 97.295092,1363,7.285249
6 | -81.182201,346,7.643032
7 | -40.802484,2000,9.825659
8 | -99.934007,2000,11.965935
9 | -42.94785,2000,14.116859
10 | 282.487894,1556,15.766352
11 | -2.732902,779,16.615764
12 | -17.791582,2000,18.759749
13 | -118.597024,2000,20.948429
14 | 279.944892,1562,22.609763
15 | -37.321344,2000,24.767509
16 | -55.572132,441,25.237352
17 | 65.808814,2000,27.385929
18 | 54.506992,2000,29.527135
19 | -30.193249,2000,31.700151
20 | 262.237829,1844,33.642516
21 | -135.306954,2000,35.582859
22 | 32.545857,2000,37.762973
23 | -96.304213,2000,39.912227
24 | -62.419513,2000,42.052971
25 | -10.491988,2000,44.205731
26 | 3.009945,2000,46.351385
27 | -41.232128,2000,48.469136
28 | -25.112829,2000,50.622787
29 | 1.402057,604,51.264819
30 | 113.538035,2000,53.445968
31 | 279.57376,1561,55.120767
32 | -11.387091,512,55.665265
33 | 2.594991,2000,57.829058
34 | -46.001328,2000,59.981994
35 | -111.158098,209,60.211232
36 | 5.653206,2000,62.355254
37 | -115.540607,2000,64.528579
38 | -29.031126,2000,66.686181
39 | -20.090611,2000,68.844233
40 | 25.867034,2000,71.026041
41 | 68.224921,2000,73.172868
42 | -41.639959,2000,75.30538
43 | -63.43205,314,75.639336
44 | -55.897433,2000,77.824094
45 | -79.076376,348,78.183614
46 | -75.219312,336,78.550389
47 | 166.120098,2000,80.70091
48 | -6.885657,2000,82.591767
49 | 67.274203,2000,84.728624
50 | -100.736684,2000,86.88388
51 | 9.361807,2000,89.031049
52 | -104.43978,2000,91.185034
53 | 175.253622,2000,93.32884
54 | 72.826926,1502,94.950952
55 | -107.557122,426,95.420961
56 | 34.350763,2000,97.562223
57 | 118.684665,2000,99.711555
58 | 18.436068,918,100.70732
59 | -72.023005,512,101.245563
60 | -94.057267,2000,103.430716
61 | -14.339757,2000,105.585078
62 | 276.646473,1682,107.405251
63 | -93.379379,2000,109.552183
64 | -32.890574,2000,111.734328
65 | -19.358966,2000,113.875139
66 | -97.591226,2000,116.016107
67 | -104.305948,2000,118.203043
68 | 268.461755,1756,120.0877
69 | -83.482728,2000,122.255662
70 | 59.102619,2000,124.415739
71 | 31.694314,2000,126.600275
72 | -24.754214,2000,128.495678
73 | -61.835281,2000,130.675213
74 | -35.282332,2000,132.834678
75 | -33.850913,2000,134.96645
76 | -111.449906,2000,137.133202
77 | -113.290239,263,137.41089
78 | 7.253,2000,139.565842
79 | 100.123289,1549,141.236544
80 | -35.315291,2000,143.384549
81 | -62.103672,2000,145.527463
82 | -55.579403,458,146.024764
83 | 176.048998,2000,148.188452
84 | -15.598691,2000,150.343705
85 | 100.101637,2000,152.481565
86 | 138.802199,2000,154.663462
87 | -108.792934,2000,156.829956
88 | -6.308218,2000,158.973216
89 | -91.763453,2000,161.134987
90 | 5.102684,2000,163.266236
91 |
--------------------------------------------------------------------------------
/logs/benchmark/td3-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615035716.3739204, "env_id": "BipedalWalkerHardcore-v3"}
2 | r,l,t
3 | -75.120605,2000,4.864824
4 | -132.737428,2000,6.84062
5 | -75.439565,2000,8.801577
6 | -76.590578,2000,10.690364
7 | -77.46551,2000,12.538607
8 | -118.239931,2000,14.351909
9 | -104.317353,2000,16.067962
10 | -69.326927,2000,17.797764
11 | -117.754794,2000,19.485572
12 | -72.893132,2000,21.180301
13 | -100.400577,2000,22.862713
14 | -115.384812,2000,24.573101
15 | -111.380875,2000,26.278326
16 | -113.021511,2000,27.959434
17 | -72.503946,2000,29.715913
18 | -118.90079,2000,31.564067
19 | -104.994169,2000,33.428279
20 | -112.627715,2000,35.262558
21 | -76.1824,2000,36.904961
22 | -76.263312,2000,38.549288
23 | -115.804972,2000,40.212135
24 | -112.246159,2000,41.872432
25 | -91.741549,2000,43.504618
26 | -82.195084,2000,45.10595
27 | -79.176994,2000,46.691015
28 | -99.303968,2000,48.336355
29 | -87.31807,2000,49.946664
30 | -95.103121,2000,51.592625
31 | -106.846239,2000,53.230053
32 | -113.287192,2000,54.865682
33 | -102.919775,2000,56.480147
34 | -90.229798,2000,58.131267
35 | -115.558105,2000,59.771812
36 | -108.654203,2000,61.383754
37 | -115.18619,2000,63.12032
38 | -76.182868,2000,64.965528
39 | -81.919436,2000,66.800929
40 | -94.051451,2000,68.635713
41 | -100.343992,2000,70.541423
42 | -107.29148,2000,72.423547
43 | -89.732649,2000,74.275149
44 | -101.208089,2000,76.139937
45 | -73.837888,2000,78.020883
46 | -108.810423,2000,79.869502
47 | -90.368393,2000,81.699364
48 | -118.587039,2000,83.560536
49 | -90.45015,2000,85.259661
50 | -111.38551,2000,86.979527
51 | -90.475702,2000,88.684398
52 | -115.950619,2000,90.399352
53 | -82.306352,2000,92.135506
54 | -73.167066,2000,93.870667
55 | -116.350767,2000,95.585823
56 | -104.899976,2000,97.168645
57 | -109.348082,2000,98.77277
58 | -89.763986,2000,100.360732
59 | -75.626801,2000,101.942616
60 | -114.163178,2000,103.571591
61 | -108.562208,2000,105.200625
62 | -81.079522,2000,106.821184
63 | -94.88129,2000,108.5505
64 | -95.688965,2000,110.409605
65 | -96.025015,2000,112.25252
66 | -115.966983,2000,114.110633
67 | -104.212579,2000,115.954515
68 | -108.756676,2000,117.801634
69 | -77.533709,2000,119.665398
70 | -108.630552,2000,121.521044
71 | -90.906734,2000,123.2634
72 | -132.124042,2000,124.980735
73 | -82.543561,2000,126.690589
74 | -110.444789,2000,128.422873
75 | -81.163665,2000,130.135998
76 | -116.381794,2000,131.861476
77 | -100.473055,2000,133.715868
78 |
--------------------------------------------------------------------------------
/logs/benchmark/tqc-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1614972944.323092, "env_id": "BipedalWalkerHardcore-v3"}
2 | r,l,t
3 | 158.985985,844,4.102602
4 | 300.32726,1050,5.244834
5 | 300.535558,1038,6.388168
6 | 296.875619,1085,7.575825
7 | 302.358559,1013,8.64315
8 | 25.923265,577,9.246357
9 | 228.283257,2000,11.337982
10 | 311.316454,975,12.353261
11 | 52.706594,568,12.944439
12 | -23.643158,330,13.288158
13 | 298.875393,1063,14.3944
14 | 307.395613,1004,15.440466
15 | 291.257043,1082,16.563598
16 | 302.321319,1016,17.622731
17 | 292.55956,1117,18.783862
18 | 300.52161,1030,19.861118
19 | 305.402259,1032,20.934168
20 | 305.712174,960,21.97389
21 | 15.765116,441,22.457574
22 | 286.893706,1176,23.768661
23 | 292.588987,1106,25.062059
24 | 303.161458,1006,26.238271
25 | 124.72623,2000,28.58553
26 | 299.188807,1067,29.837964
27 | 297.265926,1055,31.059305
28 | 299.246128,1058,32.225142
29 | 138.067304,2000,34.423318
30 | 158.101962,2000,36.611668
31 | 307.149411,982,37.682646
32 | 301.376617,1033,38.810155
33 | 52.525784,2000,41.012082
34 | -5.957255,2000,43.22972
35 | 303.222906,1042,44.368109
36 | -30.104262,296,44.690454
37 | 302.856284,1033,45.814348
38 | 299.417344,1052,46.969577
39 | 299.568715,1052,48.122539
40 | 302.167447,1038,49.255569
41 | 300.055837,1071,50.428728
42 | 304.075581,974,51.535719
43 | 300.606989,1064,52.697526
44 | 21.185638,2000,54.917194
45 | 293.851598,1069,56.089131
46 | 35.995669,2000,58.279191
47 | 218.588658,2000,60.470027
48 | 44.859679,2000,62.676093
49 | 298.081841,1028,63.770969
50 | 291.965119,1100,64.95215
51 | 288.8617,1160,66.225818
52 | 65.046742,883,67.195506
53 | 303.598842,1029,68.311282
54 | 298.681165,1122,69.500515
55 | 134.920099,816,70.363624
56 | 303.944964,963,71.381186
57 | 261.650718,2000,73.633541
58 | 298.924762,1050,74.818844
59 | 178.425483,2000,77.085008
60 | 301.952066,1029,78.24571
61 | 297.590377,1096,79.475244
62 | 308.256382,999,80.60499
63 | 303.66264,1048,81.782448
64 | 233.439754,2000,83.977091
65 | -43.841983,314,84.32413
66 | 291.240187,1248,85.687433
67 | 303.171049,967,86.73503
68 | 307.011786,993,87.805815
69 | 307.201248,1034,88.926074
70 | 92.128525,2000,91.102643
71 | 298.651352,1050,92.212756
72 | 296.614816,1089,93.362487
73 | 195.222046,2000,95.562203
74 | 305.238567,1023,96.697325
75 | 303.564229,1010,97.819911
76 | 291.643838,1115,99.053748
77 | 295.809282,1120,100.295977
78 | 303.4209,967,101.361445
79 | 307.599352,973,102.432886
80 | 296.079821,1059,103.588525
81 | 296.641486,1042,104.726761
82 | 304.632718,974,105.792635
83 | 300.684866,1019,106.913619
84 | 300.337727,1037,108.04645
85 | 294.32521,1111,109.264762
86 | 299.984938,1028,110.397997
87 | 288.785893,1167,111.689689
88 | 303.119877,1011,112.865363
89 | 298.647242,1058,114.10566
90 | 48.931804,2000,116.453665
91 | 187.262679,2000,118.781733
92 | 291.822427,1128,120.056754
93 | 303.411501,1061,121.223087
94 | 297.410528,1064,122.394384
95 | 45.52435,545,122.994992
96 | 298.206199,1059,124.1576
97 | 302.624397,1032,125.287297
98 | 237.922429,2000,127.487648
99 | 12.073966,514,128.050607
100 | 307.005817,971,129.119031
101 | 217.421274,2000,131.331841
102 | -54.266479,274,131.637203
103 | -45.189495,272,131.948989
104 | 301.654401,983,133.04844
105 | 305.29741,1005,134.153365
106 | 110.310658,2000,136.342585
107 | 304.438778,963,137.356552
108 | 299.639912,1068,138.473265
109 | -61.984761,317,138.806689
110 | 296.903281,1087,139.96187
111 | 311.947863,989,141.033978
112 | 305.173788,968,142.080554
113 | -24.936318,449,142.561262
114 | 293.318206,1155,143.815958
115 | 302.575292,1039,144.945063
116 | 301.656432,1026,146.112909
117 | 298.843551,1037,147.290129
118 | 295.248605,1068,148.497394
119 | 305.653108,1032,149.650847
120 | 169.665353,2000,151.820901
121 | 295.730421,1071,152.976426
122 | 41.452605,2000,155.148677
123 | 304.44089,1024,156.245353
124 | 305.12927,1002,157.330209
125 | 300.891507,1032,158.455373
126 | 121.746389,867,159.394768
127 | 300.548733,1040,160.525973
128 | 97.863702,758,161.330169
129 | 300.743761,1009,162.391677
130 | 301.44053,1067,163.515258
131 | 57.961807,713,164.271593
132 | 290.951927,1117,165.47083
133 | 299.077015,1095,166.676866
134 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.ruff]
2 | # Same as Black.
3 | line-length = 127
4 | # Assume Python 3.9
5 | target-version = "py39"
6 |
7 | [tool.ruff.lint]
8 | # See https://beta.ruff.rs/docs/rules/
9 | select = ["E", "F", "B", "UP", "C90", "RUF"]
10 | # Ignore explicit stacklevel
11 | ignore = ["B028"]
12 |
13 | [tool.ruff.lint.per-file-ignores]
14 | "./rl_zoo3/import_envs.py"= ["F401"]
15 | # "./rl_zoo3/plots/plot_train.py"= ["E501"]
16 |
17 |
18 | [tool.ruff.lint.mccabe]
19 | # Ruff's default max complexity is 10; allow up to 15 here.
20 | max-complexity = 15
21 |
22 | [tool.black]
23 | line-length = 127
24 |
25 | [tool.mypy]
26 | ignore_missing_imports = true
27 | follow_imports = "silent"
28 | show_error_codes = true
29 | exclude = """(?x)(
30 | tests/dummy_env/*$
31 | )"""
32 |
33 | [tool.pytest.ini_options]
34 | # Deterministic ordering for tests; useful for pytest-xdist.
35 | env = [
36 | "PYTHONHASHSEED=0"
37 | ]
38 |
39 | filterwarnings = [
40 | # Tensorboard warnings
41 | "ignore::DeprecationWarning:tensorboard",
42 | # Gym warnings
43 | "ignore::UserWarning:gym",
44 | ]
45 | markers = [
46 | "slow: marks tests as slow (deselect with '-m \"not slow\"')"
47 | ]
48 |
49 | [tool.coverage.run]
50 | disable_warnings = ["couldnt-parse"]
51 | branch = false
52 | omit = [
53 | "tests/*",
54 | "setup.py",
55 | "rl_zoo3/plots/*",
56 | "rl_zoo3/push_to_hub.py",
57 | "scripts/*",
58 | ]
59 |
60 | [tool.coverage.report]
61 | exclude_lines = [ "pragma: no cover", "raise NotImplementedError()", "if typing.TYPE_CHECKING:"]
62 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | gym==0.26.2
2 | stable-baselines3[extra,tests,docs]>=2.6.1a1,<3.0
3 | box2d-py==2.3.8
4 | pybullet_envs_gymnasium>=0.6.0
5 | # minigrid
6 | cloudpickle>=2.2.1
7 | # Optuna auto
8 | optunahub>=0.2.0
9 | # optuna plots:
10 | plotly
11 | # need to upgrade to gymnasium:
12 | # panda-gym~=3.0.1
13 | wandb
14 | moviepy>=1.0.0
15 |
--------------------------------------------------------------------------------
/rl_zoo3/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | # isort: off
4 |
5 | import rl_zoo3.gym_patches # noqa: F401
6 |
7 | # isort: on
8 |
9 | from rl_zoo3.utils import (
10 | ALGOS,
11 | create_test_env,
12 | get_latest_run_id,
13 | get_saved_hyperparams,
14 | get_trained_models,
15 | get_wrapper_class,
16 | linear_schedule,
17 | )
18 |
# Read the package version from the ``version.txt`` file located next to this module
version_file = os.path.join(os.path.dirname(__file__), "version.txt")
# Explicit encoding so the result does not depend on the platform's locale default
with open(version_file, encoding="utf-8") as file_handler:
    __version__ = file_handler.read().strip()
23 |
24 | __all__ = [
25 | "ALGOS",
26 | "create_test_env",
27 | "get_latest_run_id",
28 | "get_saved_hyperparams",
29 | "get_trained_models",
30 | "get_wrapper_class",
31 | "linear_schedule",
32 | ]
33 |
--------------------------------------------------------------------------------
/rl_zoo3/cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from rl_zoo3.enjoy import enjoy
4 | from rl_zoo3.plots import all_plots, plot_from_file, plot_train
5 | from rl_zoo3.train import train
6 |
7 |
def main():
    """
    Dispatch the first command line argument to the matching RL Zoo script.

    The script name is removed from ``sys.argv`` before the selected function
    is called, so that function only sees its own arguments.

    :raises ValueError: if no script name is given or if the name is unknown.
    """
    known_scripts = {
        "train": train,
        "enjoy": enjoy,
        "plot_train": plot_train,
        "plot_from_file": plot_from_file,
        "all_plots": all_plots,
    }
    # Fail with a usable message instead of a bare IndexError when called without arguments
    if len(sys.argv) < 2:
        raise ValueError(f"No script name provided, please use one of {list(known_scripts)}")

    script_name = sys.argv[1]
    # Remove script name
    del sys.argv[1]
    # Execute known script
    if script_name not in known_scripts:
        # list(...) so the message shows the names rather than "dict_keys([...])"
        raise ValueError(f"The script {script_name} is unknown, please use one of {list(known_scripts)}")
    known_scripts[script_name]()


if __name__ == "__main__":
    main()
27 |
--------------------------------------------------------------------------------
/rl_zoo3/gym_patches.py:
--------------------------------------------------------------------------------
1 | """
2 | Patches for gym 0.26+ so RL Zoo3 keeps working as before
3 | (notably TimeLimit wrapper and Pybullet envs)
4 | """
5 |
6 | import numpy as np
7 |
8 | # Deprecation warning with gym 0.26 and numpy 1.24
9 | np.bool8 = np.bool_ # type: ignore[attr-defined]
10 |
11 | import gymnasium # noqa: E402
12 |
13 |
class PatchedTimeLimit(gymnasium.wrappers.TimeLimit):
    """
    TimeLimit wrapper that keeps reporting timeouts through the info dict.

    See https://github.com/openai/gym/issues/3102
    and https://github.com/Farama-Foundation/Gymnasium/pull/101:
    keep the behavior as before and provide the additional
    ``TimeLimit.truncated`` info entry, which is only written
    once the step limit has been reached.
    """

    def step(self, action):
        obs, rew, terminated, truncated, info = self.env.step(action)
        self._elapsed_steps += 1

        # Below the step limit: pass the transition through untouched
        if self._elapsed_steps < self._max_episode_steps:
            return obs, rew, terminated, truncated, info

        already_over = truncated or terminated
        # Flag a timeout when the episode is not over for other reasons;
        # otherwise keep whatever the environment may already have stored
        # under the TimeLimit.truncated key (do not overwrite it)
        timed_out = True if not already_over else info.get("TimeLimit.truncated", False)
        info["TimeLimit.truncated"] = timed_out
        # truncated may have been set by the env too
        truncated = truncated or timed_out

        return obs, rew, terminated, truncated, info
38 |
39 |
# Replace Gymnasium's TimeLimit with the patched version in every module that exposes it
gymnasium.wrappers.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
if hasattr(gymnasium.wrappers, "time_limit"):
    gymnasium.wrappers.time_limit.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
else:
    # No ``time_limit`` attribute on ``gymnasium.wrappers``: patch ``wrappers.common`` instead
    gymnasium.wrappers.common.TimeLimit = PatchedTimeLimit  # type: ignore
gymnasium.envs.registration.TimeLimit = PatchedTimeLimit  # type: ignore[misc,attr-defined]
47 |
--------------------------------------------------------------------------------
/rl_zoo3/import_envs.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Optional
2 |
3 | import gymnasium as gym
4 | from gymnasium.envs.registration import register, register_envs
5 |
6 | from rl_zoo3.wrappers import MaskVelocityWrapper
7 |
8 | try:
9 | import pybullet_envs_gymnasium
10 | except ImportError:
11 | pass
12 |
13 | try:
14 | import ale_py
15 |
16 | # no-op
17 | gym.register_envs(ale_py)
18 | except ImportError:
19 | pass
20 |
21 | try:
22 | import highway_env
23 | except ImportError:
24 | pass
25 | else:
26 | # hotfix for highway_env
27 | import numpy as np
28 |
29 | np.float = np.float32 # type: ignore[attr-defined]
30 |
31 | try:
32 | import custom_envs
33 | except ImportError:
34 | pass
35 |
36 | try:
37 | import gym_donkeycar
38 | except ImportError:
39 | pass
40 |
41 | try:
42 | import panda_gym
43 | except ImportError:
44 | pass
45 |
46 | try:
47 | import rocket_lander_gym
48 | except ImportError:
49 | pass
50 |
51 | try:
52 | import minigrid
53 | except ImportError:
54 | pass
55 |
56 |
57 | # Register no vel envs
def create_no_vel_env(env_id: str) -> Callable[[Optional[str]], gym.Env]:
    """
    Build a factory that creates ``env_id`` wrapped in ``MaskVelocityWrapper``.

    :param env_id: id passed to ``gym.make``
    :return: callable taking an optional ``render_mode`` and returning the wrapped env
    """

    def make_env(render_mode: Optional[str] = None) -> gym.Env:
        # ``env_id`` is captured from the enclosing call
        return MaskVelocityWrapper(gym.make(env_id, render_mode=render_mode))

    return make_env
65 |
66 |
# Register one "<Name>NoVel-v<version>" environment per id listed in MaskVelocityWrapper.velocity_indices
for env_id in MaskVelocityWrapper.velocity_indices:
    name, version = env_id.split("-v")
    register(id=f"{name}NoVel-v{version}", entry_point=create_no_vel_env(env_id))  # type: ignore[arg-type]
73 |
--------------------------------------------------------------------------------
/rl_zoo3/plots/__init__.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.all_plots import all_plots
2 | from rl_zoo3.plots.plot_from_file import plot_from_file
3 | from rl_zoo3.plots.plot_train import plot_train
4 |
5 | __all__ = [
6 | "all_plots",
7 | "plot_from_file",
8 | "plot_train",
9 | ]
10 |
--------------------------------------------------------------------------------
/rl_zoo3/plots/plot_train.py:
--------------------------------------------------------------------------------
1 | """
2 | Plot training reward/success rate
3 | """
4 |
5 | import argparse
6 | import os
7 |
8 | import numpy as np
9 | import seaborn
10 | from matplotlib import pyplot as plt
11 | from stable_baselines3.common.monitor import LoadMonitorResultsError, load_results
12 | from stable_baselines3.common.results_plotter import X_EPISODES, X_TIMESTEPS, X_WALLTIME, ts2xy, window_func
13 |
14 | # Activate seaborn
15 | seaborn.set()
16 |
17 |
def plot_train():
    """
    Plot the training reward, success rate or episode length for one algorithm.

    The command line arguments select the algorithm, the environment(s) and
    the experiment folder. Every sub-directory of ``<exp-folder>/<algo>`` whose
    name contains one of the environment names is loaded with ``load_results``.
    Folders for which loading raises ``LoadMonitorResultsError`` or that lack the
    requested column are skipped. A rolling mean is only drawn when the run has
    at least ``--episode-window`` episodes.
    """
    parser = argparse.ArgumentParser("Gather results, plot training reward/success")
    parser.add_argument("-a", "--algo", help="Algorithm to include", type=str, required=True)
    parser.add_argument("-e", "--env", help="Environment(s) to include", nargs="+", type=str, required=True)
    parser.add_argument("-f", "--exp-folder", help="Folders to include", type=str, required=True)
    # float rather than int: the default itself is fractional and inches need not be whole numbers
    parser.add_argument("--figsize", help="Figure size, width, height in inches.", nargs=2, type=float, default=[6.4, 4.8])
    parser.add_argument("--fontsize", help="Font size", type=int, default=14)
    parser.add_argument("-max", "--max-timesteps", help="Max number of timesteps to display", type=int)
    parser.add_argument("-x", "--x-axis", help="X-axis", choices=["steps", "episodes", "time"], type=str, default="steps")
    parser.add_argument("-y", "--y-axis", help="Y-axis", choices=["success", "reward", "length"], type=str, default="reward")
    parser.add_argument("-w", "--episode-window", help="Rolling window size", type=int, default=100)

    args = parser.parse_args()

    algo = args.algo
    envs = args.env
    log_path = os.path.join(args.exp_folder, algo)

    # Map the command line choices to the results_plotter constants / monitor columns and their labels
    x_axis = {
        "steps": X_TIMESTEPS,
        "episodes": X_EPISODES,
        "time": X_WALLTIME,
    }[args.x_axis]
    x_label = {
        "steps": "Timesteps",
        "episodes": "Episodes",
        "time": "Walltime (in hours)",
    }[args.x_axis]

    y_axis = {
        "success": "is_success",
        "reward": "r",
        "length": "l",
    }[args.y_axis]
    y_label = {
        "success": "Training Success Rate",
        "reward": "Training Episodic Reward",
        "length": "Training Episode Length",
    }[args.y_axis]

    # Scan the log folder once (and close the iterator), sorted by last modification
    with os.scandir(log_path) as scan:
        entries = sorted(scan, key=lambda entry: entry.stat().st_mtime)
    # Keep the per-environment grouping: all folders of the first env, then the second, ...
    dirs = [entry.path for env in envs for entry in entries if env in entry.name and entry.is_dir()]

    plt.figure(y_label, figsize=args.figsize)
    plt.title(y_label, fontsize=args.fontsize)
    plt.xlabel(f"{x_label}", fontsize=args.fontsize)
    plt.ylabel(y_label, fontsize=args.fontsize)
    for folder in dirs:
        try:
            data_frame = load_results(folder)
        except LoadMonitorResultsError:
            continue
        if args.max_timesteps is not None:
            # Keep only the episodes that end within the timestep budget
            data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps]
        try:
            y = np.array(data_frame[y_axis])
        except KeyError:
            print(f"No data available for {folder}")
            continue
        x, _ = ts2xy(data_frame, x_axis)

        # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
        if x.shape[0] >= args.episode_window:
            # Compute and plot rolling mean with window of size args.episode_window
            x, y_mean = window_func(x, y, args.episode_window, np.mean)
            # basename instead of split("/") so the label is also correct with other path separators
            plt.plot(x, y_mean, linewidth=2, label=os.path.basename(folder))

    plt.legend()
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    plot_train()
96 |
--------------------------------------------------------------------------------
/rl_zoo3/plots/score_normalization.py:
--------------------------------------------------------------------------------
1 | """
2 | Min and Max score for each env for normalization when plotting.
3 | Min score corresponds to random agent.
4 | Max score corresponds to acceptable performance, for instance
5 | human level performance in the case of Atari games.
6 | """
7 |
8 | from typing import NamedTuple
9 |
10 | import numpy as np
11 |
12 |
class ReferenceScore(NamedTuple):
    # Reference points used to rescale the scores of one environment
    env_id: str
    min: float  # score that is mapped to 0
    max: float  # score that is mapped to 1


reference_scores = [
    ReferenceScore(env_name, low, high)
    for env_name, low, high in (
        # PyBullet Envs
        ("HalfCheetahBulletEnv-v0", -1400, 3000),
        ("AntBulletEnv-v0", 300, 3500),
        ("HopperBulletEnv-v0", 20, 2500),
        ("Walker2DBulletEnv-v0", 200, 2500),
    )
]

# Alternative scaling
# Min is a poorly optimized algorithm
# reference_scores = [
#     ReferenceScore("HalfCheetahBulletEnv-v0", 1000, 3000),
#     ReferenceScore("AntBulletEnv-v0", 1000, 3500),
#     ReferenceScore("HopperBulletEnv-v0", 1000, 2500),
#     ReferenceScore("Walker2DBulletEnv-v0", 500, 2500),
# ]

# Lookup table: environment id -> its reference scores
min_max_score_per_env = {entry.env_id: entry for entry in reference_scores}


def normalize_score(score: np.ndarray, env_id: str) -> np.ndarray:
    """
    Linearly rescale a score so that the reference min maps to 0
    and the reference max maps to 1 (1 is maximal performance).
    Values outside the reference range are not clipped.

    :param score: unnormalized score
    :param env_id: environment id
    :return: normalized score
    :raises KeyError: if there is no reference score for ``env_id``
    """
    bounds = min_max_score_per_env.get(env_id)
    if bounds is None:
        raise KeyError(f"No reference score for {env_id}")
    low, high = bounds.min, bounds.max
    return (score - low) / (high - low)
51 |
52 |
53 | # From rliable, for atari games:
54 | #
55 | # RANDOM_SCORES = {
56 | # 'Alien': 227.8,
57 | # 'Amidar': 5.8,
58 | # 'Assault': 222.4,
59 | # 'Asterix': 210.0,
60 | # 'BankHeist': 14.2,
61 | # 'BattleZone': 2360.0,
62 | # 'Boxing': 0.1,
63 | # 'Breakout': 1.7,
64 | # 'ChopperCommand': 811.0,
65 | # 'CrazyClimber': 10780.5,
66 | # 'DemonAttack': 152.1,
67 | # 'Freeway': 0.0,
68 | # 'Frostbite': 65.2,
69 | # 'Gopher': 257.6,
70 | # 'Hero': 1027.0,
71 | # 'Jamesbond': 29.0,
72 | # 'Kangaroo': 52.0,
73 | # 'Krull': 1598.0,
74 | # 'KungFuMaster': 258.5,
75 | # 'MsPacman': 307.3,
76 | # 'Pong': -20.7,
77 | # 'PrivateEye': 24.9,
78 | # 'Qbert': 163.9,
79 | # 'RoadRunner': 11.5,
80 | # 'Seaquest': 68.4,
81 | # 'UpNDown': 533.4
82 | # }
83 | #
84 | # HUMAN_SCORES = {
85 | # 'Alien': 7127.7,
86 | # 'Amidar': 1719.5,
87 | # 'Assault': 742.0,
88 | # 'Asterix': 8503.3,
89 | # 'BankHeist': 753.1,
90 | # 'BattleZone': 37187.5,
91 | # 'Boxing': 12.1,
92 | # 'Breakout': 30.5,
93 | # 'ChopperCommand': 7387.8,
94 | # 'CrazyClimber': 35829.4,
95 | # 'DemonAttack': 1971.0,
96 | # 'Freeway': 29.6,
97 | # 'Frostbite': 4334.7,
98 | # 'Gopher': 2412.5,
99 | # 'Hero': 30826.4,
100 | # 'Jamesbond': 302.8,
101 | # 'Kangaroo': 3035.0,
102 | # 'Krull': 2665.5,
103 | # 'KungFuMaster': 22736.3,
104 | # 'MsPacman': 6951.6,
105 | # 'Pong': 14.6,
106 | # 'PrivateEye': 69571.3,
107 | # 'Qbert': 13455.0,
108 | # 'RoadRunner': 7845.0,
109 | # 'Seaquest': 42054.7,
110 | # 'UpNDown': 11693.2
111 | # }
112 |
--------------------------------------------------------------------------------
/rl_zoo3/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/rl_zoo3/py.typed
--------------------------------------------------------------------------------
/rl_zoo3/version.txt:
--------------------------------------------------------------------------------
1 | 2.6.1a1
2 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/all_plots.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.all_plots import all_plots
2 |
3 | if __name__ == "__main__":
4 | all_plots()
5 |
--------------------------------------------------------------------------------
/scripts/build_docker.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build the RL Zoo docker image on top of the matching Stable-Baselines3 image.
#
# Environment variables:
#   USE_GPU  "True" selects the GPU parent image, anything else the "-cpu" variant
#   RELEASE  "True" additionally pushes the versioned and "latest" tags

# Stop at the first failure so that a broken build is never tagged or pushed
set -euo pipefail

PARENT=stablebaselines/stable-baselines3

TAG=stablebaselines/rl-baselines3-zoo
VERSION=$(cat ./rl_zoo3/version.txt)

# ":-" keeps the optional variables usable under "set -u"
if [[ "${USE_GPU:-}" == "True" ]]; then
  PARENT="${PARENT}:${VERSION}"
else
  PARENT="${PARENT}-cpu:${VERSION}"
  TAG="${TAG}-cpu"
fi

docker build --build-arg PARENT_IMAGE="${PARENT}" -t "${TAG}:${VERSION}" . -f docker/Dockerfile
docker tag "${TAG}:${VERSION}" "${TAG}:latest"

if [[ "${RELEASE:-}" == "True" ]]; then
  docker push "${TAG}:${VERSION}"
  docker push "${TAG}:latest"
fi
22 |
--------------------------------------------------------------------------------
/scripts/create_cluster_jobs.py:
--------------------------------------------------------------------------------
1 | """
2 | Send multiple jobs to the cluster.
3 | """
4 |
5 | import os
6 | import subprocess
7 | import time
8 |
9 | import numpy as np
10 |
# Experiment grid: one job per (algo, env, log_std_init) combination and seed
ALGOS = ["sac"]
ENVS = ["HalfCheetahBulletEnv-v0"]
N_SEEDS = 5
N_EVAL_EPISODES = 10
LOG_STD_INIT = [-6, -5, -4, -3, -2, -1, 0, 1]

os.makedirs(os.path.join("logs", "slurm"), exist_ok=True)

for algo in ALGOS:
    for env_id in ENVS:
        for log_std_init in LOG_STD_INIT:
            # One log folder per initial standard deviation (exp of the log std)
            log_folder = f"logs_std_{np.exp(log_std_init):.4f}"
            args = [
                "--algo",
                algo,
                "--env",
                env_id,
                "--hyperparams",
                f'policy_kwargs:"dict(log_std_init={log_std_init}, net_arch=[400, 300])"',
                "--eval-episodes",
                N_EVAL_EPISODES,
                "-f",
                log_folder,
                "-uuid",
            ]
            arg_str_list: list[str] = list(map(str, args))

            # The command does not depend on the seed index: build it once per configuration
            command = " ".join(["python", "-u", "train.py", *arg_str_list])

            for _ in range(N_SEEDS):
                return_code = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command])
                # Report failed submissions instead of silently dropping them
                if return_code != 0:
                    print(f"sbatch exited with code {return_code} for: {command}")
                time.sleep(0.05)
43 |
--------------------------------------------------------------------------------
/scripts/create_mujoco_jobs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 |
5 | import numpy as np
6 |
# Experiment grid: one job per (algo, env, seed) combination
ALGOS = ["sac", "td3", "tqc"]
# "Humanoid-v3",
ENVS = ["HalfCheetah-v3", "Ant-v3", "Hopper-v3", "Walker2d-v3", "Swimmer-v3"]
N_SEEDS = 1
EVAL_FREQ = 25000
N_EVAL_EPISODES = 20
N_EVAL_ENVS = 5
# Fixed numpy seed so that the same training seeds are drawn on every run of this script
np.random.seed(8)
SEEDS = np.random.randint(2**20, size=(N_SEEDS,))
# N_TIMESTEPS = int(1e6)

os.makedirs(os.path.join("logs", "slurm"), exist_ok=True)
log_folder = "logs/"


for algo in ALGOS:
    for env_id in ENVS:
        for seed in SEEDS:
            args = [
                "--algo",
                algo,
                "--env",
                env_id,
                # "--hyperparams",
                # "use_sde:False",
                "--eval-episodes",
                N_EVAL_EPISODES,
                "--eval-freq",
                EVAL_FREQ,
                "--n-eval-envs",
                N_EVAL_ENVS,
                "-f",
                log_folder,
                "--seed",
                seed,
                "--log-interval",
                10,
                "--num-threads",
                2,
                # "-n",
                # N_TIMESTEPS,
                "-uuid",
            ]
            # Separate name for the stringified list (args mixes str and numeric values)
            arg_str_list: list[str] = list(map(str, args))

            command = " ".join(["python", "-u", "train.py", *arg_str_list])

            return_code = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command])
            # Report failed submissions instead of silently dropping them
            if return_code != 0:
                print(f"sbatch exited with code {return_code} for: {command}")
            time.sleep(0.05)
56 |
--------------------------------------------------------------------------------
/scripts/migrate_to_hub.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | from rl_zoo3.utils import get_hf_trained_models, get_trained_models
4 |
folder = "rl-trained-agents"
orga = "sb3"
trained_models_local = get_trained_models(folder)
trained_models_hub = get_hf_trained_models(orga)
# Models found locally whose key is not among the hub models
remaining_models = set(trained_models_local.keys()) - set(trained_models_hub.keys())

for trained_model in remaining_models:
    algo, env_id = trained_models_local[trained_model]

    # Since SB3 >= 1.1.0, HER is no longer an algorithm but a replay buffer class
    if algo == "her":
        continue

    args = ["-orga", orga, "-f", folder, "--algo", algo, "--env", env_id]

    return_code = subprocess.call(["python", "-m", "rl_zoo3.push_to_hub", *args])
    # Report failed uploads instead of silently ignoring them
    if return_code != 0:
        print(f"Pushing {trained_model} failed with return code {return_code}")
20 |
--------------------------------------------------------------------------------
/scripts/parse_study.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | import pickle
5 | from pprint import pprint
6 |
7 | import optuna
8 | from optuna.trial import FrozenTrial
9 |
10 |
def value_key(trial: FrozenTrial) -> float:
    """Sort key for trials: the trial's value, or ``-inf`` when it has none.

    :param trial: Trial whose ``value`` attribute is used for ordering.
    :return: ``trial.value``, with ``None`` mapped to negative infinity so
        that such trials end up last when sorting in descending order.
    """
    return float("-inf") if trial.value is None else trial.value
17 |
18 |
# Tell the user up front that this script is superseded.
deprecation_notice = (
    "DEPRECATED: `parse_study.py` is deprecated, please use optuna-dashboard "
    "together with the `--trial-id` argument in the train script."
)
print(deprecation_notice)


# Command-line interface: a study comes either from a pickle file (-i)
# or from a named study in a storage backend (--study-name + --storage).
parser = argparse.ArgumentParser()
parser.add_argument(
    "-i",
    "--study-file",
    help="Path to a pickle file contained a saved study",
    type=str,
)
parser.add_argument(
    "-f",
    "--folder",
    help="Folder where the best hyperparameter json files will be written",
    type=str,
    default="logs/hyperparameter_jsons",
)
parser.add_argument(
    "--study-name",
    help="Study name used during hyperparameter optimization",
    type=str,
)
parser.add_argument(
    "--storage",
    help="Database storage path used during hyperparameter optimization",
    type=str,
)
parser.add_argument(
    "--print-n-best-trials",
    help="Show final return values for n best trials",
    type=int,
    default=0,
)
parser.add_argument(
    "--save-n-best-hyperparameters",
    help="Save the hyperparameters for the n best trials that resulted in the best returns",
    type=int,
    default=0,
)
args = parser.parse_args()
44 |
# Load the study, either from a pickle file or from a storage backend.
# Invalid argument combinations are reported through argparse (usage message,
# exit status 2) instead of `assert`, which is stripped when Python runs with -O.
if args.study_name is None:
    if args.study_file is None:
        parser.error("No --study-file, nor --study-name were provided.")
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only open study files that come from a trusted source.
    with open(args.study_file, "rb") as f:
        study = pickle.load(f)

else:
    if args.storage is None:
        parser.error("No storage was specified.")

    # load_if_exists=True re-opens the study when one with this name is already in the storage.
    study = optuna.create_study(
        study_name=args.study_name,
        storage=args.storage,
        load_if_exists=True,
        direction="maximize",
    )
59 |
# Order the trials from best to worst; trials without a value are ranked last by `value_key`.
trials = study.trials
trials.sort(key=value_key, reverse=True)

# Only look at the requested number of trials (a negative count prints nothing).
for idx, trial in enumerate(trials[: max(args.print_n_best_trials, 0)]):
    # A trial may have no value (value_key handles that case too);
    # formatting None with ".2f" would raise a TypeError.
    value_str = "n/a" if trial.value is None else f"{trial.value:.2f}"
    print(f"# Top {idx + 1} - value: {value_str}")
    print()
    pprint(trial.params)
    print()
69 |
# Optionally dump the parameters of the top-ranked trials, one JSON file per trial.
if args.save_n_best_hyperparameters > 0:
    os.makedirs(args.folder, exist_ok=True)
    n_saved = min(args.save_n_best_hyperparameters, len(trials))
    # File names are 1-based: hyperparameters_1.json holds the first trial of the sorted list.
    for rank, best_trial in enumerate(trials[:n_saved], start=1):
        with open(f"{args.folder}/hyperparameters_{rank}.json", "w+") as json_file:
            json_file.write(json.dumps(best_trial.params, indent=4))
    print(f"Saved best hyperparameters to {args.folder}")
77 |
--------------------------------------------------------------------------------
/scripts/plot_from_file.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.plot_from_file import plot_from_file
2 |
# Thin command-line entry point: delegate to the packaged implementation when run as a script.
if __name__ == "__main__":
    plot_from_file()
5 |
--------------------------------------------------------------------------------
/scripts/plot_train.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.plot_train import plot_train
2 |
# Thin command-line entry point: delegate to the packaged implementation when run as a script.
if __name__ == "__main__":
    plot_train()
5 |
--------------------------------------------------------------------------------
/scripts/run_docker_cpu.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Launch an experiment using the docker cpu image
#
# All script arguments are joined into one command line, which is executed
# with `bash -c` inside the container from /home/mambauser/code/rl_zoo3/.

# "$*" explicitly joins the arguments into a single string
# (assigning "$@" to a scalar variable is ambiguous, see ShellCheck SC2124).
cmd_line="$*"

echo "Executing in the docker (cpu image):"
# Quoted so the command is shown verbatim, without globbing or whitespace collapsing.
echo "$cmd_line"

# Note: --user=root is needed, as the current user id/group id will be used
# to mount the log directory (and $MAMBAUSER is not root)
# "$(pwd)" is quoted so a working directory containing spaces still mounts correctly.
docker run -it --user=root --rm --network host --ipc=host \
  --mount src="$(pwd)",target=/home/mambauser/code/rl_zoo3,type=bind stablebaselines/rl-baselines3-zoo-cpu:latest \
  bash -c "cd /home/mambauser/code/rl_zoo3/ && $cmd_line"
14 |
--------------------------------------------------------------------------------
/scripts/run_docker_gpu.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Launch an experiment using the docker gpu image
#
# All script arguments are joined into one command line, which is executed
# with `bash -c` inside the container from /home/mambauser/code/rl_zoo3/.

# "$*" explicitly joins the arguments into a single string
# (assigning "$@" to a scalar variable is ambiguous, see ShellCheck SC2124).
cmd_line="$*"

echo "Executing in the docker (gpu image):"
# Quoted so the command is shown verbatim, without globbing or whitespace collapsing.
echo "$cmd_line"

# Note: --user=root is needed, as the current user id/group id will be used
# to mount the log directory (and $MAMBAUSER is not root)
# "$(pwd)" is quoted so a working directory containing spaces still mounts correctly.
docker run -it --user=root --gpus all --rm --network host --ipc=host \
  --mount src="$(pwd)",target=/home/mambauser/code/rl_zoo3,type=bind stablebaselines/rl-baselines3-zoo:latest \
  bash -c "cd /home/mambauser/code/rl_zoo3/ && $cmd_line"
14 |
--------------------------------------------------------------------------------
/scripts/run_jobs.py:
--------------------------------------------------------------------------------
1 | """
2 | Run multiple experiments on a single machine.
3 | """
4 |
5 | import subprocess
6 |
7 | import numpy as np
8 |
# Experiment grid: each (algo, env, log_std_init) combination is trained N_SEEDS times.
ALGOS = ["sac"]
ENVS = ["MountainCarContinuous-v0"]
N_SEEDS = 10
EVAL_FREQ = 5000
N_EVAL_EPISODES = 10
LOG_STD_INIT = [-6, -5, -4, -3, -2, -1, 0, 1]

for algo in ALGOS:
    for env_id in ENVS:
        for log_std_init in LOG_STD_INIT:
            # One log folder per setting, named after exp(log_std_init).
            log_folder = f"logs_std_{np.exp(log_std_init):.4f}"
            policy_kwargs = f"policy_kwargs:dict(log_std_init={log_std_init}, net_arch=[64, 64])"
            # The command line does not depend on the repetition index, so build it once.
            args = [
                "--algo",
                algo,
                "--env",
                env_id,
                "--hyperparams",
                policy_kwargs,
                "--eval-episodes",
                N_EVAL_EPISODES,
                "--eval-freq",
                EVAL_FREQ,
                "-f",
                log_folder,
            ]
            arg_str_list: list[str] = [str(arg) for arg in args]

            for _ in range(N_SEEDS):
                # subprocess.call blocks until the training run exits, so runs happen one after another.
                ok = subprocess.call(["python", "train.py", *arg_str_list])
38 |
--------------------------------------------------------------------------------
/scripts/run_tests.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run the test suite with coverage (HTML + terminal reports), skipping tests marked "slow".
python -m pytest \
  --cov-config .coveragerc \
  --cov-report html \
  --cov-report term \
  --cov=. \
  -v \
  -m "not slow" \
  --color=yes
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 | from setuptools import setup
5 |
# The package version is stored in a plain text file inside the package.
with open(os.path.join("rl_zoo3", "version.txt")) as file_handler:
    __version__ = file_handler.read().strip()

long_description = """
# RL Baselines3 Zoo: A Training Framework for Stable Baselines3 Reinforcement Learning Agents

See https://github.com/DLR-RM/rl-baselines3-zoo
"""
install_requires = [
    "sb3_contrib>=2.6.1a1,<3.0",
    "gymnasium>=0.29.1,<1.2.0",
    "huggingface_sb3>=3.0,<4.0",
    "tqdm",
    "rich",
    "optuna>=3.0",
    "pyyaml>=5.1",
    "pytablewriter~=1.2",
    "shimmy~=2.0",
]
plots_requires = ["seaborn", "rliable~=1.2.0", "scipy~=1.10"]
test_requires = [
    # for MuJoCo envs v4:
    "mujoco>=2.3,<4",
    # install parking-env to test HER
    "highway-env>=1.10.1,<1.11.0",
]

# Copy hyperparams files for packaging.
# dirs_exist_ok=True keeps the script re-runnable when an earlier, interrupted
# run left the copied folder behind (copytree would otherwise raise FileExistsError).
_packaged_hyperparams = os.path.join("rl_zoo3", "hyperparams")
shutil.copytree("hyperparams", _packaged_hyperparams, dirs_exist_ok=True)

try:
    setup(
        name="rl_zoo3",
        packages=["rl_zoo3", "rl_zoo3.plots"],
        package_data={
            "rl_zoo3": [
                "py.typed",
                "version.txt",
                "hyperparams/*.yml",
            ]
        },
        entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]},
        install_requires=install_requires,
        extras_require={"plots": plots_requires, "tests": test_requires},
        description="A Training Framework for Stable Baselines3 Reinforcement Learning Agents",
        author="Antonin Raffin",
        url="https://github.com/DLR-RM/rl-baselines3-zoo",
        author_email="antonin.raffin@dlr.de",
        keywords="reinforcement-learning-algorithms reinforcement-learning machine-learning "
        "gymnasium openai stable baselines sb3 toolbox python data-science",
        license="MIT",
        long_description=long_description,
        long_description_content_type="text/markdown",
        version=__version__,
        python_requires=">=3.9",
        # PyPI package information.
        project_urls={
            "Code": "https://github.com/DLR-RM/rl-baselines3-zoo",
            "Documentation": "https://rl-baselines3-zoo.readthedocs.io/en/master/",
            "Changelog": "https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md",
            "Stable-Baselines3": "https://github.com/DLR-RM/stable-baselines3",
            "RL-Zoo": "https://github.com/DLR-RM/rl-baselines3-zoo",
            "SBX": "https://github.com/araffin/sbx",
        },
        classifiers=[
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: 3.9",
            "Programming Language :: Python :: 3.10",
            "Programming Language :: Python :: 3.11",
            "Programming Language :: Python :: 3.12",
        ],
    )
finally:
    # Remove copied files after packaging, even when setup() fails,
    # so the source tree is never left with a stale copy.
    shutil.rmtree(_packaged_hyperparams)
80 |
--------------------------------------------------------------------------------
/tests/dummy_env/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
# Minimal packaging metadata: only the distribution name is declared.
setup(name="test_env")
4 |
--------------------------------------------------------------------------------
/tests/dummy_env/test_env/__init__.py:
--------------------------------------------------------------------------------
1 | from gymnasium.envs.registration import register
2 |
# Make the dummy environment available under the id "TestEnv-v0".
register(id="TestEnv-v0", entry_point="test_env.test_env:TestEnv")
7 |
--------------------------------------------------------------------------------
/tests/dummy_env/test_env/config.py:
--------------------------------------------------------------------------------
# Hyperparameters keyed by environment id.
hyperparams = {
    "TestEnv-v0": dict(policy="MlpPolicy", n_timesteps=20000),
}
7 |
--------------------------------------------------------------------------------
/tests/dummy_env/test_env/test_env.py:
--------------------------------------------------------------------------------
1 | from typing import ClassVar
2 |
3 | import gymnasium as gym
4 | import numpy as np
5 | from gymnasium import spaces
6 |
7 |
class TestEnv(gym.Env):
    """Minimal dummy environment with random observations.

    Observations are sampled from a 2-dimensional ``Box`` in ``[0, 1]``,
    there are two discrete actions, the reward is always ``0.0`` and every
    step terminates the episode when a draw from ``self.np_random`` is
    greater than 0.5. Episodes are never truncated.
    """

    metadata: ClassVar[dict] = {"render_modes": ["human"], "render_fps": 4}
    # Tell pytest not to collect this class as a test case (its name starts with "Test").
    __test__ = False

    def __init__(self, render_mode=None):
        """Create the spaces and remember the requested render mode.

        :param render_mode: Render mode requested by the caller, stored as
            ``self.render_mode``; rendering itself is a no-op.
        """
        super().__init__()

        # Previously the argument was accepted but dropped, so ``env.render_mode``
        # did not reflect what the caller asked for.
        self.render_mode = render_mode

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)

    def step(self, action):
        """Ignore ``action`` and return ``(obs, reward, terminated, truncated, info)``."""
        return self.observation_space.sample(), 0.0, self.np_random.random() > 0.5, False, {}

    def reset(self, *, seed=None, options=None):
        """Reset the env and return a random first observation with an empty info dict.

        :param seed: When given, also seeds the observation space so that the
            sampled observations are reproducible.
        :param options: Unused.
        """
        super().reset(seed=seed)
        if seed is not None:
            self.observation_space.seed(seed)
        return self.observation_space.sample(), {}

    def render(self, mode="human"):
        """Do nothing; present only so that the method exists."""
        pass
29 |
30 |
if __name__ == "__main__":
    # Manual sanity check: validate the env with both the SB3 and the Gymnasium checkers.
    from gymnasium.utils.env_checker import check_env as gym_check
    from stable_baselines3.common.env_checker import check_env as sb3_check

    sb3_check(TestEnv())
    gym_check(TestEnv())
37 |
--------------------------------------------------------------------------------
/tests/test_callbacks.py:
--------------------------------------------------------------------------------
1 | import shlex
2 | import subprocess
3 |
4 | import pytest
5 | import stable_baselines3 as sb3
6 |
7 | from rl_zoo3.utils import get_callback_list
8 |
9 |
10 | def _assert_eq(left, right):
11 | assert left == right, f"{left} != {right}"
12 |
13 |
def test_raw_stat_callback(tmp_path):
    """Run a short PPO training with ``RawStatisticsCallback`` and expect exit code 0."""
    # The inner single quotes survive shlex.split, so the value reaches train.py as a quoted string.
    callback_spec = "callback:\"'rl_zoo3.callbacks.RawStatisticsCallback'\""
    command_line = " ".join(
        [
            "python train.py -n 200 --algo ppo --env CartPole-v1",
            f"--log-folder {tmp_path}",
            f"--tensorboard-log {tmp_path}",
            f"-params {callback_spec}",
        ]
    )
    exit_status = subprocess.call(shlex.split(command_line))
    _assert_eq(exit_status, 0)
21 |
22 |
@pytest.mark.parametrize(
    "callback",
    [
        None,
        "rl_zoo3.callbacks.RawStatisticsCallback",
        [
            {"stable_baselines3.common.callbacks.StopTrainingOnMaxEpisodes": {"max_episodes": 3}},
            "rl_zoo3.callbacks.RawStatisticsCallback",
        ],
        [sb3.common.callbacks.StopTrainingOnMaxEpisodes(3)],
    ],
)
def test_get_callback(callback):
    """Check how many callbacks ``get_callback_list`` builds for each accepted spec form."""
    callback_list = get_callback_list({"callback": callback})

    # No spec -> nothing; a single string -> one callback; a list -> one per entry.
    if callback is None:
        expected_count = 0
    elif isinstance(callback, str):
        expected_count = 1
    else:
        expected_count = len(callback)

    assert len(callback_list) == expected_count
44 |
--------------------------------------------------------------------------------
/tests/test_wrappers.py:
--------------------------------------------------------------------------------
1 | import gymnasium as gym
2 | import numpy as np
3 | import pytest
4 | import stable_baselines3 as sb3
5 | from sb3_contrib.common.wrappers import TimeFeatureWrapper
6 | from stable_baselines3 import A2C
7 | from stable_baselines3.common.env_checker import check_env
8 | from stable_baselines3.common.env_util import DummyVecEnv
9 |
10 | import rl_zoo3.import_envs
11 | import rl_zoo3.wrappers
12 | from rl_zoo3.utils import SimpleLinearSchedule, get_wrapper_class, linear_schedule
13 | from rl_zoo3.wrappers import ActionNoiseWrapper, DelayedRewardWrapper, HistoryWrapper
14 |
15 |
def test_wrappers():
    """Stack the wrappers on Ant-v4 and run the SB3 env checker on the result."""
    env = gym.make("Ant-v4")
    # Applied in order, each wrapper wraps the result of the previous one.
    for wrapper in (DelayedRewardWrapper, ActionNoiseWrapper, HistoryWrapper, TimeFeatureWrapper):
        env = wrapper(env)
    check_env(env)
23 |
24 |
@pytest.mark.parametrize(
    "env_wrapper",
    [
        None,
        {"rl_zoo3.wrappers.HistoryWrapper": {"horizon": 2}},
        [{"rl_zoo3.wrappers.HistoryWrapper": {"horizon": 3}}, "rl_zoo3.wrappers.TimeFeatureWrapper"],
        [{rl_zoo3.wrappers.HistoryWrapper: {"horizon": 3}}, "rl_zoo3.wrappers.TimeFeatureWrapper"],
    ],
)
def test_get_wrapper(env_wrapper):
    """Build a wrapper from each supported ``env_wrapper`` spec and validate the wrapped env."""
    wrapper_class = get_wrapper_class({"env_wrapper": env_wrapper})
    env = gym.make("Ant-v4")
    # Without a spec the plain environment is checked as-is.
    if env_wrapper is not None:
        env = wrapper_class(env)
    check_env(env)
41 |
42 |
@pytest.mark.parametrize(
    "vec_env_wrapper",
    [
        None,
        {"stable_baselines3.common.vec_env.VecFrameStack": {"n_stack": 2}},
        {sb3.common.vec_env.VecFrameStack: {"n_stack": 2}},
        [{"stable_baselines3.common.vec_env.VecFrameStack": {"n_stack": 3}}, "stable_baselines3.common.vec_env.VecMonitor"],
    ],
)
def test_get_vec_env_wrapper(vec_env_wrapper):
    """Build a VecEnv wrapper from each supported spec and train A2C briefly on the result."""
    base_env = gym.make("Ant-v4")
    vec_env = DummyVecEnv([lambda: base_env])
    wrapper_class = get_wrapper_class({"vec_env_wrapper": vec_env_wrapper}, "vec_env_wrapper")
    if wrapper_class is not None:
        vec_env = wrapper_class(vec_env)
    # A few training steps are enough to exercise the wrapped environment.
    A2C("MlpPolicy", vec_env).learn(16)
60 |
61 |
def test_linear_schedule():
    """``linear_schedule(100)`` scales the initial value by the remaining progress."""
    schedule = linear_schedule(100)
    assert isinstance(schedule, SimpleLinearSchedule)
    # (progress remaining, expected value)
    for progress, expected in ((1.0, 100.0), (0.5, 50.0), (0.0, 0.0)):
        assert np.allclose(schedule(progress), expected)
68 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.train import train
2 |
# Thin command-line entry point: delegate to the packaged implementation when run as a script.
if __name__ == "__main__":
    train()
5 |
--------------------------------------------------------------------------------