├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── documentation.yml │ ├── feature_request.yml │ └── question.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci.yml │ └── trained_agents.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── benchmark.md ├── docker └── Dockerfile ├── docs ├── Makefile ├── README.md ├── _static │ ├── css │ │ └── baselines_theme.css │ └── img │ │ ├── colab-badge.svg │ │ └── colab.svg ├── conda_env.yml ├── conf.py ├── guide │ ├── config.rst │ ├── custom_env.rst │ ├── enjoy.rst │ ├── install.rst │ ├── integrations.rst │ ├── plot.rst │ ├── quickstart.rst │ ├── sbx.rst │ ├── train.rst │ └── tuning.rst ├── index.rst ├── make.bat ├── misc │ └── changelog.rst ├── modules │ ├── callbacks.rst │ ├── exp_manager.rst │ ├── utils.rst │ └── wrappers.rst └── spelling_wordlist.txt ├── enjoy.py ├── hyperparams ├── a2c.yml ├── ars.yml ├── crossq.yml ├── ddpg.yml ├── dqn.yml ├── her.yml ├── ppo.yml ├── ppo_lstm.yml ├── python │ └── ppo_config_example.py ├── qrdqn.yml ├── sac.yml ├── td3.yml ├── tqc.yml └── trpo.yml ├── images └── car.jpg ├── logs └── benchmark │ ├── a2c-Acrobot-v1 │ └── 0.monitor.csv │ ├── a2c-Ant-v3 │ └── 0.monitor.csv │ ├── a2c-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── a2c-AsteroidsNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-BeamRiderNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── a2c-BipedalWalkerHardcore-v3 │ └── 0.monitor.csv │ ├── a2c-BreakoutNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-CartPole-v1 │ └── 0.monitor.csv │ ├── a2c-EnduroNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── a2c-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── a2c-Hopper-v3 │ └── 0.monitor.csv │ ├── a2c-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── a2c-Humanoid-v3 │ └── 0.monitor.csv │ ├── a2c-LunarLander-v2 │ └── 0.monitor.csv │ ├── a2c-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── 
a2c-MountainCar-v0 │ └── 0.monitor.csv │ ├── a2c-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── a2c-MsPacmanNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-Pendulum-v1 │ └── 0.monitor.csv │ ├── a2c-PongNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-QbertNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── a2c-RoadRunnerNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-SeaquestNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-SpaceInvadersNoFrameskip-v4 │ └── 0.monitor.csv │ ├── a2c-Swimmer-v3 │ └── 0.monitor.csv │ ├── a2c-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ ├── a2c-Walker2d-v3 │ └── 0.monitor.csv │ ├── ars-Acrobot-v1 │ └── 0.monitor.csv │ ├── ars-Ant-v3 │ └── 0.monitor.csv │ ├── ars-CartPole-v1 │ └── 0.monitor.csv │ ├── ars-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── ars-Hopper-v3 │ └── 0.monitor.csv │ ├── ars-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── ars-MountainCar-v0 │ └── 0.monitor.csv │ ├── ars-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── ars-Pendulum-v1 │ └── 0.monitor.csv │ ├── ars-Swimmer-v3 │ └── 0.monitor.csv │ ├── ars-Walker2d-v3 │ └── 0.monitor.csv │ ├── benchmark.md │ ├── ddpg-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── ddpg-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── ddpg-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── ddpg-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── ddpg-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── ddpg-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── ddpg-Pendulum-v1 │ └── 0.monitor.csv │ ├── ddpg-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── ddpg-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ ├── dqn-Acrobot-v1 │ └── 0.monitor.csv │ ├── dqn-AsteroidsNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-BeamRiderNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-BreakoutNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-CartPole-v1 │ └── 0.monitor.csv │ ├── dqn-EnduroNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-LunarLander-v2 │ └── 0.monitor.csv │ ├── dqn-MountainCar-v0 │ └── 0.monitor.csv │ ├── 
dqn-MsPacmanNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-PongNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-QbertNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-RoadRunnerNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-SeaquestNoFrameskip-v4 │ └── 0.monitor.csv │ ├── dqn-SpaceInvadersNoFrameskip-v4 │ └── 0.monitor.csv │ ├── her-FetchPickAndPlace-v1 │ └── 0.monitor.csv │ ├── her-FetchPush-v1 │ └── 0.monitor.csv │ ├── her-FetchReach-v1 │ └── 0.monitor.csv │ ├── her-FetchSlide-v1 │ └── 0.monitor.csv │ ├── her-parking-v0 │ └── 0.monitor.csv │ ├── ppo-Acrobot-v1 │ └── 0.monitor.csv │ ├── ppo-Ant-v3 │ └── 0.monitor.csv │ ├── ppo-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── ppo-AsteroidsNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-BeamRiderNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── ppo-BipedalWalkerHardcore-v3 │ └── 0.monitor.csv │ ├── ppo-BreakoutNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-CarRacing-v0 │ └── 0.monitor.csv │ ├── ppo-CartPole-v1 │ └── 0.monitor.csv │ ├── ppo-EnduroNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── ppo-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── ppo-Hopper-v3 │ └── 0.monitor.csv │ ├── ppo-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── ppo-LunarLander-v2 │ └── 0.monitor.csv │ ├── ppo-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-DoorKey-5x5-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-Empty-Random-5x5-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-FourRooms-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-GoToDoor-5x5-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-KeyCorridorS3R1-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-LockedRoom-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-MultiRoom-N4-S5-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-ObstructedMaze-2Dlh-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-PutNear-6x6-N2-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-RedBlueDoors-6x6-v0 │ └── 0.monitor.csv │ ├── ppo-MiniGrid-Unlock-v0 │ └── 0.monitor.csv │ ├── ppo-MountainCar-v0 │ └── 
0.monitor.csv │ ├── ppo-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── ppo-MsPacmanNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-Pendulum-v1 │ └── 0.monitor.csv │ ├── ppo-PongNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-QbertNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── ppo-RoadRunnerNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-SeaquestNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-SpaceInvadersNoFrameskip-v4 │ └── 0.monitor.csv │ ├── ppo-Swimmer-v3 │ └── 0.monitor.csv │ ├── ppo-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ ├── ppo-Walker2d-v3 │ └── 0.monitor.csv │ ├── ppo_lstm-CarRacing-v0 │ └── 0.monitor.csv │ ├── ppo_lstm-CartPoleNoVel-v1 │ └── 0.monitor.csv │ ├── ppo_lstm-MountainCarContinuousNoVel-v0 │ └── 0.monitor.csv │ ├── ppo_lstm-PendulumNoVel-v1 │ └── 0.monitor.csv │ ├── qrdqn-Acrobot-v1 │ └── 0.monitor.csv │ ├── qrdqn-AsteroidsNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-BeamRiderNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-BreakoutNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-CartPole-v1 │ └── 0.monitor.csv │ ├── qrdqn-EnduroNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-LunarLander-v2 │ └── 0.monitor.csv │ ├── qrdqn-MountainCar-v0 │ └── 0.monitor.csv │ ├── qrdqn-MsPacmanNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-PongNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-QbertNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-RoadRunnerNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-SeaquestNoFrameskip-v4 │ └── 0.monitor.csv │ ├── qrdqn-SpaceInvadersNoFrameskip-v4 │ └── 0.monitor.csv │ ├── sac-Ant-v3 │ └── 0.monitor.csv │ ├── sac-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── sac-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── sac-BipedalWalkerHardcore-v3 │ └── 0.monitor.csv │ ├── sac-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── sac-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── sac-Hopper-v3 │ └── 0.monitor.csv │ ├── sac-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── sac-Humanoid-v3 │ └── 0.monitor.csv │ ├── 
sac-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── sac-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── sac-Pendulum-v1 │ └── 0.monitor.csv │ ├── sac-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── sac-Swimmer-v3 │ └── 0.monitor.csv │ ├── sac-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ ├── sac-Walker2d-v3 │ └── 0.monitor.csv │ ├── td3-Ant-v3 │ └── 0.monitor.csv │ ├── td3-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── td3-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── td3-BipedalWalkerHardcore-v3 │ └── 0.monitor.csv │ ├── td3-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── td3-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── td3-Hopper-v3 │ └── 0.monitor.csv │ ├── td3-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── td3-Humanoid-v3 │ └── 0.monitor.csv │ ├── td3-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── td3-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── td3-Pendulum-v1 │ └── 0.monitor.csv │ ├── td3-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── td3-Swimmer-v3 │ └── 0.monitor.csv │ ├── td3-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ ├── td3-Walker2d-v3 │ └── 0.monitor.csv │ ├── tqc-Ant-v3 │ └── 0.monitor.csv │ ├── tqc-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── tqc-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── tqc-BipedalWalkerHardcore-v3 │ └── 0.monitor.csv │ ├── tqc-FetchPickAndPlace-v1 │ └── 0.monitor.csv │ ├── tqc-FetchPush-v1 │ └── 0.monitor.csv │ ├── tqc-FetchReach-v1 │ └── 0.monitor.csv │ ├── tqc-FetchSlide-v1 │ └── 0.monitor.csv │ ├── tqc-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── tqc-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── tqc-Hopper-v3 │ └── 0.monitor.csv │ ├── tqc-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── tqc-Humanoid-v3 │ └── 0.monitor.csv │ ├── tqc-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── tqc-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── tqc-PandaPickAndPlace-v1 │ └── 0.monitor.csv │ ├── tqc-PandaPush-v1 │ └── 0.monitor.csv │ ├── tqc-PandaReach-v1 │ └── 0.monitor.csv │ ├── tqc-PandaSlide-v1 │ └── 0.monitor.csv │ ├── tqc-PandaStack-v1 │ └── 
0.monitor.csv │ ├── tqc-Pendulum-v1 │ └── 0.monitor.csv │ ├── tqc-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── tqc-Swimmer-v3 │ └── 0.monitor.csv │ ├── tqc-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ ├── tqc-Walker2d-v3 │ └── 0.monitor.csv │ ├── tqc-parking-v0 │ └── 0.monitor.csv │ ├── trpo-Acrobot-v1 │ └── 0.monitor.csv │ ├── trpo-Ant-v3 │ └── 0.monitor.csv │ ├── trpo-AntBulletEnv-v0 │ └── 0.monitor.csv │ ├── trpo-BipedalWalker-v3 │ └── 0.monitor.csv │ ├── trpo-CartPole-v1 │ └── 0.monitor.csv │ ├── trpo-HalfCheetah-v3 │ └── 0.monitor.csv │ ├── trpo-HalfCheetahBulletEnv-v0 │ └── 0.monitor.csv │ ├── trpo-Hopper-v3 │ └── 0.monitor.csv │ ├── trpo-HopperBulletEnv-v0 │ └── 0.monitor.csv │ ├── trpo-LunarLander-v2 │ └── 0.monitor.csv │ ├── trpo-LunarLanderContinuous-v2 │ └── 0.monitor.csv │ ├── trpo-MountainCar-v0 │ └── 0.monitor.csv │ ├── trpo-MountainCarContinuous-v0 │ └── 0.monitor.csv │ ├── trpo-Pendulum-v1 │ └── 0.monitor.csv │ ├── trpo-ReacherBulletEnv-v0 │ └── 0.monitor.csv │ ├── trpo-Swimmer-v3 │ └── 0.monitor.csv │ ├── trpo-Walker2DBulletEnv-v0 │ └── 0.monitor.csv │ └── trpo-Walker2d-v3 │ └── 0.monitor.csv ├── pyproject.toml ├── requirements.txt ├── rl_zoo3 ├── __init__.py ├── benchmark.py ├── callbacks.py ├── cli.py ├── enjoy.py ├── exp_manager.py ├── gym_patches.py ├── hyperparams_opt.py ├── import_envs.py ├── load_from_hub.py ├── plots │ ├── __init__.py │ ├── all_plots.py │ ├── plot_from_file.py │ ├── plot_train.py │ └── score_normalization.py ├── push_to_hub.py ├── py.typed ├── record_training.py ├── record_video.py ├── train.py ├── utils.py ├── version.txt └── wrappers.py ├── scripts ├── __init__.py ├── all_plots.py ├── build_docker.sh ├── create_cluster_jobs.py ├── create_mujoco_jobs.py ├── migrate_to_hub.py ├── parse_study.py ├── plot_from_file.py ├── plot_train.py ├── run_docker_cpu.sh ├── run_docker_gpu.sh ├── run_jobs.py └── run_tests.sh ├── setup.py ├── tests ├── dummy_env │ ├── setup.py │ └── test_env │ │ ├── __init__.py │ │ ├── config.py │ │ └── 
test_env.py ├── test_callbacks.py ├── test_enjoy.py ├── test_hyperparams_opt.py ├── test_train.py └── test_wrappers.py └── train.py /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | logs 3 | .pytest_cache/ 4 | .coverage 5 | .coverage.* 6 | .idea/ 7 | cluster_sbatch.sh 8 | cluster_sbatch_mpi.sh 9 | cluster_torchy.sh 10 | logs/ 11 | .pytype/ 12 | htmlcov/ 13 | git_rewrite_commit_history.sh 14 | .vscode/ 15 | # ignore for docker builds 16 | rl-trained-agents/ 17 | .git/ 18 | .mypy_cache/ 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: If you encounter an unexpected behavior, software crash, or other bug. 3 | title: "[Bug]: bug title" 4 | labels: ["bug"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions per email. 10 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case. 11 | - type: textarea 12 | id: description 13 | attributes: 14 | label: 🐛 Bug 15 | description: A clear and concise description of what the bug is. 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: reproduce 20 | attributes: 21 | label: To Reproduce 22 | description: | 23 | Steps to reproduce the behavior. Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful. 24 | Please use the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces. 25 | value: | 26 | ```shell 27 | python train.py --algo ... 
28 | ``` 29 | 30 | - type: textarea 31 | id: traceback 32 | attributes: 33 | label: Relevant log output / Error message 34 | description: Please copy and paste any relevant log output / error message. This will be automatically formatted into code, so no need for backticks. 35 | placeholder: "Traceback (most recent call last): File ..." 36 | render: shell 37 | 38 | - type: textarea 39 | id: system-info 40 | attributes: 41 | label: System Info 42 | description: | 43 | Describe the characteristics of your environment: 44 | * Describe how the library was installed (pip, docker, source, ...) 45 | * GPU models and configuration 46 | * Python version 47 | * PyTorch version 48 | * Gymnasium version 49 | * (if installed) OpenAI Gym version 50 | * Versions of any other relevant libraries 51 | 52 | You can use `sb3.get_system_info()` to print relevant packages info: 53 | ```sh 54 | python -c 'import stable_baselines3 as sb3; sb3.get_system_info()' 55 | ``` 56 | - type: checkboxes 57 | id: terms 58 | attributes: 59 | label: Checklist 60 | options: 61 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo 62 | required: true 63 | - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/) 64 | required: true 65 | - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io) 66 | required: true 67 | - label: I have provided a [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014) example to reproduce the bug 68 | required: true 69 | - label: I've used the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces. 
70 | required: true 71 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F4DA Documentation" 2 | description: If you want to improve the documentation by reporting errors, inconsistencies, or missing information. 3 | labels: ["documentation"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions per email. 9 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case. 10 | - type: textarea 11 | id: description 12 | attributes: 13 | label: 📚 Documentation 14 | description: A clear and concise description of what should be improved in the documentation. 15 | validations: 16 | required: true 17 | - type: checkboxes 18 | id: terms 19 | attributes: 20 | label: Checklist 21 | options: 22 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo 23 | required: true 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F680 Feature Request" 2 | description: How to create an issue for requesting a feature 3 | title: "[Feature Request] request title" 4 | labels: ["enhancement"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions per email. 
10 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case. 11 | - type: textarea 12 | id: description 13 | attributes: 14 | label: 🚀 Feature 15 | description: A clear and concise description of the feature proposal. 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: motivation 20 | attributes: 21 | label: Motivation 22 | description: Please outline the motivation for the proposal. Is your feature request related to a problem? e.g.,"I'm always frustrated when [...]". If this is related to another GitHub issue, please link here too. 23 | - type: textarea 24 | id: pitch 25 | attributes: 26 | label: Pitch 27 | description: A clear and concise description of what you want to happen. 28 | - type: textarea 29 | id: alternatives 30 | attributes: 31 | label: Alternatives 32 | description: A clear and concise description of any alternative solutions or features you've considered, if any. 33 | - type: textarea 34 | id: additional-context 35 | attributes: 36 | label: Additional context 37 | description: Add any other context or screenshots about the feature request here. 
38 | - type: checkboxes 39 | id: terms 40 | attributes: 41 | label: Checklist 42 | options: 43 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo 44 | required: true 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: "❓ Question" 2 | description: If you have a general question about RL Baselines3 Zoo 3 | title: "[Question] question title" 4 | labels: ["question"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | **Important Note: We do not do technical support, nor consulting** and don't answer personal questions per email. 10 | Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case. 11 | - type: textarea 12 | id: question 13 | attributes: 14 | label: ❓ Question 15 | description: | 16 | Your question. This can be e.g. questions regarding confusing or unclear behaviour of functions or a question if X can be done using stable-baselines3. Make sure to check out the documentation first. 
17 | **Important Note: If your question is anything like "Why is my code generating this error?", you must [submit a bug report](https://github.com/DLR-RM/rl-baselines3-zoo/issues/new?assignees=&labels=bug&projects=&template=bug_report.yml&title=%5BBug%5D%3A+bug+title) instead.** 18 | validations: 19 | required: true 20 | - type: checkboxes 21 | id: terms 22 | attributes: 23 | label: Checklist 24 | options: 25 | - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo 26 | required: true 27 | - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/) 28 | required: true 29 | - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io) 30 | required: true 31 | - label: If code there is, it is [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014) 32 | required: true 33 | - label: If code there is, it is formatted using the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces. 
34 | required: true 35 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Motivation and Context 7 | 8 | 9 | 10 | - [ ] I have raised an issue to propose this change ([required](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) for new features and bug fixes) 11 | 12 | ## Types of changes 13 | 14 | - [ ] Bug fix (non-breaking change which fixes an issue) 15 | - [ ] New feature (non-breaking change which adds functionality) 16 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 17 | - [ ] Documentation (update in the documentation) 18 | 19 | ## Checklist: 20 | 21 | 22 | - [ ] I've read the [CONTRIBUTION](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) guide (**required**) 23 | - [ ] I have updated the changelog accordingly (**required**). 24 | - [ ] My change requires a change to the documentation. 25 | - [ ] I have updated the tests accordingly (*required for a bug fix or a new feature*). 26 | - [ ] I have updated the documentation accordingly. 27 | - [ ] I have reformatted the code using `make format` (**required**) 28 | - [ ] I have checked the codestyle using `make check-codestyle` and `make lint` (**required**) 29 | - [ ] I have ensured `make pytest` and `make type` both pass. 
(**required**) 30 | 31 | 32 | Note: we are using a maximum length of 127 characters per line 33 | 34 | 35 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: CI 5 | 6 | on: 7 | push: 8 | branches: [master] 9 | pull_request: 10 | branches: [master] 11 | 12 | jobs: 13 | build: 14 | env: 15 | TERM: xterm-256color 16 | FORCE_COLOR: 1 17 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 18 | # Skip CI if [ci skip] in the commit message 19 | if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')" 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | python-version: ["3.9", "3.10", "3.11", "3.12"] 24 | include: 25 | # Default version 26 | - gymnasium-version: "1.0.0" 27 | # Add a new config to test gym<1.0 28 | - python-version: "3.10" 29 | gymnasium-version: "0.29.1" 30 | steps: 31 | - uses: actions/checkout@v3 32 | with: 33 | submodules: true 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip 41 | # Use uv for faster downloads 42 | pip install uv 43 | # cpu version of pytorch 44 | # See https://github.com/astral-sh/uv/issues/1497 45 | uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu 46 | # Install full requirements (for additional envs and test tools) 47 | uv pip install --system -r requirements.txt 48 | # Use headless version 49 | uv pip install --system opencv-python-headless 50 | uv pip install --system -e .[plots,tests] 51 | 52 | - name: Install specific 
version of gym 53 | run: | 54 | uv pip install --system gymnasium==${{ matrix.gymnasium-version }} 55 | uv pip install --system "numpy<2" 56 | # Only run for python 3.10, downgrade gym to 0.29.1 57 | if: matrix.gymnasium-version != '1.0.0' 58 | 59 | - name: Lint with ruff 60 | run: | 61 | make lint 62 | - name: Check codestyle 63 | run: | 64 | make check-codestyle 65 | - name: Build the doc 66 | run: | 67 | make doc 68 | - name: Type check 69 | run: | 70 | make type 71 | - name: Test with pytest 72 | run: | 73 | make pytest 74 | -------------------------------------------------------------------------------- /.github/workflows/trained_agents.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run check on trained agents 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Check-Trained-Agents 5 | 6 | on: 7 | push: 8 | branches: [master] 9 | pull_request: 10 | branches: [master] 11 | 12 | jobs: 13 | build: 14 | env: 15 | TERM: xterm-256color 16 | FORCE_COLOR: 1 17 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 18 | # Skip CI if [ci skip] in the commit message 19 | if: "! 
contains(toJSON(github.event.commits.*.message), '[ci skip]')" 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | python-version: ["3.9", "3.10", "3.11", "3.12"] 24 | include: 25 | # Default version 26 | - gymnasium-version: "1.0.0" 27 | # Add a new config to test gym<1.0 28 | - python-version: "3.10" 29 | gymnasium-version: "0.29.1" 30 | steps: 31 | - uses: actions/checkout@v3 32 | with: 33 | submodules: true 34 | - name: Set up Python ${{ matrix.python-version }} 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: ${{ matrix.python-version }} 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip 41 | 42 | # Use uv for faster downloads 43 | pip install uv 44 | # cpu version of pytorch 45 | # See https://github.com/astral-sh/uv/issues/1497 46 | uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu 47 | # Install full requirements (for additional envs and test tools) 48 | uv pip install --system -r requirements.txt 49 | # Use headless version 50 | uv pip install --system opencv-python-headless 51 | uv pip install --system -e .[plots,tests] 52 | # Downgrade numpy to run pybullet agents 53 | # See https://github.com/bulletphysics/bullet3/issues/4649 54 | uv pip install --system "numpy<2" 55 | 56 | - name: Install specific version of gym 57 | run: | 58 | uv pip install --system gymnasium==${{ matrix.gymnasium-version }} 59 | uv pip install --system "numpy<2" 60 | # Only run for python 3.10, downgrade gym to 0.29.1 61 | if: matrix.gymnasium-version != '1.0.0' 62 | 63 | - name: Check trained agents 64 | run: | 65 | make check-trained-agents 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | logs 3 | .pytest_cache/ 4 | .coverage 5 | .coverage.* 6 | .idea/ 7 | cluster_sbatch.sh 8 | cluster_sbatch_mpi.sh 9 | cluster_torchy.sh 10 | logs/ 
11 | rl-trained_agents/ 12 | .pytype/ 13 | htmlcov/ 14 | git_rewrite_commit_history.sh 15 | .vscode/ 16 | wandb 17 | runs 18 | hub 19 | *.mp4 20 | *.json 21 | _build/ 22 | 23 | tests/dummy_env/build/ 24 | 25 | 26 | # Setuptools distribution and build folders. 27 | /dist/ 28 | /build 29 | keys/ 30 | *.egg-info 31 | .cache 32 | *.lprof 33 | *.prof 34 | 35 | # Environments 36 | .env 37 | .venv 38 | env/ 39 | venv/ 40 | ENV/ 41 | env.bak/ 42 | venv.bak/ 43 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rl-trained-agents"] 2 | path = rl-trained-agents 3 | url = https://github.com/DLR-RM/rl-trained-agents 4 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Build documentation in the docs/ directory with Sphinx 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | # Optionally build your docs in additional formats such as PDF and ePub 12 | formats: all 13 | 14 | # Set requirements using conda env 15 | conda: 16 | environment: docs/conda_env.yml 17 | 18 | build: 19 | os: ubuntu-24.04 20 | tools: 21 | python: "mambaforge-23.11" 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Antonin RAFFIN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LINT_PATHS = *.py tests/ scripts/ rl_zoo3/ hyperparams/python/*.py docs/conf.py 2 | 3 | # Run pytest and coverage report 4 | pytest: 5 | ./scripts/run_tests.sh 6 | 7 | # check all trained agents (slow) 8 | check-trained-agents: 9 | python -m pytest -v tests/test_enjoy.py -k trained_agent --color=yes 10 | 11 | mypy: 12 | mypy ${LINT_PATHS} --install-types --non-interactive 13 | 14 | type: mypy 15 | 16 | lint: 17 | # stop the build if there are Python syntax errors or undefined names 18 | # see https://www.flake8rules.com/ 19 | ruff check ${LINT_PATHS} --select=E9,F63,F7,F82 --output-format=full 20 | # exit-zero treats all errors as warnings. 
21 | ruff check ${LINT_PATHS} --exit-zero --output-format=concise 22 | 23 | format: 24 | # Sort imports 25 | ruff check --select I ${LINT_PATHS} --fix 26 | # Reformat using black 27 | black ${LINT_PATHS} 28 | 29 | check-codestyle: 30 | # Check that imports are sorted (no changes written) 31 | ruff check --select I ${LINT_PATHS} 32 | # Check formatting using black (no changes written) 33 | black --check ${LINT_PATHS} 34 | 35 | commit-checks: format type lint 36 | 37 | doc: 38 | cd docs && make html 39 | 40 | spelling: 41 | cd docs && make spelling 42 | 43 | clean: 44 | cd docs && make clean 45 | 46 | docker: docker-cpu docker-gpu 47 | 48 | docker-cpu: 49 | ./scripts/build_docker.sh 50 | 51 | docker-gpu: 52 | USE_GPU=True ./scripts/build_docker.sh 53 | 54 | # PyPi package release 55 | release: 56 | # rm -r build/* dist/* 57 | python -m build -s 58 | python -m build -w 59 | twine upload dist/* 60 | 61 | # Test PyPi package release 62 | test-release: 63 | # rm -r build/* dist/* 64 | python -m build -s 65 | python -m build -w 66 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 67 | 68 | .PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest 69 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PARENT_IMAGE=stablebaselines/stable-baselines3 2 | FROM $PARENT_IMAGE 3 | 4 | USER root 5 | 6 | RUN apt-get -y update \ 7 | && apt-get -y install \ 8 | ffmpeg \ 9 | swig \ 10 | # For building box2d 11 | build-essential \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | USER $MAMBA_USER 16 | 17 | ENV CODE_DIR=/home/$MAMBA_USER/code 18 | COPY requirements.txt /tmp/ 19 | 20 | # Copy setup file only to install dependencies 21 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./setup.py ${CODE_DIR}/rl_zoo3/setup.py 22 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./rl_zoo3/version.txt ${CODE_DIR}/rl_zoo3/rl_zoo3/version.txt 23 | COPY 
--chown=$MAMBA_USER:$MAMBA_USER ./hyperparams ${CODE_DIR}/rl_zoo3/hyperparams 24 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./rl_zoo3/plots ${CODE_DIR}/rl_zoo3/rl_zoo3/plots 25 | 26 | 27 | RUN \ 28 | eval "$(micromamba shell hook --shell bash)" && \ 29 | micromamba activate && \ 30 | cd ${CODE_DIR}/rl_zoo3 && \ 31 | uv pip uninstall stable-baselines3 && \ 32 | uv pip install --system -r /tmp/requirements.txt && \ 33 | uv pip install --system -e ".[plots,tests]" && \ 34 | uv cache clean 35 | 36 | CMD /bin/bash 37 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | # For debug: SPHINXOPTS = -nWT --keep-going -vvv 6 | SPHINXOPTS = -W # make warnings fatal (disabled because of gym in the wrappers) 7 | SPHINXBUILD = sphinx-build 8 | SPHINXPROJ = RLZoo 9 | SOURCEDIR = . 10 | BUILDDIR = _build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ## RL Zoo3 Documentation 2 | 3 | This folder contains documentation for the RL Zoo. 4 | 5 | 6 | ### Build the Documentation 7 | 8 | #### Install Sphinx and Theme 9 | Execute this command in the project root: 10 | ``` 11 | pip install stable_baselines3[docs] 12 | pip install -e . 
13 | ``` 14 | 15 | #### Building the Docs 16 | 17 | In the `docs/` folder: 18 | ``` 19 | make html 20 | ``` 21 | 22 | If you want to rebuild each time a file is changed: 23 | 24 | ``` 25 | sphinx-autobuild . _build/html 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/_static/css/baselines_theme.css: -------------------------------------------------------------------------------- 1 | /* Main colors adapted from pytorch doc */ 2 | :root{ 3 | --main-bg-color: #343A40; 4 | --link-color: #FD7E14; 5 | } 6 | 7 | /* Header fonts */ 8 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption { 9 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 10 | } 11 | 12 | 13 | /* Docs background */ 14 | .wy-side-nav-search{ 15 | background-color: var(--main-bg-color); 16 | } 17 | 18 | /* Mobile version */ 19 | .wy-nav-top{ 20 | background-color: var(--main-bg-color); 21 | } 22 | 23 | /* Change link colors (except for the menu) */ 24 | a { 25 | color: var(--link-color); 26 | } 27 | 28 | a:hover { 29 | color: #4F778F; 30 | } 31 | 32 | .wy-menu a { 33 | color: #b3b3b3; 34 | } 35 | 36 | .wy-menu a:hover { 37 | color: #b3b3b3; 38 | } 39 | 40 | a.icon.icon-home { 41 | color: #b3b3b3; 42 | } 43 | 44 | .version{ 45 | color: var(--link-color) !important; 46 | } 47 | 48 | 49 | /* Make code blocks have a background */ 50 | .codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] { 51 | background: #f8f8f8;; 52 | } 53 | 54 | /* Change style of types in the docstrings .rst-content .field-list */ 55 | .field-list .xref.py.docutils, .field-list code.docutils, .field-list .docutils.literal.notranslate 56 | { 57 | border: None; 58 | padding-left: 0; 59 | padding-right: 0; 60 | color: #404040; 61 | } 62 | -------------------------------------------------------------------------------- /docs/_static/img/colab-badge.svg: 
-------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /docs/_static/img/colab.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: root 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | dependencies: 6 | - cpuonly=1.0=0 7 | - pip=24.2 8 | - python=3.11 9 | - pytorch=2.5.0=py3.11_cpu_0 10 | - pip: 11 | - gymnasium>=0.28.1,<0.30 12 | - cloudpickle 13 | - opencv-python-headless 14 | - pandas 15 | - numpy 16 | - matplotlib 17 | - sphinx>=5,<8 18 | - sphinx_rtd_theme>=1.3.0 19 | - sphinx_copybutton 20 | - sb3-contrib>=2.4.0a10,<3.0 21 | - optuna>=3.0,<5.0 22 | - huggingface_sb3>=3.0 23 | - rich 24 | - tqdm 25 | - pyyaml>=5.1 26 | - pytablewriter==1.2.0 27 | -------------------------------------------------------------------------------- /docs/guide/config.rst: -------------------------------------------------------------------------------- 1 | .. _config: 2 | 3 | ============= 4 | Configuration 5 | ============= 6 | 7 | Hyperparameter yaml syntax 8 | -------------------------- 9 | 10 | The syntax used in ``hyperparams/algo_name.yml`` for setting 11 | hyperparameters (likewise the syntax to `overwrite 12 | hyperparameters `__ 13 | on the cli) may be specialized if the argument is a function. See 14 | examples in the ``hyperparams/`` directory. For example: 15 | 16 | - Specify a linear schedule for the learning rate: 17 | 18 | .. code:: yaml 19 | 20 | learning_rate: lin_0.012486195510232303 21 | 22 | Specify a different activation function for the network: 23 | 24 | .. 
code:: yaml 25 | 26 | policy_kwargs: "dict(activation_fn=nn.ReLU)" 27 | 28 | For a custom policy: 29 | 30 | .. code:: yaml 31 | 32 | policy: my_package.MyCustomPolicy # for instance stable_baselines3.ppo.MlpPolicy 33 | 34 | Env Normalization 35 | ----------------- 36 | 37 | In the hyperparameter file, ``normalize: True`` means that the training 38 | environment will be wrapped in a 39 | `VecNormalize `__ 40 | wrapper. 41 | 42 | `Normalization 43 | uses `__ the 44 | default parameters of ``VecNormalize``, with the exception of ``gamma`` 45 | which is set to match that of the agent. This can be 46 | `overridden `__ 47 | using the appropriate ``hyperparams/algo_name.yml``, e.g. 48 | 49 | .. code:: yaml 50 | 51 | normalize: "{'norm_obs': True, 'norm_reward': False}" 52 | 53 | Env Wrappers 54 | ------------ 55 | 56 | You can specify in the hyperparameter config one or more wrappers to use 57 | around the environment: 58 | 59 | for one wrapper: 60 | 61 | .. code:: yaml 62 | 63 | env_wrapper: gym_minigrid.wrappers.FlatObsWrapper 64 | 65 | for multiple, specify a list: 66 | 67 | .. code:: yaml 68 | 69 | env_wrapper: 70 | - rl_zoo3.wrappers.TruncatedOnSuccessWrapper: 71 | reward_offset: 1.0 72 | - sb3_contrib.common.wrappers.TimeFeatureWrapper 73 | 74 | Note that you can easily specify parameters too. 75 | 76 | By default, the environment is wrapped with a ``Monitor`` wrapper to 77 | record episode statistics. You can specify arguments to it using 78 | ``monitor_kwargs`` parameter to log additional data. That data *must* be 79 | present in the info dictionary at the last step of each episode. 80 | 81 | For instance, for recording success with goal envs 82 | (e.g. ``FetchReach-v1``): 83 | 84 | .. code:: yaml 85 | 86 | monitor_kwargs: dict(info_keywords=('is_success',)) 87 | 88 | or recording final x position with ``Ant-v3``: 89 | 90 | .. 
code:: yaml 91 | 92 | monitor_kwargs: dict(info_keywords=('x_position',)) 93 | 94 | Note: for known ``GoalEnv`` like ``FetchReach``, 95 | ``info_keywords=('is_success',)`` is actually the default. 96 | 97 | VecEnvWrapper 98 | ------------- 99 | 100 | You can specify which ``VecEnvWrapper`` to use in the config, the same 101 | way as for env wrappers (see above), using the ``vec_env_wrapper`` key: 102 | 103 | For instance: 104 | 105 | .. code:: yaml 106 | 107 | vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor 108 | 109 | Note: ``VecNormalize`` is supported separately using ``normalize`` 110 | keyword, and ``VecFrameStack`` has a dedicated keyword ``frame_stack``. 111 | 112 | Callbacks 113 | --------- 114 | 115 | Following the same syntax as env wrappers, you can also add custom 116 | callbacks to use during training. 117 | 118 | .. code:: yaml 119 | 120 | callback: 121 | - rl_zoo3.callbacks.ParallelTrainCallback: 122 | gradient_steps: 256 123 | -------------------------------------------------------------------------------- /docs/guide/custom_env.rst: -------------------------------------------------------------------------------- 1 | .. _custom: 2 | 3 | ================== 4 | Custom Environment 5 | ================== 6 | 7 | The easiest way to add support for a custom environment is to edit 8 | ``rl_zoo3/import_envs.py`` and register your environment here. Then, you 9 | need to add a section for it in the hyperparameters file 10 | (``hyperparams/algo.yml`` or a custom yaml file that you can specify 11 | using ``--conf-file`` argument). 12 | -------------------------------------------------------------------------------- /docs/guide/enjoy.rst: -------------------------------------------------------------------------------- 1 | .. _enjoy: 2 | 3 | ===================== 4 | Enjoy a Trained Agent 5 | ===================== 6 | 7 | .. 
note:: 8 | 9 | To download the repo with the trained agents, you must use 10 | ``git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo`` 11 | in order to clone the submodule too. 12 | 13 | 14 | Enjoy a trained agent 15 | --------------------- 16 | 17 | If the trained agent exists, then you can see it in action using: 18 | 19 | :: 20 | 21 | python enjoy.py --algo algo_name --env env_id 22 | 23 | For example, enjoy A2C on Breakout during 5000 timesteps: 24 | 25 | :: 26 | 27 | python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000 28 | 29 | If you have trained an agent yourself, you need to do: 30 | 31 | :: 32 | 33 | # exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID 34 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0 35 | 36 | Load Checkpoints, Best Model 37 | ----------------------------- 38 | 39 | To load the best model (when using evaluation environment): 40 | 41 | :: 42 | 43 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best 44 | 45 | To load a checkpoint (here the checkpoint name is 46 | ``rl_model_10000_steps.zip``): 47 | 48 | :: 49 | 50 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000 51 | 52 | To load the latest checkpoint: 53 | 54 | :: 55 | 56 | python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint 57 | 58 | 59 | Record a Video of a Trained Agent 60 | --------------------------------- 61 | 62 | Record 1000 steps with the latest saved model: 63 | 64 | :: 65 | 66 | python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 67 | 68 | Use the best saved model instead: 69 | 70 | :: 71 | 72 | python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best 73 | 74 | Record a video of a checkpoint saved during training (here the 75 | checkpoint name is ``rl_model_10000_steps.zip``): 76 | 77 | :: 78 | 79 | 
python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000 80 | 81 | 82 | Record a Video of a Training Experiment 83 | --------------------------------------- 84 | 85 | Apart from recording videos of specific saved models, it is also 86 | possible to record a video of a training experiment where checkpoints 87 | have been saved. 88 | 89 | Record 1000 steps for each checkpoint, latest and best saved models: 90 | 91 | :: 92 | 93 | python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic 94 | 95 | The previous command will create a ``mp4`` file. To convert this file to 96 | ``gif`` format as well: 97 | 98 | :: 99 | 100 | python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif 101 | -------------------------------------------------------------------------------- /docs/guide/install.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | Prerequisites 7 | ------------- 8 | 9 | RL Zoo requires python 3.8+ and PyTorch >= 1.13 10 | 11 | 12 | Minimal Installation 13 | -------------------- 14 | 15 | To install RL Zoo with pip, execute: 16 | 17 | .. code-block:: bash 18 | 19 | pip install rl_zoo3 20 | 21 | From source: 22 | 23 | .. code-block:: bash 24 | 25 | git clone https://github.com/DLR-RM/rl-baselines3-zoo 26 | cd rl-baselines3-zoo/ 27 | pip install -e . 28 | 29 | .. note:: 30 | 31 | You can do ``python -m rl_zoo3.train`` from any folder and you have access to ``rl_zoo3`` command line interface, for instance, ``rl_zoo3 train`` is equivalent to ``python train.py`` 32 | 33 | 34 | 35 | Full installation 36 | ----------------- 37 | 38 | With extra envs and test dependencies: 39 | 40 | 41 | .. 
note:: 42 | 43 | If you want to use Atari games, you will need to do ``pip install "autorom[accept-rom-license]"`` 44 | additionally to download the ROMs 45 | 46 | 47 | .. code-block:: bash 48 | 49 | apt-get install swig cmake ffmpeg 50 | pip install -r requirements.txt 51 | pip install -e .[plots,tests] 52 | 53 | 54 | Please see `Stable Baselines3 documentation `_ for alternatives to install stable baselines3. 55 | 56 | 57 | Docker Images 58 | ------------- 59 | 60 | Build docker image (CPU): 61 | 62 | :: 63 | 64 | make docker-cpu 65 | 66 | GPU: 67 | 68 | :: 69 | 70 | USE_GPU=True make docker-gpu 71 | 72 | Pull built docker image (CPU): 73 | 74 | :: 75 | 76 | docker pull stablebaselines/rl-baselines3-zoo-cpu 77 | 78 | GPU image: 79 | 80 | :: 81 | 82 | docker pull stablebaselines/rl-baselines3-zoo 83 | 84 | Run script in the docker image: 85 | 86 | :: 87 | 88 | ./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1 89 | -------------------------------------------------------------------------------- /docs/guide/integrations.rst: -------------------------------------------------------------------------------- 1 | .. _integrations: 2 | 3 | ============ 4 | Integrations 5 | ============ 6 | 7 | Huggingface Hub Integration 8 | --------------------------- 9 | 10 | List and videos of trained agents can be found on our Huggingface page: https://huggingface.co/sb3 11 | 12 | 13 | Upload model to hub (same syntax as for ``enjoy.py``): 14 | 15 | :: 16 | 17 | python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit" 18 | 19 | you can choose custom ``repo-name`` (default: ``{algo}-{env_id}``) by 20 | passing a ``--repo-name`` argument. 
21 | 22 | Download model from hub: 23 | 24 | :: 25 | 26 | python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 27 | 28 | 29 | Experiment tracking 30 | ------------------- 31 | 32 | We support tracking experiment data such as learning curves and 33 | hyperparameters via `Weights and Biases `__. 34 | 35 | The following command 36 | 37 | :: 38 | 39 | python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3 40 | 41 | yields a tracked experiment at this 42 | `URL `__. 43 | 44 | To add a tag to the run, (e.g. ``optimized``), use the argument 45 | ``--wandb-tags optimized``. 46 | -------------------------------------------------------------------------------- /docs/guide/plot.rst: -------------------------------------------------------------------------------- 1 | .. _plot: 2 | 3 | ============ 4 | Plot Scripts 5 | ============ 6 | 7 | 8 | Plot scripts (to be documented, see "Results" sections in SB3 9 | documentation): 10 | 11 | - ``scripts/all_plots.py``/``scripts/plot_from_file.py`` for plotting evaluations 12 | 13 | - ``scripts/plot_train.py`` for plotting training reward/success 14 | 15 | 16 | Examples 17 | -------- 18 | 19 | Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving 20 | window of 500 episodes for all the ``Fetch`` environment with ``HER`` 21 | algorithm: 22 | 23 | :: 24 | 25 | python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps 26 | 27 | Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and 28 | Ant PyBullet environments: 29 | 30 | :: 31 | 32 | python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/ 33 | 34 | Plot with the rliable library 35 | ----------------------------- 36 | 37 | The RL zoo integrates some of 38 | `rliable `__ library features. You 39 | can find a visual explanation of the tools used by rliable in this `blog 40 | post `__. 
41 | 42 | First, you need to install 43 | `rliable `__. 44 | 45 | Note: Python 3.7+ is required in that case. 46 | 47 | Then export your results to a file using the ``all_plots.py`` script 48 | (see above): 49 | 50 | :: 51 | 52 | python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy 53 | 54 | You can now use the ``plot_from_file.py`` script with ``--rliable``, 55 | ``--versus`` and ``--iqm`` arguments: 56 | 57 | :: 58 | 59 | python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC 60 | 61 | .. note:: 62 | 63 | you may need to edit ``plot_from_file.py``, in particular the 64 | ``env_key_to_env_id`` dictionary and the 65 | ``scripts/score_normalization.py`` which stores min and max score for 66 | each environment. 67 | 68 | 69 | Remark: plotting with the ``--rliable`` option is usually slow as 70 | confidence intervals need to be computed using bootstrap sampling. 71 | -------------------------------------------------------------------------------- /docs/guide/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | =============== 4 | Getting Started 5 | =============== 6 | 7 | .. note:: 8 | 9 | You can try the following examples online using Google Colab |Colab| 10 | notebook: `RL Baselines zoo notebook`_ 11 | 12 | 13 | .. _RL Baselines zoo notebook: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/rl-baselines-zoo.ipynb 14 | .. |Colab| image:: ../_static/img/colab.svg 15 | 16 | 17 | The hyperparameters for each environment are defined in 18 | ``hyperparams/algo_name.yml``. 
19 | 20 | If the environment exists in this file, then you can train an agent 21 | using: 22 | 23 | :: 24 | 25 | python -m rl_zoo3.train --algo algo_name --env env_id 26 | 27 | Or if you are in the RL Zoo3 folder: 28 | 29 | :: 30 | 31 | python train.py --algo algo_name --env env_id 32 | 33 | For example (with evaluation and checkpoints): 34 | 35 | :: 36 | 37 | python -m rl_zoo3.train --algo ppo --env CartPole-v1 --eval-freq 10000 --save-freq 50000 38 | 39 | 40 | 41 | If the trained agent exists, then you can see it in action using: 42 | 43 | :: 44 | 45 | python -m rl_zoo3.enjoy --algo algo_name --env env_id 46 | 47 | For example, enjoy A2C on Breakout during 5000 timesteps: 48 | 49 | :: 50 | 51 | python -m rl_zoo3.enjoy --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000 52 | -------------------------------------------------------------------------------- /docs/guide/sbx.rst: -------------------------------------------------------------------------------- 1 | .. _sbx: 2 | 3 | ========================== 4 | Stable Baselines Jax (SBX) 5 | ========================== 6 | 7 | `Stable Baselines Jax (SBX) `_ is a proof of concept version of Stable-Baselines3 in Jax. 8 | 9 | It provides a minimal number of features compared to SB3 but can be much faster (up to 20x times!): https://twitter.com/araffin2/status/1590714558628253698 10 | 11 | 12 | It is also compatible with the RL Zoo. 13 | For that you will need to create two files. 14 | 15 | ``train_sbx.py``: 16 | 17 | .. 
code-block:: python 18 | 19 | import rl_zoo3 20 | import rl_zoo3.train 21 | from rl_zoo3.train import train 22 | from sbx import DQN, PPO, SAC, TQC, DroQ 23 | 24 | 25 | rl_zoo3.ALGOS["tqc"] = TQC 26 | rl_zoo3.ALGOS["droq"] = DroQ 27 | rl_zoo3.ALGOS["sac"] = SAC 28 | rl_zoo3.ALGOS["ppo"] = PPO 29 | rl_zoo3.ALGOS["dqn"] = DQN 30 | rl_zoo3.train.ALGOS = rl_zoo3.ALGOS 31 | rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS 32 | 33 | if __name__ == "__main__": 34 | train() 35 | 36 | Then you can call ``python train_sbx.py --algo sac --env Pendulum-v1`` and use the RL Zoo CLI. 37 | 38 | 39 | ``enjoy_sbx.py``: 40 | 41 | .. code-block:: python 42 | 43 | import rl_zoo3 44 | import rl_zoo3.enjoy 45 | from rl_zoo3.enjoy import enjoy 46 | from sbx import DQN, PPO, SAC, TQC, DroQ 47 | 48 | 49 | rl_zoo3.ALGOS["tqc"] = TQC 50 | rl_zoo3.ALGOS["droq"] = DroQ 51 | rl_zoo3.ALGOS["sac"] = SAC 52 | rl_zoo3.ALGOS["ppo"] = PPO 53 | rl_zoo3.ALGOS["dqn"] = DQN 54 | rl_zoo3.enjoy.ALGOS = rl_zoo3.ALGOS 55 | rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS 56 | 57 | if __name__ == "__main__": 58 | enjoy() 59 | -------------------------------------------------------------------------------- /docs/guide/train.rst: -------------------------------------------------------------------------------- 1 | .. _train: 2 | 3 | ============== 4 | Train an Agent 5 | ============== 6 | 7 | Basic Usage 8 | ----------- 9 | 10 | The hyperparameters for each environment are defined in 11 | ``hyperparams/algo_name.yml``. 12 | 13 | 14 | .. note:: 15 | 16 | Once RL Zoo3 is installed, you can do ``python -m rl_zoo3.train`` from any folder, it is equivalent to ``python train.py`` 17 | 18 | 19 | If the environment exists in this file, then you can train an agent using: 20 | 21 | :: 22 | 23 | python train.py --algo algo_name --env env_id 24 | 25 | 26 | .. note:: 27 | 28 | You can use ``-P`` (``--progress``) option to display a progress bar. 
29 | 30 | 31 | Custom Config File 32 | ------------------ 33 | 34 | Using a custom config file, when it is a yaml file which contains an ``env_id`` entry: 35 | 36 | :: 37 | 38 | python train.py --algo algo_name --env env_id --conf-file my_yaml.yml 39 | 40 | 41 | You can also use a python file that contains a dictionary called ``hyperparams`` with an entry for each ``env_id``. 42 | (see ``hyperparams/python/ppo_config_example.py`` for an example) 43 | 44 | :: 45 | 46 | # You can pass a path to a python file 47 | python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py 48 | # Or pass a path to a file from a module (for instance my_package.my_file) 49 | python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example 50 | 51 | The advantage of this approach is that you can specify arbitrary python dictionaries 52 | and ensure that all their dependencies are imported in the config file itself. 
53 | 54 | Tensorboard, Checkpoints, Evaluation 55 | ------------------------------------ 56 | 57 | For example (with tensorboard support): 58 | 59 | :: 60 | 61 | python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/ 62 | 63 | 64 | Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env): 65 | 66 | :: 67 | 68 | python train.py --algo sac --env AntBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1 69 | 70 | 71 | Save a checkpoint of the agent every 100000 steps: 72 | 73 | :: 74 | 75 | python train.py --algo td3 --env AntBulletEnv-v0 --save-freq 100000 76 | 77 | Resume Training 78 | --------------- 79 | 80 | Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps): 81 | 82 | :: 83 | 84 | python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000 85 | 86 | Save Replay Buffer 87 | ------------------ 88 | 89 | When using off-policy algorithms, you can also **save the replay buffer** after training: 90 | 91 | :: 92 | 93 | python train.py --algo sac --env Pendulum-v1 --save-replay-buffer 94 | 95 | It will be automatically loaded if present when continuing training. 
96 | 97 | 98 | Env keyword arguments 99 | --------------------- 100 | 101 | You can specify keyword arguments to pass to the env constructor in the 102 | command line, using ``--env-kwargs``: 103 | 104 | :: 105 | 106 | python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10 107 | 108 | 109 | Overwrite hyperparameters 110 | ------------------------- 111 | 112 | You can easily overwrite hyperparameters in the command line, using 113 | ``--hyperparams``: 114 | 115 | :: 116 | 117 | python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])" 118 | 119 | Note: if you want to pass a string, you need to escape it like that: 120 | ``my_string:"'value'"`` 121 | -------------------------------------------------------------------------------- /docs/guide/tuning.rst: -------------------------------------------------------------------------------- 1 | .. _tuning: 2 | 3 | ===================== 4 | Hyperparameter Tuning 5 | ===================== 6 | 7 | Automated hyperparameter optimization 8 | ------------------------------------- 9 | 10 | Blog post: `Automatic Hyperparameter Tuning - A Visual Guide `_ 11 | 12 | Video: https://www.youtube.com/watch?v=AidFTOdGNFQ 13 | 14 | We use `Optuna `__ for optimizing the 15 | hyperparameters. Not all hyperparameters are tuned, and tuning enforces 16 | certain default hyperparameter settings that may be different from the 17 | official defaults. See 18 | `rl_zoo3/hyperparams_opt.py `__ 19 | for the current settings for each agent. 20 | 21 | Hyperparameters not specified in 22 | `rl_zoo3/hyperparams_opt.py `__ 23 | are taken from the associated YAML file and fallback to the default 24 | values of SB3 if not present. 
25 | 26 | Note: when using SuccessiveHalvingPruner (“halving”), you must specify 27 | ``--n-jobs > 1`` 28 | 29 | Budget of 1000 trials with a maximum of 50000 steps: 30 | 31 | :: 32 | 33 | python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \ 34 | --sampler tpe --pruner median 35 | 36 | Distributed optimization using a shared database is also possible (see 37 | the corresponding `Optuna 38 | documentation `__): 39 | 40 | :: 41 | 42 | python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage logs/demo.log 43 | 44 | 45 | 46 | Visualize live using `optuna-dashboard `__ 47 | 48 | .. code:: bash 49 | 50 | optuna-dashboard logs/demo.log 51 | 52 | Load hyperparameters from trial number 21 and train an agent with it: 53 | 54 | .. code:: bash 55 | 56 | python train.py --algo ppo --env MountainCar-v0 --study-name test --storage logs/demo.log --trial-id 21 57 | 58 | 59 | The default budget for hyperparameter tuning is 500 trials and there is 60 | one intermediate evaluation for pruning/early stopping per 100k time 61 | steps. 62 | 63 | Hyperparameters search space 64 | ---------------------------- 65 | 66 | Note that the default hyperparameters used in the zoo when tuning are 67 | not always the same as the defaults provided in 68 | `stable-baselines3 `__. 69 | Consult the latest source code to be sure of these settings. For 70 | example: 71 | 72 | - PPO tuning assumes a network architecture with ``ortho_init = False`` 73 | when tuning, though it is ``True`` by 74 | `default `__. 75 | You can change that by updating 76 | `rl_zoo3/hyperparams_opt.py `__. 77 | 78 | - Non-episodic rollout in TD3 and DDPG assumes 79 | ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to 80 | reduce the search space. 
81 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | RL Baselines3 Zoo Docs - A Training Framework for Stable Baselines3 2 | =================================================================== 3 | 4 | `RL Baselines3 Zoo `_ is a training framework for Reinforcement Learning (RL), using `Stable Baselines3 (SB3) `_, 5 | reliable implementations of reinforcement learning algorithms in PyTorch. 6 | 7 | Github repository: https://github.com/DLR-RM/rl-baselines3-zoo 8 | 9 | It provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos. 10 | 11 | In addition, it includes a collection of tuned hyperparameters for common environments and RL algorithms, and agents trained with those settings. 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: User Guide 16 | 17 | guide/install 18 | guide/quickstart 19 | guide/train 20 | guide/plot 21 | guide/enjoy 22 | guide/custom_env 23 | guide/config 24 | guide/integrations 25 | guide/tuning 26 | guide/sbx 27 | 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | :caption: RL Zoo API 32 | 33 | modules/exp_manager 34 | modules/wrappers 35 | modules/callbacks 36 | modules/utils 37 | 38 | .. toctree:: 39 | :maxdepth: 1 40 | :caption: Misc 41 | 42 | misc/changelog 43 | 44 | 45 | Citing RL Baselines3 Zoo 46 | ------------------------ 47 | To cite this project in publications: 48 | 49 | .. code-block:: bibtex 50 | 51 | @misc{rl-zoo3, 52 | author = {Raffin, Antonin}, 53 | title = {RL Baselines3 Zoo}, 54 | year = {2020}, 55 | publisher = {GitHub}, 56 | journal = {GitHub repository}, 57 | howpublished = {\url{https://github.com/DLR-RM/rl-baselines3-zoo}}, 58 | } 59 | 60 | Contributing 61 | ------------ 62 | 63 | To anyone interested in making the RL Zoo better, there are still some improvements 64 | that need to be made. 65 | You can check issues in the `repo `_. 
66 | 67 | If you want to contribute, please read `CONTRIBUTING.md `_ first. 68 | 69 | Indices and tables 70 | ------------------- 71 | 72 | * :ref:`genindex` 73 | * :ref:`search` 74 | * :ref:`modindex` 75 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=StableBaselines 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/misc/changelog.rst: -------------------------------------------------------------------------------- 1 | .. _changelog: 2 | 3 | Changelog 4 | ========== 5 | 6 | 7 | See https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md 8 | -------------------------------------------------------------------------------- /docs/modules/callbacks.rst: -------------------------------------------------------------------------------- 1 | .. _callbacks: 2 | 3 | 4 | Callbacks 5 | ========= 6 | 7 | .. 
automodule:: rl_zoo3.callbacks 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/modules/exp_manager.rst: -------------------------------------------------------------------------------- 1 | .. _manager: 2 | 3 | .. automodule:: rl_zoo3.exp_manager 4 | 5 | 6 | Experiment Manager 7 | ================== 8 | 9 | 10 | Parameters 11 | ---------- 12 | 13 | .. autoclass:: ExperimentManager 14 | :members: 15 | :inherited-members: 16 | -------------------------------------------------------------------------------- /docs/modules/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utils: 2 | 3 | 4 | Utils 5 | ===== 6 | 7 | .. automodule:: rl_zoo3.utils 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/modules/wrappers.rst: -------------------------------------------------------------------------------- 1 | .. _wrappers: 2 | 3 | 4 | Wrappers 5 | ======== 6 | 7 | .. 
automodule:: rl_zoo3.wrappers 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | py 2 | env 3 | atari 4 | argparse 5 | Argparse 6 | TensorFlow 7 | feedforward 8 | envs 9 | VecEnv 10 | pretrain 11 | petrained 12 | tf 13 | th 14 | nn 15 | np 16 | str 17 | mujoco 18 | cpu 19 | ndarray 20 | ndarrays 21 | timestep 22 | timesteps 23 | stepsize 24 | dataset 25 | adam 26 | fn 27 | normalisation 28 | Kullback 29 | Leibler 30 | boolean 31 | deserialized 32 | pretrained 33 | minibatch 34 | subprocesses 35 | ArgumentParser 36 | Tensorflow 37 | Gaussian 38 | approximator 39 | minibatches 40 | hyperparameters 41 | hyperparameter 42 | vectorized 43 | rl 44 | colab 45 | dataloader 46 | npz 47 | datasets 48 | vf 49 | logits 50 | num 51 | Utils 52 | backpropagate 53 | prepend 54 | NaN 55 | preprocessing 56 | Cloudpickle 57 | async 58 | multiprocess 59 | tensorflow 60 | mlp 61 | cnn 62 | neglogp 63 | tanh 64 | coef 65 | repo 66 | Huber 67 | params 68 | ppo 69 | arxiv 70 | Arxiv 71 | func 72 | DQN 73 | Uhlenbeck 74 | Ornstein 75 | multithread 76 | cancelled 77 | Tensorboard 78 | parallelize 79 | customising 80 | serializable 81 | Multiprocessed 82 | cartpole 83 | toolset 84 | lstm 85 | rescale 86 | ffmpeg 87 | avconv 88 | unnormalized 89 | Github 90 | pre 91 | preprocess 92 | backend 93 | attr 94 | preprocess 95 | Antonin 96 | Raffin 97 | araffin 98 | Homebrew 99 | Numpy 100 | Theano 101 | rollout 102 | kfac 103 | Piecewise 104 | csv 105 | nvidia 106 | visdom 107 | tensorboard 108 | preprocessed 109 | namespace 110 | sklearn 111 | GoalEnv 112 | Torchy 113 | pytorch 114 | dicts 115 | optimizers 116 | Deprecations 117 | forkserver 118 | cuda 119 | Polyak 120 | gSDE 121 | rollouts 122 | Pyro 123 | softmax 124 | stdout 125 | Contrib 126 | Quantile 127 | Huggingface 128 | Jax 129 | Optuna 130 | 
-------------------------------------------------------------------------------- /enjoy.py: -------------------------------------------------------------------------------- 1 | from rl_zoo3.enjoy import enjoy 2 | 3 | if __name__ == "__main__": 4 | enjoy() 5 | -------------------------------------------------------------------------------- /hyperparams/a2c.yml: -------------------------------------------------------------------------------- 1 | atari: 2 | env_wrapper: 3 | - stable_baselines3.common.atari_wrappers.AtariWrapper 4 | # Equivalent to 5 | # vec_env_wrapper: 6 | # - stable_baselines3.common.vec_env.VecFrameStack: 7 | # n_stack: 4 8 | frame_stack: 4 9 | policy: 'CnnPolicy' 10 | n_envs: 16 11 | n_timesteps: !!float 1e7 12 | ent_coef: 0.01 13 | vf_coef: 0.25 14 | policy_kwargs: "dict(optimizer_class=RMSpropTFLike, optimizer_kwargs=dict(eps=1e-5))" 15 | 16 | CartPole-v1: 17 | n_envs: 8 18 | n_timesteps: !!float 5e5 19 | policy: 'MlpPolicy' 20 | ent_coef: 0.0 21 | 22 | LunarLander-v2: 23 | n_envs: 8 24 | n_timesteps: !!float 2e5 25 | policy: 'MlpPolicy' 26 | gamma: 0.995 27 | n_steps: 5 28 | learning_rate: lin_0.00083 29 | ent_coef: 0.00001 30 | 31 | MountainCar-v0: 32 | normalize: true 33 | n_envs: 16 34 | n_timesteps: !!float 1e6 35 | policy: 'MlpPolicy' 36 | ent_coef: .0 37 | 38 | Acrobot-v1: 39 | normalize: true 40 | n_envs: 16 41 | n_timesteps: !!float 5e5 42 | policy: 'MlpPolicy' 43 | ent_coef: .0 44 | 45 | # Tuned 46 | Pendulum-v1: 47 | normalize: True 48 | n_envs: 8 49 | n_timesteps: !!float 1e6 50 | policy: 'MlpPolicy' 51 | ent_coef: 0.0 52 | max_grad_norm: 0.5 53 | n_steps: 8 54 | gae_lambda: 0.9 55 | vf_coef: 0.4 56 | gamma: 0.9 57 | use_rms_prop: True 58 | normalize_advantage: False 59 | learning_rate: lin_7e-4 60 | use_sde: True 61 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)" 62 | 63 | # Tuned 64 | LunarLanderContinuous-v3: 65 | normalize: true 66 | n_envs: 4 67 | n_timesteps: !!float 5e6 68 | policy: 'MlpPolicy' 69 | ent_coef: 0.0 70 
| max_grad_norm: 0.5 71 | n_steps: 8 72 | gae_lambda: 0.9 73 | vf_coef: 0.4 74 | gamma: 0.99 75 | use_rms_prop: True 76 | normalize_advantage: False 77 | learning_rate: lin_7e-4 78 | use_sde: True 79 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)" 80 | 81 | # Tuned 82 | MountainCarContinuous-v0: 83 | normalize: true 84 | n_envs: 4 85 | n_steps: 100 86 | n_timesteps: !!float 1e5 87 | policy: 'MlpPolicy' 88 | ent_coef: 0.0 89 | use_sde: True 90 | sde_sample_freq: 16 91 | policy_kwargs: "dict(log_std_init=0.0, ortho_init=False)" 92 | 93 | # Tuned 94 | BipedalWalker-v3: 95 | normalize: true 96 | n_envs: 16 97 | n_timesteps: !!float 5e6 98 | policy: 'MlpPolicy' 99 | ent_coef: 0.0 100 | max_grad_norm: 0.5 101 | n_steps: 8 102 | gae_lambda: 0.9 103 | vf_coef: 0.4 104 | gamma: 0.99 105 | use_rms_prop: True 106 | normalize_advantage: False 107 | learning_rate: lin_0.00096 108 | use_sde: True 109 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)" 110 | 111 | # Tuned 112 | BipedalWalkerHardcore-v3: 113 | normalize: true 114 | n_envs: 32 115 | n_timesteps: !!float 20e7 116 | policy: 'MlpPolicy' 117 | ent_coef: 0.001 118 | max_grad_norm: 0.5 119 | n_steps: 8 120 | gae_lambda: 0.9 121 | vf_coef: 0.4 122 | gamma: 0.99 123 | use_rms_prop: True 124 | normalize_advantage: False 125 | learning_rate: lin_0.0008 126 | use_sde: True 127 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)" 128 | 129 | # Tuned 130 | HalfCheetahBulletEnv-v0: &pybullet-defaults 131 | normalize: true 132 | n_envs: 4 133 | n_timesteps: !!float 2e6 134 | policy: 'MlpPolicy' 135 | ent_coef: 0.0 136 | max_grad_norm: 0.5 137 | n_steps: 8 138 | gae_lambda: 0.9 139 | vf_coef: 0.4 140 | gamma: 0.99 141 | use_rms_prop: True 142 | normalize_advantage: False 143 | # Both work 144 | learning_rate: lin_0.00096 145 | # learning_rate: !!float 3e-4 146 | use_sde: True 147 | policy_kwargs: "dict(log_std_init=-2, ortho_init=False)" 148 | 149 | Walker2DBulletEnv-v0: 150 | <<: *pybullet-defaults 151 | 
152 | # Tuned 153 | AntBulletEnv-v0: 154 | <<: *pybullet-defaults 155 | 156 | # Tuned 157 | HopperBulletEnv-v0: 158 | <<: *pybullet-defaults 159 | 160 | # Tuned but unstable 161 | # Not working without SDE? 162 | ReacherBulletEnv-v0: 163 | <<: *pybullet-defaults 164 | learning_rate: lin_0.0008 165 | 166 | # === Mujoco Envs === 167 | 168 | HalfCheetah-v4: &mujoco-defaults 169 | normalize: true 170 | n_timesteps: !!float 1e6 171 | policy: 'MlpPolicy' 172 | 173 | Ant-v4: 174 | <<: *mujoco-defaults 175 | 176 | Hopper-v4: 177 | <<: *mujoco-defaults 178 | 179 | Walker2d-v4: 180 | <<: *mujoco-defaults 181 | 182 | Humanoid-v4: 183 | <<: *mujoco-defaults 184 | n_timesteps: !!float 2e6 185 | 186 | Swimmer-v4: 187 | <<: *mujoco-defaults 188 | gamma: 0.9999 189 | -------------------------------------------------------------------------------- /hyperparams/crossq.yml: -------------------------------------------------------------------------------- 1 | MountainCarContinuous-v0: 2 | n_timesteps: !!float 50000 3 | policy: 'MlpPolicy' 4 | learning_rate: !!float 7e-4 5 | buffer_size: 50000 6 | train_freq: 32 7 | gradient_steps: 32 8 | gamma: 0.9999 9 | learning_starts: 100 10 | use_sde: True 11 | policy_delay: 2 12 | policy_kwargs: "dict(use_expln=True, log_std_init=-1, net_arch=[64, 64])" 13 | 14 | Pendulum-v1: 15 | n_timesteps: 20000 16 | policy: 'MlpPolicy' 17 | policy_delay: 2 18 | policy_kwargs: "dict(net_arch=[256, 256])" 19 | 20 | 21 | LunarLanderContinuous-v3: 22 | n_timesteps: !!float 2e5 23 | policy: 'MlpPolicy' 24 | buffer_size: 1000000 25 | learning_starts: 10000 26 | 27 | 28 | BipedalWalker-v3: 29 | n_timesteps: !!float 2e5 30 | policy: 'MlpPolicy' 31 | buffer_size: 300000 32 | gamma: 0.98 33 | learning_starts: 10000 34 | policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[1024, 1024]))" 35 | 36 | # === Mujoco Envs === 37 | 38 | HalfCheetah-v4: &mujoco-defaults 39 | buffer_size: 1_000_000 40 | learning_rate: !!float 1e-3 41 | learning_starts: 5000 42 | n_timesteps: 
!!float 5e6 43 | policy: 'MlpPolicy' 44 | policy_delay: 3 45 | policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[2048, 2048]))" 46 | 47 | Ant-v4: 48 | <<: *mujoco-defaults 49 | 50 | Hopper-v4: 51 | <<: *mujoco-defaults 52 | 53 | Walker2d-v4: 54 | <<: *mujoco-defaults 55 | 56 | Humanoid-v4: 57 | <<: *mujoco-defaults 58 | 59 | HumanoidStandup-v4: 60 | <<: *mujoco-defaults 61 | 62 | Swimmer-v4: 63 | <<: *mujoco-defaults 64 | gamma: 0.999 65 | 66 | # Tuned for SAC, need to check with CrossQ 67 | HalfCheetahBulletEnv-v0: &pybullet-defaults 68 | n_timesteps: !!float 1e6 69 | policy: 'MlpPolicy' 70 | learning_rate: !!float 7.3e-4 71 | buffer_size: 300000 72 | batch_size: 256 73 | ent_coef: 'auto' 74 | gamma: 0.98 75 | train_freq: 8 76 | gradient_steps: 8 77 | learning_starts: 10000 78 | use_sde: True 79 | policy_kwargs: "dict(use_expln=True, log_std_init=-3)" 80 | 81 | # Tuned 82 | AntBulletEnv-v0: 83 | <<: *pybullet-defaults 84 | 85 | HopperBulletEnv-v0: 86 | <<: *pybullet-defaults 87 | learning_rate: lin_7.3e-4 88 | 89 | Walker2DBulletEnv-v0: 90 | <<: *pybullet-defaults 91 | learning_rate: lin_7.3e-4 92 | -------------------------------------------------------------------------------- /hyperparams/ddpg.yml: -------------------------------------------------------------------------------- 1 | # Tuned 2 | MountainCarContinuous-v0: 3 | n_timesteps: 300000 4 | policy: 'MlpPolicy' 5 | noise_type: 'ornstein-uhlenbeck' 6 | noise_std: 0.5 7 | gradient_steps: 1 8 | train_freq: 1 9 | learning_rate: !!float 1e-3 10 | batch_size: 256 11 | policy_kwargs: "dict(net_arch=[400, 300])" 12 | 13 | Pendulum-v1: 14 | n_timesteps: 20000 15 | policy: 'MlpPolicy' 16 | gamma: 0.98 17 | buffer_size: 200000 18 | learning_starts: 10000 19 | noise_type: 'normal' 20 | noise_std: 0.1 21 | gradient_steps: 1 22 | train_freq: 1 23 | learning_rate: !!float 1e-3 24 | policy_kwargs: "dict(net_arch=[400, 300])" 25 | 26 | LunarLanderContinuous-v3: 27 | n_timesteps: !!float 3e5 28 | policy: 'MlpPolicy' 29 
| gamma: 0.98 30 | buffer_size: 200000 31 | learning_starts: 10000 32 | noise_type: 'normal' 33 | noise_std: 0.1 34 | gradient_steps: 1 35 | train_freq: 1 36 | learning_rate: !!float 1e-3 37 | policy_kwargs: "dict(net_arch=[400, 300])" 38 | 39 | BipedalWalker-v3: 40 | n_timesteps: !!float 1e6 41 | policy: 'MlpPolicy' 42 | gamma: 0.98 43 | buffer_size: 200000 44 | learning_starts: 10000 45 | noise_type: 'normal' 46 | noise_std: 0.1 47 | gradient_steps: 1 48 | train_freq: 1 49 | learning_rate: !!float 1e-3 50 | policy_kwargs: "dict(net_arch=[400, 300])" 51 | 52 | # To be tuned 53 | BipedalWalkerHardcore-v3: 54 | n_timesteps: !!float 1e7 55 | policy: 'MlpPolicy' 56 | gamma: 0.99 57 | buffer_size: 1000000 58 | learning_starts: 10000 59 | noise_type: 'normal' 60 | noise_std: 0.1 61 | batch_size: 256 62 | train_freq: 1 63 | learning_rate: lin_7e-4 64 | policy_kwargs: "dict(net_arch=[400, 300])" 65 | 66 | # Tuned 67 | HalfCheetahBulletEnv-v0: &pybullet-defaults 68 | n_timesteps: !!float 1e6 69 | policy: 'MlpPolicy' 70 | gamma: 0.98 71 | buffer_size: 200000 72 | learning_starts: 10000 73 | noise_type: 'normal' 74 | noise_std: 0.1 75 | gradient_steps: 1 76 | train_freq: 1 77 | batch_size: 256 78 | learning_rate: !!float 7e-4 79 | policy_kwargs: "dict(net_arch=[400, 300])" 80 | 81 | # Tuned 82 | AntBulletEnv-v0: 83 | <<: *pybullet-defaults 84 | 85 | # Tuned 86 | HopperBulletEnv-v0: 87 | <<: *pybullet-defaults 88 | 89 | # Tuned 90 | Walker2DBulletEnv-v0: 91 | <<: *pybullet-defaults 92 | 93 | # TO BE tested 94 | HumanoidBulletEnv-v0: 95 | n_timesteps: !!float 2e6 96 | policy: 'MlpPolicy' 97 | gamma: 0.98 98 | buffer_size: 200000 99 | learning_starts: 10000 100 | noise_type: 'normal' 101 | noise_std: 0.1 102 | gradient_steps: -1 103 | train_freq: [1, "episode"] 104 | learning_rate: !!float 1e-3 105 | policy_kwargs: "dict(net_arch=[400, 300])" 106 | 107 | # Tuned 108 | ReacherBulletEnv-v0: 109 | <<: *pybullet-defaults 110 | n_timesteps: !!float 3e5 111 | 112 | 113 | # To be 
tuned 114 | InvertedDoublePendulumBulletEnv-v0: 115 | <<: *pybullet-defaults 116 | n_timesteps: !!float 1e6 117 | 118 | # To be tuned 119 | InvertedPendulumSwingupBulletEnv-v0: 120 | <<: *pybullet-defaults 121 | n_timesteps: !!float 3e5 122 | 123 | # === Mujoco Envs === 124 | HalfCheetah-v4: &mujoco-defaults 125 | n_timesteps: !!float 1e6 126 | policy: 'MlpPolicy' 127 | learning_starts: 10000 128 | noise_type: 'normal' 129 | noise_std: 0.1 130 | train_freq: 1 131 | gradient_steps: 1 132 | learning_rate: !!float 1e-3 133 | batch_size: 256 134 | policy_kwargs: "dict(net_arch=[400, 300])" 135 | 136 | Ant-v4: 137 | <<: *mujoco-defaults 138 | 139 | Hopper-v4: 140 | <<: *mujoco-defaults 141 | 142 | Walker2d-v4: 143 | <<: *mujoco-defaults 144 | 145 | Humanoid-v4: 146 | <<: *mujoco-defaults 147 | n_timesteps: !!float 2e6 148 | 149 | Swimmer-v4: 150 | <<: *mujoco-defaults 151 | gamma: 0.9999 152 | -------------------------------------------------------------------------------- /hyperparams/dqn.yml: -------------------------------------------------------------------------------- 1 | atari: 2 | env_wrapper: 3 | - stable_baselines3.common.atari_wrappers.AtariWrapper 4 | frame_stack: 4 5 | policy: 'CnnPolicy' 6 | n_timesteps: !!float 1e7 7 | buffer_size: 100000 8 | learning_rate: !!float 1e-4 9 | batch_size: 32 10 | learning_starts: 100000 11 | target_update_interval: 1000 12 | train_freq: 4 13 | gradient_steps: 1 14 | exploration_fraction: 0.1 15 | exploration_final_eps: 0.01 16 | # If True, you need to deactivate handle_timeout_termination 17 | # in the replay_buffer_kwargs 18 | optimize_memory_usage: False 19 | 20 | # Almost Tuned 21 | CartPole-v1: 22 | n_timesteps: !!float 5e4 23 | policy: 'MlpPolicy' 24 | learning_rate: !!float 2.3e-3 25 | batch_size: 64 26 | buffer_size: 100000 27 | learning_starts: 1000 28 | gamma: 0.99 29 | target_update_interval: 10 30 | train_freq: 256 31 | gradient_steps: 128 32 | exploration_fraction: 0.16 33 | exploration_final_eps: 0.04 34 | 
policy_kwargs: "dict(net_arch=[256, 256])" 35 | 36 | # Tuned 37 | MountainCar-v0: 38 | n_timesteps: !!float 1.2e5 39 | policy: 'MlpPolicy' 40 | learning_rate: !!float 4e-3 41 | batch_size: 128 42 | buffer_size: 10000 43 | learning_starts: 1000 44 | gamma: 0.98 45 | target_update_interval: 600 46 | train_freq: 16 47 | gradient_steps: 8 48 | exploration_fraction: 0.2 49 | exploration_final_eps: 0.07 50 | policy_kwargs: "dict(net_arch=[256, 256])" 51 | 52 | # Tuned 53 | LunarLander-v2: 54 | n_timesteps: !!float 1e5 55 | policy: 'MlpPolicy' 56 | learning_rate: !!float 6.3e-4 57 | batch_size: 128 58 | buffer_size: 50000 59 | learning_starts: 0 60 | gamma: 0.99 61 | target_update_interval: 250 62 | train_freq: 4 63 | gradient_steps: -1 64 | exploration_fraction: 0.12 65 | exploration_final_eps: 0.1 66 | policy_kwargs: "dict(net_arch=[256, 256])" 67 | 68 | # Tuned 69 | Acrobot-v1: 70 | n_timesteps: !!float 1e5 71 | policy: 'MlpPolicy' 72 | learning_rate: !!float 6.3e-4 73 | batch_size: 128 74 | buffer_size: 50000 75 | learning_starts: 0 76 | gamma: 0.99 77 | target_update_interval: 250 78 | train_freq: 4 79 | gradient_steps: -1 80 | exploration_fraction: 0.12 81 | exploration_final_eps: 0.1 82 | policy_kwargs: "dict(net_arch=[256, 256])" 83 | -------------------------------------------------------------------------------- /hyperparams/her.yml: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | # NOTE: STARTING WITH SB3 >= 1.1.0, because HER is now HerReplayBuffer, 3 | # this file is no longer used. 4 | # It is only here as a reference. 
5 | ############################################################# 6 | 7 | parking-v0: 8 | n_timesteps: !!float 1e5 9 | policy: 'MlpPolicy' 10 | model_class: 'tqc' 11 | n_sampled_goal: 4 12 | goal_selection_strategy: 'future' 13 | buffer_size: 1000000 14 | batch_size: 1024 15 | gamma: 0.95 16 | learning_rate: !!float 1e-3 17 | tau: 0.05 18 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])" 19 | online_sampling: True 20 | max_episode_length: 100 21 | # normalize: True 22 | 23 | # Mujoco Robotic Env 24 | 25 | FetchPush-v1: 26 | env_wrapper: 27 | - sb3_contrib.common.wrappers.TimeFeatureWrapper 28 | n_timesteps: !!float 1e6 29 | policy: 'MlpPolicy' 30 | model_class: 'tqc' 31 | n_sampled_goal: 4 32 | goal_selection_strategy: 'future' 33 | buffer_size: 1000000 34 | batch_size: 2048 35 | gamma: 0.95 36 | learning_rate: !!float 1e-3 37 | tau: 0.05 38 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])" 39 | online_sampling: True 40 | 41 | FetchSlide-v1: 42 | env_wrapper: 43 | - sb3_contrib.common.wrappers.TimeFeatureWrapper 44 | n_timesteps: !!float 2.5e6 45 | policy: 'MlpPolicy' 46 | model_class: 'tqc' 47 | n_sampled_goal: 4 48 | goal_selection_strategy: 'future' 49 | buffer_size: 1000000 50 | batch_size: 2048 51 | gamma: 0.95 52 | learning_rate: !!float 1e-3 53 | tau: 0.05 54 | # ent_coef: 0.01 55 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])" 56 | online_sampling: True 57 | 58 | 59 | FetchPickAndPlace-v1: 60 | env_wrapper: 61 | - sb3_contrib.common.wrappers.TimeFeatureWrapper 62 | # - rl_zoo3.wrappers.TruncatedOnSuccessWrapper: 63 | # reward_offset: 0 64 | # n_successes: 4 65 | # - stable_baselines3.common.monitor.Monitor 66 | n_timesteps: !!float 1e6 67 | policy: 'MlpPolicy' 68 | model_class: 'tqc' 69 | n_sampled_goal: 4 70 | goal_selection_strategy: 'future' 71 | buffer_size: 1000000 72 | batch_size: 1024 73 | gamma: 0.95 74 | learning_rate: !!float 1e-3 75 | tau: 0.05 76 | policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 
512])" 77 | online_sampling: True 78 | 79 | # SAC hyperparams 80 | FetchReach-v1: 81 | n_timesteps: !!float 20000 82 | policy: 'MlpPolicy' 83 | model_class: 'sac' 84 | n_sampled_goal: 4 85 | goal_selection_strategy: 'future' 86 | buffer_size: 1000000 87 | ent_coef: 'auto' 88 | batch_size: 256 89 | gamma: 0.95 90 | learning_rate: 0.001 91 | learning_starts: 1000 92 | online_sampling: True 93 | normalize: True 94 | 95 | # === Real Robot envs 96 | NeckGoalEnvRelativeSparse-v2: 97 | model_class: 'sac' 98 | # env_wrapper: 99 | # - rl_zoo3.wrappers.HistoryWrapper: 100 | # horizon: 2 101 | # - sb3_contrib.common.wrappers.TimeFeatureWrapper 102 | n_timesteps: !!float 1e6 103 | policy: 'MlpPolicy' 104 | learning_rate: !!float 7.3e-4 105 | buffer_size: 100000 106 | batch_size: 256 107 | ent_coef: 'auto' 108 | gamma: 0.99 109 | tau: 0.02 110 | train_freq: [1, "episode"] 111 | gradient_steps: -1 112 | # 10 episodes of warm-up 113 | learning_starts: 1500 114 | use_sde_at_warmup: True 115 | use_sde: True 116 | sde_sample_freq: 64 117 | policy_kwargs: "dict(log_std_init=-2, net_arch=[256, 256])" 118 | n_sampled_goal: 4 119 | goal_selection_strategy: 'future' 120 | online_sampling: False 121 | 122 | NeckGoalEnvRelativeDense-v2: 123 | model_class: 'sac' 124 | env_wrapper: 125 | - rl_zoo3.wrappers.HistoryWrapperObsDict: 126 | horizon: 2 127 | # - sb3_contrib.common.wrappers.TimeFeatureWrapper 128 | n_timesteps: !!float 1e6 129 | policy: 'MlpPolicy' 130 | learning_rate: !!float 7.3e-4 131 | buffer_size: 200000 132 | batch_size: 256 133 | ent_coef: 'auto' 134 | gamma: 0.99 135 | tau: 0.02 136 | train_freq: [1, "episode"] 137 | gradient_steps: -1 138 | # 10 episodes of warm-up 139 | learning_starts: 1500 140 | use_sde_at_warmup: True 141 | use_sde: True 142 | sde_sample_freq: 64 143 | policy_kwargs: "dict(log_std_init=-2, net_arch=[256, 256])" 144 | n_sampled_goal: 4 145 | goal_selection_strategy: 'future' 146 | online_sampling: False 147 | 
-------------------------------------------------------------------------------- /hyperparams/python/ppo_config_example.py: -------------------------------------------------------------------------------- 1 | """This file just serves as an example of how to configure the zoo 2 | using Python scripts instead of YAML files.""" 3 | 4 | import torch 5 | 6 | hyperparams = { 7 | "MountainCarContinuous-v0": dict( 8 | env_wrapper=[{"gymnasium.wrappers.TimeLimit": {"max_episode_steps": 100}}], 9 | normalize=True, 10 | n_envs=1, 11 | n_timesteps=20000.0, 12 | policy="MlpPolicy", 13 | batch_size=8, 14 | n_steps=8, 15 | gamma=0.9999, 16 | learning_rate=7.77e-05, 17 | ent_coef=0.00429, 18 | clip_range=0.1, 19 | n_epochs=2, 20 | gae_lambda=0.9, 21 | max_grad_norm=5, 22 | vf_coef=0.19, 23 | use_sde=True, 24 | policy_kwargs=dict( 25 | log_std_init=-3.29, 26 | ortho_init=False, 27 | activation_fn=torch.nn.ReLU, 28 | ), 29 | ) 30 | } 31 | -------------------------------------------------------------------------------- /hyperparams/qrdqn.yml: -------------------------------------------------------------------------------- 1 | atari: 2 | env_wrapper: 3 | - stable_baselines3.common.atari_wrappers.AtariWrapper 4 | frame_stack: 4 5 | policy: 'CnnPolicy' 6 | n_timesteps: !!float 1e7 7 | learning_starts: 50000 8 | exploration_fraction: 0.025 # explore 250k steps = 10M * 0.025 9 | # If True, you need to deactivate handle_timeout_termination 10 | # in the replay_buffer_kwargs 11 | optimize_memory_usage: False 12 | 13 | # Tuned 14 | CartPole-v1: 15 | n_timesteps: !!float 5e4 16 | policy: 'MlpPolicy' 17 | learning_rate: !!float 2.3e-3 18 | batch_size: 64 19 | buffer_size: 100000 20 | learning_starts: 1000 21 | gamma: 0.99 22 | target_update_interval: 10 23 | train_freq: 256 24 | gradient_steps: 128 25 | exploration_fraction: 0.16 26 | exploration_final_eps: 0.04 27 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=10)" 28 | 29 | # Tuned 30 | MountainCar-v0: 31 | n_timesteps: !!float 
1.2e5 32 | policy: 'MlpPolicy' 33 | learning_rate: !!float 4e-3 34 | batch_size: 128 35 | buffer_size: 10000 36 | learning_starts: 1000 37 | gamma: 0.98 38 | target_update_interval: 600 39 | train_freq: 16 40 | gradient_steps: 8 41 | exploration_fraction: 0.2 42 | exploration_final_eps: 0.07 43 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)" 44 | 45 | # Tuned 46 | LunarLander-v2: 47 | n_timesteps: !!float 1e5 48 | policy: 'MlpPolicy' 49 | learning_rate: lin_1.5e-3 50 | batch_size: 128 51 | buffer_size: 100000 52 | learning_starts: 10000 53 | gamma: 0.995 54 | target_update_interval: 1 55 | train_freq: 256 56 | gradient_steps: -1 57 | exploration_fraction: 0.24 58 | exploration_final_eps: 0.18 59 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=170)" 60 | 61 | # Tuned 62 | Acrobot-v1: 63 | n_timesteps: !!float 1e5 64 | policy: 'MlpPolicy' 65 | learning_rate: !!float 6.3e-4 66 | batch_size: 128 67 | buffer_size: 50000 68 | learning_starts: 0 69 | gamma: 0.99 70 | target_update_interval: 250 71 | train_freq: 4 72 | gradient_steps: -1 73 | exploration_fraction: 0.12 74 | exploration_final_eps: 0.1 75 | policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)" 76 | -------------------------------------------------------------------------------- /hyperparams/td3.yml: -------------------------------------------------------------------------------- 1 | # Tuned 2 | MountainCarContinuous-v0: 3 | n_timesteps: 300000 4 | policy: 'MlpPolicy' 5 | noise_type: 'ornstein-uhlenbeck' 6 | noise_std: 0.5 7 | gradient_steps: 1 8 | train_freq: 1 9 | learning_rate: !!float 1e-3 10 | batch_size: 256 11 | policy_kwargs: "dict(net_arch=[400, 300])" 12 | 13 | Pendulum-v1: 14 | n_timesteps: 20000 15 | policy: 'MlpPolicy' 16 | gamma: 0.98 17 | buffer_size: 200000 18 | learning_starts: 10000 19 | noise_type: 'normal' 20 | noise_std: 0.1 21 | gradient_steps: 1 22 | train_freq: 1 23 | learning_rate: !!float 1e-3 24 | policy_kwargs: "dict(net_arch=[400, 300])" 25 | 26 | 
LunarLanderContinuous-v3: 27 | n_timesteps: !!float 3e5 28 | policy: 'MlpPolicy' 29 | gamma: 0.98 30 | buffer_size: 200000 31 | learning_starts: 10000 32 | noise_type: 'normal' 33 | noise_std: 0.1 34 | gradient_steps: 1 35 | train_freq: 1 36 | learning_rate: !!float 1e-3 37 | policy_kwargs: "dict(net_arch=[400, 300])" 38 | 39 | BipedalWalker-v3: 40 | n_timesteps: !!float 1e6 41 | policy: 'MlpPolicy' 42 | gamma: 0.98 43 | buffer_size: 200000 44 | learning_starts: 10000 45 | noise_type: 'normal' 46 | noise_std: 0.1 47 | gradient_steps: 1 48 | train_freq: 1 49 | learning_rate: !!float 1e-3 50 | policy_kwargs: "dict(net_arch=[400, 300])" 51 | 52 | # To be tuned 53 | BipedalWalkerHardcore-v3: 54 | n_timesteps: !!float 1e7 55 | policy: 'MlpPolicy' 56 | gamma: 0.99 57 | buffer_size: 1000000 58 | learning_starts: 10000 59 | noise_type: 'normal' 60 | noise_std: 0.1 61 | batch_size: 256 62 | train_freq: 1 63 | learning_rate: lin_7e-4 64 | policy_kwargs: "dict(net_arch=[400, 300])" 65 | 66 | # Tuned 67 | HalfCheetahBulletEnv-v0: &pybullet-defaults 68 | n_timesteps: !!float 1e6 69 | policy: 'MlpPolicy' 70 | gamma: 0.98 71 | buffer_size: 200000 72 | learning_starts: 10000 73 | noise_type: 'normal' 74 | noise_std: 0.1 75 | gradient_steps: 1 76 | train_freq: 1 77 | batch_size: 256 78 | learning_rate: !!float 7e-4 79 | policy_kwargs: "dict(net_arch=[400, 300])" 80 | 81 | AntBulletEnv-v0: 82 | <<: *pybullet-defaults 83 | 84 | HopperBulletEnv-v0: 85 | <<: *pybullet-defaults 86 | 87 | Walker2DBulletEnv-v0: 88 | <<: *pybullet-defaults 89 | 90 | 91 | # TO BE tested 92 | HumanoidBulletEnv-v0: 93 | n_timesteps: !!float 2e6 94 | policy: 'MlpPolicy' 95 | gamma: 0.98 96 | buffer_size: 200000 97 | learning_starts: 10000 98 | noise_type: 'normal' 99 | noise_std: 0.1 100 | train_freq: 1 101 | learning_rate: !!float 1e-3 102 | policy_kwargs: "dict(net_arch=[400, 300])" 103 | 104 | # Tuned 105 | ReacherBulletEnv-v0: 106 | <<: *pybullet-defaults 107 | n_timesteps: !!float 3e5 108 | 109 | # Tuned 
110 | InvertedDoublePendulumBulletEnv-v0: 111 | <<: *pybullet-defaults 112 | 113 | # Tuned 114 | InvertedPendulumSwingupBulletEnv-v0: 115 | <<: *pybullet-defaults 116 | n_timesteps: !!float 3e5 117 | 118 | 119 | MinitaurBulletEnv-v0: 120 | n_timesteps: !!float 1e6 121 | policy: 'MlpPolicy' 122 | gamma: 0.99 123 | buffer_size: 1000000 124 | noise_type: 'normal' 125 | noise_std: 0.1 126 | learning_starts: 10000 127 | batch_size: 256 128 | learning_rate: !!float 1e-3 129 | train_freq: 1 130 | gradient_steps: 1 131 | policy_kwargs: "dict(net_arch=[400, 300])" 132 | 133 | # === Mujoco Envs === 134 | HalfCheetah-v4: &mujoco-defaults 135 | n_timesteps: !!float 1e6 136 | policy: 'MlpPolicy' 137 | learning_starts: 10000 138 | noise_type: 'normal' 139 | noise_std: 0.1 140 | train_freq: 1 141 | gradient_steps: 1 142 | learning_rate: !!float 1e-3 143 | batch_size: 256 144 | policy_kwargs: "dict(net_arch=[400, 300])" 145 | 146 | Ant-v4: 147 | <<: *mujoco-defaults 148 | 149 | Hopper-v4: 150 | <<: *mujoco-defaults 151 | 152 | Walker2d-v4: 153 | <<: *mujoco-defaults 154 | 155 | Humanoid-v4: 156 | <<: *mujoco-defaults 157 | n_timesteps: !!float 2e6 158 | 159 | # Tuned 160 | Swimmer-v4: 161 | <<: *mujoco-defaults 162 | gamma: 0.9999 163 | -------------------------------------------------------------------------------- /images/car.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/images/car.jpg -------------------------------------------------------------------------------- /logs/benchmark/a2c-BeamRiderNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614768288.4174142, "env_id": "BeamRiderNoFrameskip-v4"} 2 | r,l,t 3 | 5310.0,19931,11.331775 4 | 5278.0,18863,19.145522 5 | 4412.0,18499,26.784143 6 | 2160.0,10025,30.907018 7 | 1380.0,7261,33.878756 8 | 1848.0,9623,37.852282 
9 | 6372.0,22457,47.104666 10 | 2160.0,10896,51.553509 11 | 5470.0,17871,58.882525 12 | 1380.0,7366,61.998566 13 | 1380.0,7597,65.10874 14 | 1380.0,7625,68.216608 15 | 1188.0,6035,70.682402 16 | 2496.0,12475,75.791831 17 | 3000.0,13264,81.213101 18 | 3840.0,14565,87.340508 19 | 1588.0,9303,91.146296 20 | 5024.0,18179,98.595311 21 | 4734.0,15926,105.128943 22 | 2496.0,11991,110.020293 23 | 3000.0,12733,115.234923 24 | 2160.0,10681,119.611882 25 | 1284.0,9009,123.288294 26 | 2160.0,10005,127.381307 27 | 1536.0,7641,130.503966 28 | 3964.0,16999,137.472344 29 | 1796.0,9815,141.513283 30 | 2776.0,12343,146.616668 31 | 1188.0,7561,149.761088 32 | 1952.0,9335,153.61017 33 | 4028.0,17717,160.872984 34 | 2160.0,11651,165.693949 35 | 1692.0,9569,169.702119 36 | 2160.0,10413,173.962517 37 | 3420.0,13499,179.485842 38 | 1484.0,7885,182.708011 39 | 3360.0,14125,188.490419 40 | 2056.0,11479,193.165845 41 | 3000.0,14509,199.107143 42 | 2832.0,11739,203.909528 43 | 4832.0,17885,211.235447 44 | 4576.0,18013,218.621224 45 | 4348.0,17821,226.003222 46 | 4384.0,15790,232.468659 47 | 2160.0,11752,237.280512 48 | 1640.0,8824,241.053055 49 | 3000.0,12559,246.359317 50 | -------------------------------------------------------------------------------- /logs/benchmark/a2c-BipedalWalkerHardcore-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615193378.914093, "env_id": "BipedalWalkerHardcore-v3"} 2 | r,l,t 3 | 267.45342,1121,3.428861 4 | 52.082271,706,4.11655 5 | 96.57381,881,4.968083 6 | -54.718438,2000,6.907327 7 | -40.642382,356,7.254022 8 | -74.03119,183,7.43183 9 | 92.052367,897,8.295187 10 | 131.774118,959,9.223105 11 | 42.129698,788,9.97768 12 | -13.796958,2000,11.907291 13 | 258.424171,1285,13.191344 14 | 270.596795,1127,14.412434 15 | 266.009622,1171,15.68599 16 | -37.48283,307,16.011555 17 | 5.6025,498,16.489811 18 | 71.054019,668,17.134797 19 | 226.553959,2000,19.053179 20 | -29.138281,320,19.361876 21 | 
114.494889,2000,21.290722 22 | 14.069756,475,21.746166 23 | -33.844138,2000,23.699775 24 | -71.932055,140,23.835918 25 | 43.003696,663,24.471339 26 | 266.815498,1151,25.580017 27 | -50.527918,2000,27.503519 28 | -43.358147,305,27.799032 29 | 64.183689,2000,29.724874 30 | 47.123,664,30.363194 31 | 267.460295,1167,31.487743 32 | 211.121624,2000,33.415691 33 | -5.723699,2000,35.34649 34 | 45.352271,2000,37.356237 35 | 31.473474,2000,39.551819 36 | -22.38959,2000,41.54504 37 | 262.385989,1233,42.732931 38 | -56.927479,2000,44.666323 39 | -35.414777,311,44.971309 40 | 37.318868,2000,46.90783 41 | 216.945029,2000,48.835921 42 | 268.85666,1135,49.923489 43 | 258.979918,1245,51.121087 44 | -53.220669,280,51.395679 45 | -52.96222,2000,53.395831 46 | -30.573014,2000,55.571042 47 | 86.642791,823,56.416 48 | 262.444665,1200,57.577227 49 | 264.190806,1200,58.728613 50 | -16.522844,490,59.203664 51 | 29.094346,2000,61.154416 52 | -7.494107,484,61.623292 53 | 146.257517,1037,62.624309 54 | 87.786025,705,63.30837 55 | 244.124279,1425,64.674047 56 | -5.306314,392,65.052497 57 | 206.864062,2000,66.985329 58 | 68.390227,2000,68.913324 59 | -78.383474,141,69.049458 60 | 32.96207,601,69.629996 61 | 266.996667,1162,70.747888 62 | 146.95878,967,71.681916 63 | -20.563405,2000,73.619046 64 | -41.638012,2000,75.55533 65 | -35.748264,2000,77.584659 66 | 262.590096,1215,78.910465 67 | 266.482528,1177,80.171633 68 | 261.938973,1184,81.313759 69 | -17.894561,2000,83.252325 70 | 256.634081,1282,84.484862 71 | 264.271622,1192,85.628885 72 | -56.296586,2000,87.590536 73 | 79.150195,2000,89.522783 74 | 260.159147,1227,90.70997 75 | 267.130668,1162,91.83126 76 | 125.648535,2000,93.906298 77 | -70.775179,2000,96.076391 78 | 267.10438,1163,97.195431 79 | 266.611174,1175,98.323065 80 | -35.588589,2000,100.260114 81 | 166.139694,2000,102.199227 82 | 265.671085,1187,103.346298 83 | 78.281103,717,104.035931 84 | -30.56892,2000,105.959747 85 | 104.053373,2000,107.905378 86 | 110.668196,2000,109.837001 87 | 
27.119705,706,110.521208 88 | 250.962943,1363,111.834788 89 | 259.842996,1249,113.04013 90 | 116.491483,2000,114.972856 91 | 110.491584,2000,116.926345 92 | 137.124841,2000,119.112893 93 | 44.207205,2000,121.151075 94 | 97.941463,1188,122.29184 95 | 13.754588,438,122.717643 96 | -35.686061,2000,124.655047 97 | 268.467145,1161,125.767297 98 | -85.521298,95,125.862401 99 | -15.321094,2000,127.812872 100 | 262.569942,1170,128.940263 101 | 37.488241,2000,130.87561 102 | -60.510632,2000,132.82389 103 | -67.947069,2000,135.042222 104 | 267.845441,1157,136.268686 105 | 265.24081,1146,137.37269 106 | 71.176076,2000,139.299905 107 | 19.659466,556,139.834481 108 | 157.192219,2000,141.771152 109 | -45.485992,297,142.060811 110 | 94.526833,953,142.978194 111 | 275.8062,1055,143.992682 112 | 147.267272,2000,145.937913 113 | -6.264808,2000,147.862182 114 | 46.27028,592,148.43445 115 | 260.913623,1234,149.621845 116 | -------------------------------------------------------------------------------- /logs/benchmark/a2c-BreakoutNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614767776.374316, "env_id": "BreakoutNoFrameskip-v4"} 2 | r,l,t 3 | 375.0,7606,6.076401 4 | 86.0,5504,8.302102 5 | 421.0,9923,12.304659 6 | 270.0,7092,15.171841 7 | 416.0,10123,19.272449 8 | 375.0,7936,22.472544 9 | 66.0,4884,24.44387 10 | 155.0,3572,25.881367 11 | 353.0,8509,29.312325 12 | 206.0,5237,31.436018 13 | 152.0,6099,33.923915 14 | 261.0,4678,35.802962 15 | 420.0,10346,39.934088 16 | 414.0,9589,43.782337 17 | 76.0,4809,45.771475 18 | 369.0,7685,48.882821 19 | 378.0,9284,52.609996 20 | 422.0,9561,56.527488 21 | 360.0,9243,60.541219 22 | 285.0,7841,63.847421 23 | 388.0,9060,67.705209 24 | 424.0,11123,72.290951 25 | 398.0,11786,77.135814 26 | 381.0,8441,80.522488 27 | 306.0,6006,82.938207 28 | 97.0,5661,85.224353 29 | 217.0,7602,88.45842 30 | 424.0,10106,92.498425 31 | 408.0,9388,96.425963 32 | 388.0,9039,100.346374 33 | 
365.0,7323,103.434267 34 | 254.0,4257,105.377463 35 | 377.0,6523,108.253703 36 | 172.0,4913,110.310678 37 | 54.0,4115,111.961039 38 | 427.0,9623,115.799037 39 | 290.0,8287,119.121087 40 | 321.0,6662,121.808789 41 | 341.0,8114,125.064535 42 | 299.0,7691,128.199606 43 | 101.0,5482,130.39826 44 | 25.0,3313,131.728176 45 | 149.0,7167,134.684085 46 | 396.0,6986,137.652418 47 | 20.0,3088,138.91907 48 | 367.0,6303,141.544503 49 | 60.0,5134,143.608451 50 | 272.0,7200,146.649598 51 | 241.0,8170,150.087444 52 | 323.0,7407,153.176795 53 | 60.0,5605,155.588469 54 | 357.0,7098,158.436126 55 | 382.0,11383,162.98275 56 | 346.0,7662,166.189121 57 | 433.0,11569,170.862319 58 | 322.0,8534,174.628042 59 | 364.0,9157,178.331138 60 | 393.0,11215,182.997787 61 | 33.0,3490,184.547825 62 | 184.0,6600,187.359139 63 | 116.0,6307,189.997411 64 | 389.0,7875,193.394374 65 | 162.0,6677,196.233643 66 | 227.0,5508,198.446902 67 | 349.0,8482,201.844003 68 | 232.0,5904,204.213293 69 | 322.0,7344,207.185952 70 | 98.0,5867,209.543781 71 | 409.0,7671,212.636367 72 | 256.0,5574,214.934022 73 | 78.0,3914,216.506063 74 | 232.0,5971,219.082668 75 | 277.0,7206,222.017627 76 | 345.0,8122,225.279926 77 | 258.0,8608,228.830755 78 | 134.0,6329,231.487061 79 | 224.0,5912,234.179806 80 | 377.0,7698,237.315182 81 | 412.0,9315,241.053151 82 | 304.0,6834,243.793879 83 | 363.0,9009,247.567084 84 | 360.0,10184,251.635881 85 | -------------------------------------------------------------------------------- /logs/benchmark/a2c-EnduroNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614793054.584203, "env_id": "EnduroNoFrameskip-v4"} 2 | r,l,t 3 | 0.0,13312,8.52731 4 | 0.0,13312,14.248429 5 | 0.0,13312,19.970835 6 | 0.0,13312,25.683224 7 | 0.0,13312,31.395868 8 | 0.0,13312,37.1207 9 | 0.0,13312,42.844106 10 | 0.0,13312,48.554223 11 | 0.0,13312,54.268349 12 | 0.0,13312,59.978596 13 | 0.0,13312,65.714733 14 | 0.0,13312,71.429556 15 | 
0.0,13312,77.140308 16 | 0.0,13312,82.848562 17 | 0.0,13312,88.577056 18 | 0.0,13312,94.29743 19 | 0.0,13312,100.011838 20 | 0.0,13312,105.724653 21 | 0.0,13312,111.4436 22 | 0.0,13312,117.16673 23 | 0.0,13312,122.898437 24 | 0.0,13312,128.624969 25 | 0.0,13312,134.347584 26 | 0.0,13312,140.069764 27 | 0.0,13312,145.793799 28 | 0.0,13312,151.52352 29 | 0.0,13312,157.250938 30 | 0.0,13312,162.968618 31 | 0.0,13312,168.684224 32 | 0.0,13312,174.38758 33 | 0.0,13312,180.092482 34 | 0.0,13312,185.795563 35 | 0.0,13312,191.487232 36 | 0.0,13312,197.166874 37 | 0.0,13312,202.842334 38 | 0.0,13312,208.507896 39 | 0.0,13312,214.19725 40 | 0.0,13312,219.878992 41 | 0.0,13312,225.556518 42 | 0.0,13312,231.221534 43 | 0.0,13312,236.901492 44 | 0.0,13312,242.571724 45 | 0.0,13312,248.255306 46 | 0.0,13312,253.931271 47 | 0.0,13312,259.607796 48 | -------------------------------------------------------------------------------- /logs/benchmark/a2c-PongNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614937367.290064, "env_id": "PongNoFrameskip-v4"} 2 | r,l,t 3 | 17.0,9410,6.490385 4 | 20.0,6460,9.053506 5 | 21.0,6612,11.682447 6 | 16.0,10462,15.840014 7 | 16.0,9991,19.805953 8 | 18.0,9430,23.54848 9 | 14.0,12478,28.513371 10 | 21.0,8217,31.778995 11 | 13.0,10952,36.132016 12 | 15.0,9796,40.019487 13 | 20.0,8360,43.334407 14 | 18.0,8692,46.782233 15 | 19.0,8696,50.229097 16 | 15.0,9513,54.008494 17 | 18.0,8654,57.441068 18 | 18.0,9672,61.291382 19 | 16.0,9992,65.258582 20 | 18.0,8518,68.636641 21 | 21.0,7975,71.803567 22 | 14.0,11213,76.254159 23 | 13.0,11658,80.887247 24 | 19.0,8659,84.324623 25 | 19.0,8699,87.776711 26 | 17.0,8714,91.239745 27 | 14.0,10644,95.463618 28 | 20.0,6268,97.947606 29 | 16.0,8232,101.212081 30 | 17.0,8342,104.524828 31 | 13.0,10497,108.687028 32 | 21.0,7977,111.853881 33 | 9.0,16950,118.590817 34 | 19.0,8345,121.907908 35 | 17.0,8330,125.210845 36 | 19.0,8607,128.627715 37 
| 21.0,7977,131.794741 38 | 16.0,10361,135.957308 39 | 19.0,7406,138.89799 40 | 14.0,9998,142.860846 41 | 19.0,8425,146.197888 42 | 9.0,13049,151.363693 43 | 18.0,7094,154.169493 44 | 13.0,10650,158.380911 45 | 18.0,9850,162.280141 46 | 20.0,8748,165.737029 47 | 11.0,14626,171.512476 48 | 20.0,7334,174.406643 49 | 19.0,7206,177.238544 50 | 17.0,7696,180.272341 51 | 20.0,8487,183.609726 52 | 18.0,9257,187.252612 53 | 20.0,6972,189.992589 54 | 20.0,6478,192.620501 55 | 19.0,7444,195.968758 56 | 19.0,9188,199.581835 57 | 15.0,10271,203.622099 58 | 19.0,8481,206.963809 59 | 18.0,9057,210.536548 60 | 21.0,7905,213.649117 61 | 20.0,7008,216.409976 62 | 6.0,14798,222.241111 63 | 21.0,7975,225.376031 64 | 15.0,10766,229.613505 65 | 20.0,6676,232.238959 66 | 17.0,8737,235.679444 67 | 21.0,7975,238.823486 68 | -------------------------------------------------------------------------------- /logs/benchmark/a2c-SeaquestNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615281451.6329484, "env_id": "SeaquestNoFrameskip-v4"} 2 | r,l,t 3 | 1720.0,9082,7.009356 4 | 1720.0,9082,11.020819 5 | 1800.0,9081,15.104989 6 | 1720.0,9081,19.505034 7 | 1780.0,9082,23.829598 8 | 1680.0,9082,28.13943 9 | 1720.0,9081,32.502165 10 | 1720.0,9082,36.898275 11 | 1780.0,9082,41.240505 12 | 1720.0,8826,45.521239 13 | 1760.0,9082,49.81492 14 | 1780.0,9081,54.232201 15 | 1760.0,9082,58.573261 16 | 1760.0,9082,62.944622 17 | 1460.0,7674,66.655156 18 | 1740.0,9082,71.012287 19 | 1760.0,9082,75.39344 20 | 1600.0,8538,79.487796 21 | 1760.0,9082,83.874407 22 | 1760.0,9081,88.244735 23 | 1800.0,9081,92.608476 24 | 1620.0,8449,96.688586 25 | 1680.0,8985,100.902476 26 | 1760.0,9081,105.253726 27 | 1760.0,9082,109.753073 28 | 1660.0,9082,114.145087 29 | 1660.0,9082,118.488832 30 | 1760.0,9081,122.930631 31 | 1760.0,9082,127.255234 32 | 1760.0,9082,131.570187 33 | 1740.0,9082,135.983795 34 | 1540.0,8262,140.112782 35 | 
1560.0,8458,144.235981 36 | 1740.0,9082,148.655545 37 | 1820.0,9082,153.08726 38 | 1780.0,9081,157.483775 39 | 1760.0,9082,161.925575 40 | 1800.0,9081,166.24076 41 | 1780.0,9082,170.670106 42 | 1780.0,9082,175.08615 43 | 1620.0,8890,179.489985 44 | 1760.0,9082,183.932905 45 | 1720.0,9082,188.377881 46 | 1740.0,9082,192.827981 47 | 1660.0,9082,197.229679 48 | 1720.0,9082,201.669629 49 | 1740.0,9082,206.061735 50 | 1760.0,9081,210.484065 51 | 1800.0,9082,214.923074 52 | 1820.0,9082,219.321101 53 | 1760.0,9082,223.750549 54 | 1680.0,8746,227.91953 55 | 1760.0,9081,232.40319 56 | 1760.0,9081,236.780692 57 | 1780.0,9082,241.235071 58 | 1560.0,8409,245.320963 59 | 1740.0,9081,249.75418 60 | 1780.0,9082,254.156047 61 | 1800.0,9082,258.546376 62 | 1820.0,9081,263.008498 63 | 1660.0,8570,267.151795 64 | 1780.0,9082,271.580838 65 | 1760.0,9082,275.890209 66 | 1760.0,9082,280.313025 67 | 1080.0,6389,283.3866 68 | 1740.0,9082,287.816725 69 | 1760.0,9082,292.247788 70 | -------------------------------------------------------------------------------- /logs/benchmark/a2c-SpaceInvadersNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614768543.9226692, "env_id": "SpaceInvadersNoFrameskip-v4"} 2 | r,l,t 3 | 495.0,3553,4.361108 4 | 550.0,3429,5.705929 5 | 550.0,3287,7.173348 6 | 730.0,3349,8.490565 7 | 545.0,3337,9.800351 8 | 525.0,3355,11.111516 9 | 545.0,2675,12.16402 10 | 570.0,2978,13.33383 11 | 570.0,3676,14.939109 12 | 545.0,3137,16.191074 13 | 800.0,3321,17.490357 14 | 515.0,3126,18.717305 15 | 580.0,3390,20.041372 16 | 575.0,3362,21.372456 17 | 345.0,2841,22.496135 18 | 515.0,3585,23.907067 19 | 575.0,2803,25.019226 20 | 520.0,3360,26.338413 21 | 545.0,3477,27.708291 22 | 925.0,5786,29.996137 23 | 835.0,5663,32.236963 24 | 525.0,3168,33.48305 25 | 1450.0,7057,36.259171 26 | 600.0,3823,37.755884 27 | 520.0,3563,39.157691 28 | 575.0,3715,40.611313 29 | 570.0,3322,41.912127 30 | 550.0,3481,43.280879 
31 | 570.0,3459,44.628411 32 | 515.0,3459,46.029524 33 | 575.0,3795,47.527183 34 | 525.0,3062,48.729837 35 | 545.0,3865,50.2381 36 | 745.0,2843,51.355055 37 | 575.0,3545,52.73239 38 | 600.0,3691,54.164446 39 | 485.0,4321,55.862061 40 | 1315.0,7361,58.748237 41 | 515.0,3484,60.111922 42 | 560.0,2739,61.186349 43 | 555.0,3289,62.582255 44 | 525.0,3659,64.016016 45 | 570.0,3440,65.357781 46 | 1155.0,6121,67.756124 47 | 545.0,3315,69.061701 48 | 570.0,3365,70.485821 49 | 570.0,3858,72.024043 50 | 540.0,2799,73.125923 51 | 775.0,4102,74.728046 52 | 1130.0,7383,77.617019 53 | 545.0,4152,79.233018 54 | 545.0,2633,80.268746 55 | 575.0,3038,81.466269 56 | 570.0,3209,82.723093 57 | 545.0,4051,84.292974 58 | 600.0,3437,85.638784 59 | 390.0,3047,87.014406 60 | 975.0,6881,89.72367 61 | 515.0,3819,91.222727 62 | 725.0,3221,92.494269 63 | 745.0,3249,93.762644 64 | 540.0,3393,95.268128 65 | 515.0,3435,96.621554 66 | 600.0,4159,98.248236 67 | 1115.0,7433,101.158546 68 | 370.0,2565,102.171789 69 | 570.0,2896,103.312471 70 | 540.0,3486,104.682461 71 | 525.0,3669,106.128078 72 | 1160.0,7370,109.019551 73 | 510.0,3912,110.559911 74 | 520.0,3469,111.917484 75 | 420.0,3453,113.272565 76 | 545.0,3116,114.497209 77 | 550.0,3450,115.850051 78 | 745.0,3147,117.086844 79 | 520.0,3287,118.369515 80 | 570.0,4413,120.086121 81 | 575.0,4237,121.742209 82 | 520.0,3777,123.223256 83 | 545.0,3377,124.550506 84 | 575.0,3467,125.921681 85 | 520.0,3360,127.257644 86 | 545.0,2761,128.345333 87 | 575.0,3862,129.863054 88 | 490.0,3505,131.242345 89 | 415.0,2915,132.388172 90 | 575.0,3159,133.637486 91 | 545.0,3223,134.904184 92 | 515.0,3273,136.193307 93 | 600.0,3703,137.632767 94 | 770.0,3621,139.062721 95 | 540.0,3294,140.35698 96 | 545.0,2845,141.470494 97 | 515.0,3457,142.824692 98 | 600.0,4255,144.484802 99 | 600.0,3891,145.999692 100 | 460.0,3043,147.201257 101 | 525.0,3496,148.572112 102 | 745.0,3746,150.075762 103 | 600.0,3229,151.412536 104 | 800.0,3625,152.829677 105 | 505.0,3329,154.141243 106 
| 1495.0,6859,156.837327 107 | 395.0,3243,158.115792 108 | 750.0,3166,159.360218 109 | 570.0,3679,160.795473 110 | 1010.0,4911,162.731656 111 | 570.0,3457,164.089787 112 | 605.0,2957,165.261341 113 | 1050.0,4771,167.193539 114 | 575.0,3417,168.530889 115 | 775.0,3331,169.843351 116 | 545.0,3855,171.345333 117 | 550.0,3275,172.629834 118 | 500.0,3263,173.936338 119 | 750.0,4023,175.632566 120 | 520.0,3051,176.833315 121 | 740.0,3550,178.231961 122 | 525.0,3157,179.469186 123 | 980.0,5809,181.75283 124 | 545.0,3373,183.07182 125 | 680.0,3205,184.332636 126 | 600.0,3309,185.631534 127 | 490.0,3827,187.135568 128 | 550.0,3344,188.445078 129 | 725.0,4961,190.382123 130 | 950.0,3156,191.617991 131 | 570.0,3672,193.044935 132 | 520.0,3355,194.369309 133 | 540.0,3834,195.876062 134 | 545.0,3689,197.317266 135 | 545.0,2803,198.413652 136 | 515.0,3243,199.68673 137 | 575.0,3441,201.035745 138 | 555.0,3152,202.270614 139 | 715.0,3957,203.821104 140 | 520.0,3490,205.186715 141 | 550.0,3545,206.592009 142 | 580.0,3537,207.970124 143 | 1255.0,7445,210.877233 144 | 605.0,3449,212.216876 145 | 520.0,3051,213.408286 146 | 1625.0,9443,217.101115 147 | 570.0,3159,218.331664 148 | 605.0,3249,219.598299 149 | 545.0,3231,220.860407 150 | 575.0,2931,221.999535 151 | 510.0,2737,223.074782 152 | 600.0,3699,224.509123 153 | 575.0,4609,226.294702 154 | 600.0,4095,227.885526 155 | 600.0,3505,229.250235 156 | 730.0,3457,230.704194 157 | 745.0,3130,231.928799 158 | 575.0,3216,233.187673 159 | 600.0,3293,234.468616 160 | 545.0,3218,235.712941 161 | 500.0,3685,237.147581 162 | 545.0,3973,238.775266 163 | 600.0,3801,240.252894 164 | 550.0,3391,241.570879 165 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-AsteroidsNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615283610.9370732, "env_id": "AsteroidsNoFrameskip-v4"} 2 | r,l,t 3 | 630.0,2948,3.806405 4 | 
880.0,4636,5.560121 5 | 1080.0,3886,7.029938 6 | 630.0,5936,9.272279 7 | 780.0,4538,10.979955 8 | 1080.0,5056,12.878166 9 | 730.0,2726,13.92655 10 | 530.0,1782,14.599309 11 | 830.0,3484,15.916041 12 | 780.0,4220,17.516311 13 | 1320.0,21476,25.599429 14 | 680.0,2820,26.653483 15 | 580.0,5602,28.763651 16 | 630.0,3204,29.963536 17 | 830.0,3230,31.171206 18 | 580.0,4414,32.818263 19 | 1350.0,13594,37.866124 20 | 530.0,2182,38.679443 21 | 980.0,6624,41.277205 22 | 380.0,2260,42.171792 23 | 530.0,2246,43.034077 24 | 680.0,2662,44.036025 25 | 830.0,3606,45.39094 26 | 980.0,3120,46.550376 27 | 730.0,3518,47.880018 28 | 380.0,2260,48.734687 29 | 660.0,3296,49.979011 30 | 830.0,4678,51.755557 31 | 580.0,2652,52.757058 32 | 630.0,2612,53.747996 33 | 380.0,1268,54.225423 34 | 780.0,3730,55.635921 35 | 1180.0,8152,58.709851 36 | 430.0,1970,59.45361 37 | 930.0,9838,63.172857 38 | 930.0,14486,68.64684 39 | 680.0,4178,70.234863 40 | 1180.0,22736,78.745257 41 | 630.0,1780,79.413591 42 | 730.0,3642,80.78565 43 | 380.0,1938,81.516054 44 | 230.0,1494,82.075747 45 | 930.0,4740,83.871758 46 | 530.0,4052,85.406782 47 | 930.0,2948,86.517116 48 | 930.0,3048,87.663416 49 | 1340.0,16218,93.72735 50 | 360.0,1496,94.288277 51 | 630.0,2744,95.325491 52 | 630.0,1780,95.993038 53 | 780.0,3612,97.35601 54 | 880.0,3692,98.752235 55 | 730.0,2452,99.674829 56 | 780.0,2676,100.681265 57 | 780.0,2324,101.55319 58 | 780.0,2678,102.566934 59 | 630.0,2612,103.553079 60 | 1080.0,4050,105.075956 61 | 1460.0,10328,108.963586 62 | 630.0,2438,109.88712 63 | 1180.0,16976,116.303472 64 | 1080.0,4614,118.073532 65 | 1080.0,4690,119.866369 66 | 780.0,3664,121.273588 67 | 830.0,4382,122.946313 68 | 830.0,4578,124.703727 69 | 730.0,2838,125.787912 70 | 430.0,2502,126.745888 71 | 610.0,3276,127.999948 72 | 780.0,2678,129.026407 73 | 730.0,4440,130.724924 74 | 1080.0,8960,134.155202 75 | 1080.0,5556,136.260477 76 | 630.0,3438,137.511145 77 | 1080.0,6520,139.76433 78 | 630.0,1780,140.379913 79 | 410.0,1252,140.832459 
80 | 780.0,2678,141.805494 81 | 380.0,2260,142.623271 82 | 630.0,1780,143.278926 83 | 1030.0,4518,145.009336 84 | 980.0,4404,146.690564 85 | 630.0,1780,147.365815 86 | 830.0,4266,148.99672 87 | 430.0,3342,150.270021 88 | 680.0,3476,151.585912 89 | 1080.0,10406,155.475321 90 | 730.0,4966,157.342425 91 | 930.0,4308,158.954403 92 | 880.0,4486,160.639949 93 | 1400.0,11686,165.001423 94 | 630.0,2992,166.126216 95 | 580.0,2652,167.116391 96 | 980.0,3866,168.562401 97 | 930.0,5498,170.620965 98 | 630.0,1780,171.284593 99 | 980.0,5008,173.16315 100 | 830.0,3448,174.455275 101 | 830.0,4424,176.110061 102 | 1030.0,4918,177.942581 103 | 230.0,1494,178.497264 104 | 830.0,4946,180.345697 105 | 930.0,6104,182.637209 106 | 780.0,4566,184.351494 107 | 1080.0,7800,187.232992 108 | 1320.0,8124,190.256808 109 | 430.0,1760,190.907736 110 | 880.0,6200,193.223942 111 | 880.0,2678,194.225293 112 | 680.0,3120,195.386348 113 | 780.0,2676,196.381358 114 | 530.0,3896,197.835135 115 | 1080.0,5948,200.057237 116 | 680.0,3930,201.528874 117 | 1180.0,4240,203.113463 118 | 630.0,3372,204.369068 119 | 430.0,2502,205.304568 120 | 830.0,4266,206.894893 121 | 630.0,3306,208.131841 122 | 730.0,4428,209.786296 123 | 830.0,5548,211.859851 124 | 830.0,4014,213.35282 125 | 430.0,2502,214.288179 126 | 680.0,4510,215.979968 127 | 930.0,4202,217.558226 128 | 780.0,5600,219.657292 129 | 1410.0,6062,221.910441 130 | 780.0,4486,223.582816 131 | 430.0,2502,224.521364 132 | 330.0,2164,225.330174 133 | 380.0,2010,226.078029 134 | 530.0,2628,227.055925 135 | 1280.0,7440,229.784199 136 | 1030.0,3484,231.088481 137 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-BeamRiderNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614793581.2582045, "env_id": "BeamRiderNoFrameskip-v4"} 2 | r,l,t 3 | 6576.0,20983,10.683172 4 | 2720.0,13592,15.828768 5 | 7740.0,23813,24.863982 6 | 
4320.0,18685,31.91907 7 | 2108.0,8766,35.233023 8 | 1380.0,7675,38.103883 9 | 5696.0,19085,45.328726 10 | 5768.0,17864,52.062259 11 | 4320.0,13583,57.184806 12 | 4606.0,18828,64.292317 13 | 6478.0,19078,71.502656 14 | 2160.0,10273,75.362682 15 | 4260.0,16581,81.626119 16 | 6742.0,22736,90.241258 17 | 1952.0,9649,93.879019 18 | 3480.0,16215,99.980142 19 | 2160.0,12859,104.814137 20 | 3360.0,15497,110.641296 21 | 5376.0,19580,118.032608 22 | 6440.0,20723,125.866222 23 | 4734.0,17357,132.415582 24 | 4350.0,17837,139.141831 25 | 1432.0,8217,142.219953 26 | 4804.0,17582,148.852138 27 | 3420.0,14283,154.229172 28 | 4448.0,16951,160.630883 29 | 7140.0,21519,168.752505 30 | 6810.0,21377,176.88184 31 | 5182.0,20176,184.515731 32 | 3780.0,15807,190.476166 33 | 4796.0,20191,198.123916 34 | 1380.0,7373,200.899911 35 | 6292.0,22057,209.207382 36 | 1744.0,9035,212.610699 37 | 2160.0,9577,216.214398 38 | 4680.0,16599,222.475892 39 | 4156.0,18829,229.597445 40 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-BreakoutNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614778785.8173542, "env_id": "BreakoutNoFrameskip-v4"} 2 | r,l,t 3 | 390.0,14936,8.298947 4 | 310.0,8119,11.281157 5 | 137.0,5823,13.430059 6 | 308.0,8376,16.514826 7 | 405.0,9718,20.091962 8 | 375.0,10544,23.962725 9 | 396.0,11052,28.012314 10 | 399.0,10183,31.745516 11 | 77.0,4572,33.434542 12 | 332.0,10465,37.285213 13 | 377.0,11866,41.624256 14 | 261.0,6623,44.058955 15 | 402.0,6847,46.571992 16 | 387.0,9037,49.892307 17 | 303.0,9094,53.333262 18 | 319.0,22476,61.599988 19 | 372.0,8259,64.668601 20 | 396.0,26450,74.652882 21 | 318.0,6843,77.411975 22 | 372.0,8167,80.415497 23 | 400.0,21998,88.477855 24 | 351.0,20941,96.400516 25 | 397.0,14151,101.604712 26 | 403.0,8723,104.806063 27 | 370.0,9713,108.366861 28 | 386.0,7965,111.298332 29 | 381.0,10702,115.23716 30 | 
408.0,10803,119.190315 31 | 387.0,8583,122.349447 32 | 419.0,8411,125.439177 33 | 374.0,8879,128.696841 34 | 403.0,11577,133.006629 35 | 405.0,14521,138.325462 36 | 354.0,8603,141.489714 37 | 355.0,8642,144.662911 38 | 390.0,15271,150.377894 39 | 393.0,9215,153.764675 40 | 401.0,8586,157.064263 41 | 369.0,9011,160.368164 42 | 405.0,8090,163.333655 43 | 306.0,13302,168.210222 44 | 399.0,23832,177.095047 45 | 320.0,8164,180.124154 46 | 404.0,10433,184.107888 47 | 379.0,9634,187.624143 48 | 399.0,8575,190.755317 49 | 336.0,13848,195.785423 50 | 383.0,7562,198.536605 51 | 368.0,8137,201.509571 52 | 387.0,8579,204.624538 53 | 292.0,7483,207.352157 54 | 407.0,9572,210.8472 55 | 342.0,25215,220.13302 56 | 304.0,6047,222.341196 57 | 295.0,7243,224.988355 58 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-EnduroNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614937878.9643338, "env_id": "EnduroNoFrameskip-v4"} 2 | r,l,t 3 | 773.0,39936,18.466804 4 | 1076.0,53248,39.507428 5 | 1022.0,53248,60.545804 6 | 1049.0,53248,81.489567 7 | 792.0,39936,97.197674 8 | 1012.0,53248,118.132145 9 | 771.0,39936,133.855354 10 | 785.0,39936,149.558964 11 | 484.0,26624,160.020037 12 | 788.0,39936,175.723603 13 | 792.0,39936,191.440947 14 | 784.0,39936,207.150612 15 | 433.0,26624,217.608288 16 | 1072.0,53248,238.547646 17 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1659728618.0716512, "env_id": "MsPacmanNoFrameskip-v4"} 2 | r,l,t 3 | 3240.0,4610,4.565343 4 | 2380.0,3650,6.179767 5 | 3240.0,4722,8.242586 6 | 2950.0,4218,9.901634 7 | 2440.0,4450,11.644655 8 | 1600.0,3226,12.901034 9 | 3140.0,3970,14.512564 10 | 2570.0,4658,16.381635 11 | 2720.0,4242,18.09169 12 | 
2540.0,4274,19.819639 13 | 2450.0,4098,21.46781 14 | 2450.0,4330,23.219339 15 | 2980.0,4578,25.034832 16 | 2750.0,4714,26.924132 17 | 3050.0,4386,28.647907 18 | 2840.0,4490,30.418036 19 | 2300.0,4402,32.153116 20 | 2290.0,3586,33.554561 21 | 1750.0,3802,35.044998 22 | 2400.0,4074,36.637052 23 | 3240.0,4546,38.421748 24 | 1760.0,4346,40.121146 25 | 2650.0,4402,41.842615 26 | 2500.0,3530,43.217673 27 | 2450.0,4570,45.000941 28 | 3140.0,4618,46.799562 29 | 2880.0,3730,48.256968 30 | 2240.0,3842,49.760671 31 | 3050.0,5538,51.925182 32 | 2730.0,4466,53.678259 33 | 3630.0,4698,55.515136 34 | 3150.0,4578,57.304975 35 | 3290.0,4794,59.179325 36 | 2210.0,3146,60.410003 37 | 2850.0,4450,62.155477 38 | 2480.0,4154,63.773322 39 | 3240.0,4458,65.523309 40 | 2850.0,4450,67.262537 41 | 2640.0,4066,68.852192 42 | 2440.0,4226,70.500446 43 | 3240.0,4626,72.309346 44 | 2830.0,4498,74.066068 45 | 2040.0,3810,75.559353 46 | 2850.0,4938,77.493622 47 | 2180.0,3202,78.742667 48 | 2960.0,4474,80.49608 49 | 2640.0,4714,82.340948 50 | 2240.0,4426,84.070252 51 | 2020.0,4474,85.819671 52 | 2550.0,4234,87.476478 53 | 2300.0,3754,88.943698 54 | 2740.0,3970,90.497502 55 | 1760.0,3698,91.945864 56 | 3110.0,4266,93.617598 57 | 2060.0,3546,94.994061 58 | 2440.0,4258,96.665241 59 | 2850.0,4530,98.440153 60 | 2850.0,4450,100.183625 61 | 2430.0,5162,102.198829 62 | 3030.0,4178,103.835441 63 | 2580.0,3618,105.243359 64 | 2850.0,4450,106.991979 65 | 2720.0,4242,108.651297 66 | 3030.0,4562,110.433888 67 | 2450.0,4354,112.137468 68 | 2700.0,4162,113.759301 69 | 3640.0,4898,115.678251 70 | 2850.0,3986,117.23745 71 | 3030.0,3946,118.78445 72 | 2890.0,4994,120.743981 73 | 2580.0,4786,122.618189 74 | 2640.0,5162,124.632866 75 | 2290.0,4098,126.237313 76 | 2280.0,4842,128.13284 77 | 2640.0,4650,129.952747 78 | 2440.0,4258,131.618814 79 | 2430.0,4226,133.269789 80 | 2640.0,4642,135.086737 81 | 2230.0,3522,136.464286 82 | 2200.0,3146,137.690324 83 | 2530.0,4778,139.554016 84 | 2960.0,4066,141.136904 85 | 
2850.0,4450,142.878844 86 | 2840.0,4386,144.598397 87 | 3240.0,4626,146.411167 88 | 2180.0,3202,147.667167 89 | 2440.0,4338,149.359808 90 | 2230.0,3330,150.663123 91 | 2450.0,4810,152.543828 92 | 2840.0,4218,154.185746 93 | 2280.0,4458,155.923587 94 | 3640.0,4418,157.645611 95 | 2350.0,6634,160.237373 96 | 2450.0,4450,161.976687 97 | 2050.0,4258,163.636643 98 | 2740.0,4322,165.323966 99 | 2570.0,4162,166.970684 100 | 2740.0,4378,168.688692 101 | 2850.0,4130,170.294853 102 | 2320.0,3474,171.649862 103 | 3040.0,4186,173.286216 104 | 2730.0,4274,174.960623 105 | 2850.0,4322,176.654881 106 | 3040.0,4394,178.378233 107 | 4230.0,4306,180.056643 108 | 1840.0,4114,181.660494 109 | 3240.0,4842,183.554174 110 | 2570.0,3658,184.979727 111 | 2220.0,4146,186.601411 112 | 1830.0,3378,187.914714 113 | 1670.0,3226,189.174523 114 | 2300.0,3850,190.687076 115 | 2710.0,3842,192.195436 116 | 2850.0,4450,193.952946 117 | 3080.0,5314,196.122141 118 | 2880.0,4922,198.095347 119 | 2240.0,4066,199.693413 120 | 2450.0,4578,201.546361 121 | 2440.0,4066,203.163933 122 | 5040.0,4266,204.833798 123 | 1830.0,4090,206.447688 124 | 2230.0,3650,207.992231 125 | 3240.0,5170,210.199356 126 | 4040.0,4970,212.319058 127 | 3230.0,5090,214.496098 128 | 2740.0,4338,216.337857 129 | 2740.0,4386,218.197287 130 | 3640.0,4378,220.113769 131 | 2270.0,3418,221.690941 132 | 3050.0,5098,223.746212 133 | 2450.0,3482,225.097706 134 | 3240.0,4578,226.889236 135 | 2450.0,3906,228.403416 136 | 3030.0,3970,230.008759 137 | 2310.0,3906,231.596919 138 | 2640.0,4850,233.561483 139 | 2730.0,4274,235.296346 140 | 2640.0,4290,237.03278 141 | 3040.0,4402,238.817874 142 | 2540.0,3778,240.350946 143 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-PongNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614779237.7516618, "env_id": "PongNoFrameskip-v4"} 2 | r,l,t 3 | 20.0,6948,5.213674 4 | 
21.0,6683,7.638885 5 | 20.0,6939,10.154375 6 | 20.0,7144,12.737012 7 | 21.0,6533,15.094103 8 | 21.0,6527,17.458776 9 | 21.0,6692,19.88431 10 | 20.0,7397,22.561709 11 | 20.0,7379,25.2273 12 | 21.0,6755,27.670537 13 | 21.0,6535,30.035142 14 | 19.0,7189,32.634809 15 | 21.0,6533,34.991524 16 | 21.0,6527,37.351326 17 | 21.0,6527,39.702946 18 | 21.0,6687,42.109237 19 | 21.0,6533,44.463506 20 | 21.0,6620,46.849023 21 | 21.0,6533,49.212942 22 | 21.0,6995,51.728969 23 | 21.0,6860,54.200454 24 | 21.0,6533,56.553215 25 | 20.0,7258,59.159745 26 | 20.0,6752,61.596981 27 | 19.0,7025,64.160838 28 | 21.0,6687,66.576899 29 | 21.0,6927,69.062689 30 | 21.0,6692,71.471189 31 | 19.0,7170,74.047396 32 | 20.0,7392,76.718277 33 | 21.0,6687,79.130105 34 | 20.0,6853,81.594396 35 | 20.0,6992,84.121735 36 | 21.0,6533,86.475407 37 | 19.0,7365,89.140565 38 | 21.0,6527,91.490788 39 | 21.0,6692,93.91832 40 | 20.0,7269,96.531502 41 | 21.0,6527,98.888406 42 | 20.0,7273,101.498355 43 | 20.0,6681,103.895792 44 | 20.0,7136,106.459496 45 | 21.0,6620,108.843947 46 | 21.0,6533,111.20154 47 | 21.0,6692,113.604377 48 | 21.0,6692,116.014685 49 | 21.0,7000,118.542409 50 | 21.0,6687,120.945755 51 | 21.0,7258,123.563952 52 | 19.0,7286,126.18596 53 | 21.0,6763,128.66123 54 | 21.0,6690,131.142843 55 | 21.0,6687,133.553499 56 | 21.0,6687,135.956716 57 | 21.0,6671,138.370338 58 | 21.0,6527,140.729741 59 | 21.0,6527,143.078396 60 | 21.0,6527,145.443633 61 | 21.0,6683,147.850354 62 | 21.0,6692,150.267021 63 | 20.0,6922,152.779045 64 | 21.0,6527,155.125277 65 | 20.0,7446,157.806427 66 | 20.0,6737,160.240098 67 | 21.0,6533,162.594134 68 | 21.0,6906,165.074382 69 | 21.0,6533,167.429593 70 | 20.0,6745,169.864267 71 | 21.0,6999,172.378367 72 | 21.0,6508,174.721212 73 | 21.0,6915,177.212817 74 | 20.0,7224,179.820872 75 | 20.0,7279,182.435939 76 | 21.0,6692,184.85815 77 | 20.0,6942,187.353981 78 | 21.0,6527,189.70991 79 | 21.0,6533,192.060815 80 | 21.0,6527,194.421159 81 | 21.0,6755,196.848697 82 | 21.0,6595,199.219058 83 
| 20.0,7042,201.748603 84 | 21.0,6692,204.171969 85 | 21.0,6527,206.523586 86 | 20.0,6853,208.986539 87 | 19.0,7552,211.70457 88 | 21.0,6692,214.113817 89 | 21.0,6533,216.462623 90 | 21.0,6533,218.815706 91 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-QbertNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614779461.8854597, "env_id": "QbertNoFrameskip-v4"} 2 | r,l,t 3 | 500.0,2147,3.450365 4 | 675.0,2280,4.281284 5 | 850.0,3327,5.504148 6 | 11350.0,6037,7.714523 7 | 3900.0,2954,8.786828 8 | 15300.0,6627,11.213221 9 | 4225.0,4297,12.786068 10 | 14600.0,6943,15.321182 11 | 425.0,1954,16.032893 12 | 775.0,2362,16.890738 13 | 14800.0,6422,19.253953 14 | 4250.0,3370,20.482237 15 | 11525.0,5167,22.36452 16 | 11425.0,4544,24.030699 17 | 11350.0,4444,25.65287 18 | 8575.0,5000,27.47626 19 | 14725.0,6593,29.891513 20 | 4100.0,2769,30.893262 21 | 14800.0,6422,33.239472 22 | 650.0,2632,34.196923 23 | 11700.0,7037,36.776742 24 | 11625.0,6226,39.061646 25 | 11525.0,5167,40.944191 26 | 3925.0,2408,41.811353 27 | 11675.0,7397,44.505452 28 | 11950.0,4852,46.267992 29 | 700.0,2432,47.143335 30 | 11625.0,6157,49.401633 31 | 11425.0,4604,51.083146 32 | 11425.0,4604,52.764815 33 | 875.0,2887,53.816217 34 | 14600.0,5348,55.76363 35 | 4000.0,2792,56.77684 36 | 14650.0,6588,59.178909 37 | 11425.0,4604,60.859552 38 | 14650.0,5609,63.0423 39 | 11625.0,6157,65.393578 40 | 725.0,2217,66.189359 41 | 14725.0,6612,68.602587 42 | 14800.0,6687,71.035443 43 | 7775.0,4892,72.811937 44 | 14625.0,5478,74.801383 45 | 14600.0,5088,76.658427 46 | 11100.0,4034,78.122569 47 | 50.0,1199,78.547241 48 | 15150.0,6012,80.753348 49 | 11575.0,6872,83.254765 50 | 4425.0,3952,84.685961 51 | 4250.0,2785,85.690472 52 | 7800.0,3592,86.996258 53 | 700.0,2424,87.872761 54 | 7875.0,5553,89.891262 55 | 650.0,2722,90.881686 56 | 475.0,1785,91.521566 57 | 14600.0,5678,93.58827 58 | 
14650.0,5558,95.610656 59 | 11425.0,4604,97.288862 60 | 11425.0,4604,98.97228 61 | 650.0,2592,99.915128 62 | 11400.0,5527,101.934255 63 | 4925.0,4992,103.74385 64 | 14750.0,7337,106.41443 65 | 750.0,2639,107.370049 66 | 11875.0,5802,109.488307 67 | 14800.0,6422,112.072564 68 | 11950.0,5387,114.030024 69 | 15050.0,7730,116.853713 70 | 14600.0,5798,118.965109 71 | 875.0,3017,120.060684 72 | 11425.0,4604,121.752643 73 | 4325.0,3712,123.10552 74 | 4025.0,3307,124.304083 75 | 14600.0,6413,126.636068 76 | 14800.0,6157,128.87671 77 | 750.0,2373,129.735242 78 | 14650.0,6723,132.529886 79 | 11975.0,6208,134.864778 80 | 11425.0,4664,136.570588 81 | 775.0,2362,137.425285 82 | 4000.0,2717,138.410109 83 | 11750.0,6157,140.658257 84 | 11625.0,6157,143.046972 85 | 9225.0,3942,144.577907 86 | 14600.0,5403,146.544769 87 | 14875.0,6683,148.984606 88 | 15075.0,6292,151.476602 89 | 11250.0,3950,152.95514 90 | 14700.0,7993,155.873396 91 | 14725.0,6352,158.215972 92 | 14700.0,7312,160.962415 93 | 14700.0,6803,163.442979 94 | 3675.0,2443,164.332046 95 | 14600.0,6743,166.793701 96 | 3700.0,3134,167.926943 97 | 4150.0,4097,169.420132 98 | 11550.0,5930,171.578741 99 | 11425.0,4604,173.259727 100 | 8075.0,3280,174.520545 101 | 14800.0,6422,176.900329 102 | 8025.0,3705,178.240064 103 | 11575.0,7623,181.017266 104 | 750.0,2803,182.034169 105 | 11425.0,4604,183.710549 106 | 11625.0,6157,185.954851 107 | 14800.0,6432,188.295226 108 | 14600.0,6813,190.778485 109 | 14850.0,6704,193.219888 110 | 650.0,2722,194.206065 111 | 15100.0,6267,196.488646 112 | 15025.0,5802,198.600253 113 | 750.0,2803,199.618398 114 | 775.0,2397,200.487827 115 | 14600.0,5838,202.614424 116 | 11100.0,4049,204.087964 117 | 3625.0,2323,204.925828 118 | 14800.0,6432,207.265786 119 | 14725.0,6812,209.751004 120 | 14600.0,6068,212.2734 121 | 12250.0,7827,215.238562 122 | 14800.0,6422,217.575494 123 | 825.0,3072,218.685553 124 | 11525.0,5064,220.524356 125 | 14650.0,5578,222.557985 126 | 14975.0,7839,225.419132 127 | 
-------------------------------------------------------------------------------- /logs/benchmark/dqn-RoadRunnerNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615283352.8105607, "env_id": "RoadRunnerNoFrameskip-v4"} 2 | r,l,t 3 | 45600.0,4331,4.578996 4 | 35200.0,4359,6.388526 5 | 49100.0,4577,8.294341 6 | 46300.0,4812,10.291539 7 | 30400.0,2996,11.530835 8 | 40600.0,4622,13.434747 9 | 37400.0,3984,15.079498 10 | 46600.0,4608,16.980999 11 | 41300.0,4311,18.765128 12 | 41500.0,4272,20.535463 13 | 38300.0,4694,22.473144 14 | 26400.0,4265,24.230634 15 | 28700.0,4342,26.017114 16 | 28800.0,4423,27.848736 17 | 41000.0,4308,29.627312 18 | 47100.0,4669,31.560422 19 | 37300.0,4405,33.359143 20 | 36700.0,4353,35.152945 21 | 34900.0,4492,37.008729 22 | 41400.0,4212,38.757955 23 | 32400.0,4408,40.58818 24 | 28500.0,4744,42.547467 25 | 35700.0,4446,44.38607 26 | 52500.0,4346,46.184409 27 | 33100.0,4171,47.908489 28 | 38000.0,4278,49.682371 29 | 44100.0,4193,51.412412 30 | 30800.0,4582,53.31371 31 | 43600.0,4589,55.214548 32 | 36600.0,4183,56.942748 33 | 44600.0,4574,58.847455 34 | 53600.0,4590,60.767837 35 | 44600.0,4267,62.550773 36 | 30900.0,4176,64.288576 37 | 35600.0,4288,66.056793 38 | 56000.0,4673,67.991279 39 | 49000.0,4516,69.869656 40 | 46800.0,4460,71.706389 41 | 36600.0,4630,73.619184 42 | 50000.0,4389,75.435945 43 | 42600.0,4499,77.273676 44 | 38500.0,4665,79.194321 45 | 34300.0,4593,81.097265 46 | 45300.0,4486,82.948642 47 | 50100.0,4341,84.745933 48 | 35700.0,4318,86.536117 49 | 39600.0,4860,88.540414 50 | 33300.0,4382,90.352031 51 | 43400.0,4286,92.123165 52 | 48500.0,4734,94.088329 53 | 22900.0,3440,95.512706 54 | 37700.0,4222,97.262531 55 | 39200.0,4558,99.151146 56 | 51700.0,4582,101.045657 57 | 30400.0,4121,102.746956 58 | 41500.0,4542,104.62781 59 | 36400.0,4351,106.431224 60 | 35100.0,4365,108.237686 61 | 42900.0,4423,110.065588 62 | 52600.0,4707,112.015754 63 | 
52000.0,4405,113.839923 64 | 32100.0,4448,115.679077 65 | 34000.0,4537,117.548685 66 | 43800.0,4494,119.412248 67 | 39900.0,4682,121.348915 68 | 33300.0,4531,123.198588 69 | 35700.0,4454,125.039904 70 | 43600.0,4270,126.80856 71 | 37400.0,4224,128.562729 72 | 45000.0,4304,130.349417 73 | 48000.0,4372,132.164388 74 | 40500.0,4486,134.016303 75 | 46300.0,4519,135.889534 76 | 59500.0,4447,137.728427 77 | 46100.0,4420,139.553422 78 | 38500.0,4342,141.348007 79 | 40200.0,4308,143.126172 80 | 31600.0,4513,144.993962 81 | 40900.0,4414,146.819435 82 | 38300.0,4590,148.714608 83 | 38400.0,4326,150.503732 84 | 48000.0,4393,152.340948 85 | 46600.0,4398,154.171558 86 | 34900.0,4412,156.00668 87 | 52500.0,4694,157.958101 88 | 43100.0,4529,159.842753 89 | 39800.0,4345,161.651971 90 | 42600.0,4415,163.485021 91 | 47100.0,4774,165.471995 92 | 41500.0,4421,167.306136 93 | 41400.0,4230,169.038306 94 | 32700.0,4277,170.820947 95 | 50000.0,4406,172.647852 96 | 25600.0,4351,174.455871 97 | 37400.0,4655,176.389951 98 | 42100.0,4412,178.224521 99 | 44500.0,4539,180.112359 100 | 42700.0,4422,181.948988 101 | 37700.0,4614,183.867699 102 | 43900.0,4190,185.605365 103 | 37700.0,4233,187.360487 104 | 45000.0,3984,189.032385 105 | 47700.0,4382,190.859018 106 | 45100.0,4139,192.587108 107 | 41800.0,4318,194.378205 108 | 45000.0,4390,196.197077 109 | 46800.0,4318,197.987917 110 | 44100.0,4251,199.751307 111 | 40300.0,4446,201.604496 112 | 33900.0,4309,203.388354 113 | 49600.0,4560,205.283296 114 | 32000.0,4601,207.192033 115 | 31000.0,4477,209.046702 116 | 44300.0,4318,210.844528 117 | 39700.0,4454,212.696337 118 | 45700.0,4409,214.494696 119 | 26800.0,4208,216.242845 120 | 26600.0,4133,217.952397 121 | 43600.0,4781,219.946394 122 | 34900.0,4468,221.796999 123 | 47100.0,4464,223.652724 124 | 48100.0,4216,225.403106 125 | 31900.0,4136,227.11711 126 | 36100.0,4235,228.873448 127 | 30300.0,3572,230.364285 128 | 27000.0,4702,232.3186 129 | 50000.0,4601,234.225036 130 | 50100.0,4604,236.139172 131 | 
33900.0,4382,237.965039 132 | 33600.0,4544,239.851667 133 | 40600.0,4619,241.768723 134 | 35000.0,4284,243.543154 135 | 41800.0,4520,245.416899 136 | 48600.0,4593,247.330962 137 | 33600.0,4376,249.147818 138 | 41000.0,4128,250.868949 139 | 45500.0,4526,252.75774 140 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-SeaquestNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615283104.8211045, "env_id": "SeaquestNoFrameskip-v4"} 2 | r,l,t 3 | 760.0,4674,4.580214 4 | 1080.0,6074,6.95292 5 | 1900.0,9529,10.720694 6 | 1840.0,9065,14.293803 7 | 1840.0,9082,17.876858 8 | 1840.0,9114,21.572851 9 | 1660.0,7806,24.662641 10 | 2320.0,8890,28.260154 11 | 2000.0,7674,31.37266 12 | 1320.0,6921,34.148088 13 | 2600.0,9818,38.122417 14 | 1940.0,9530,41.912591 15 | 2880.0,10146,45.971469 16 | 2600.0,9529,49.750961 17 | 1340.0,6297,52.203349 18 | 2020.0,8334,55.703941 19 | 2200.0,10841,60.11372 20 | 1060.0,5982,62.467764 21 | 1880.0,9321,66.280149 22 | 2500.0,9450,70.017334 23 | 2460.0,9274,73.811825 24 | 2660.0,9526,77.645413 25 | 1840.0,7705,80.782437 26 | 940.0,5402,82.964944 27 | 2080.0,10182,87.057083 28 | 1840.0,7298,90.006956 29 | 1600.0,7890,93.164201 30 | 1660.0,8666,96.604625 31 | 2960.0,10586,100.910327 32 | 1420.0,7834,103.996339 33 | 2380.0,9149,107.735687 34 | 1440.0,7401,110.651457 35 | 1860.0,9114,114.266929 36 | 2920.0,10269,118.386817 37 | 2000.0,9786,122.288618 38 | 1880.0,9082,125.961307 39 | 2580.0,9690,129.830575 40 | 1480.0,7609,132.819636 41 | 3560.0,11865,137.549402 42 | 3040.0,10390,141.719084 43 | 1160.0,6614,144.408233 44 | 1240.0,6746,147.14411 45 | 2200.0,9546,150.988787 46 | 1760.0,9329,154.759344 47 | 1920.0,9273,158.51737 48 | 1960.0,8138,161.755239 49 | 3500.0,12137,166.541739 50 | 1660.0,7074,169.483862 51 | 1880.0,9466,173.323722 52 | 1400.0,6690,175.960091 53 | 1400.0,7418,178.906495 54 | 2880.0,10650,183.116402 55 | 
1960.0,9450,186.79492 56 | 2200.0,8982,190.068956 57 | 1900.0,7634,193.011389 58 | 1940.0,7545,196.121619 59 | 2120.0,8689,199.655051 60 | 2820.0,10073,203.763259 61 | 1020.0,5914,206.09671 62 | 2200.0,10458,210.370167 63 | 3000.0,11034,214.772371 64 | 1940.0,9786,218.751435 65 | 760.0,4710,220.653248 66 | 1680.0,8433,224.079637 67 | 1840.0,9137,227.823316 68 | 2400.0,8550,231.249566 69 | 2180.0,10842,235.681875 70 | 2400.0,9022,239.407329 71 | 2520.0,9370,243.204571 72 | -------------------------------------------------------------------------------- /logs/benchmark/dqn-SpaceInvadersNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614779016.280488, "env_id": "SpaceInvadersNoFrameskip-v4"} 2 | r,l,t 3 | 775.0,3344,3.886279 4 | 455.0,2929,4.941654 5 | 550.0,3315,6.127118 6 | 570.0,3923,7.515705 7 | 800.0,4259,9.018671 8 | 395.0,2559,9.939902 9 | 530.0,3307,11.118369 10 | 710.0,3229,12.272132 11 | 515.0,3663,13.577838 12 | 1120.0,3797,14.927365 13 | 900.0,4037,16.356874 14 | 570.0,3796,17.697156 15 | 515.0,3819,19.042766 16 | 630.0,3821,20.39839 17 | 720.0,3329,21.582514 18 | 875.0,6020,23.740323 19 | 545.0,3898,25.119765 20 | 395.0,2387,25.980374 21 | 545.0,3995,27.394111 22 | 730.0,3886,28.770375 23 | 525.0,3565,30.034888 24 | 600.0,3905,31.412884 25 | 600.0,4225,32.896876 26 | 600.0,4113,34.346998 27 | 605.0,4073,35.774542 28 | 555.0,3933,37.161738 29 | 575.0,3301,38.332035 30 | 520.0,3607,39.609969 31 | 375.0,2529,40.513565 32 | 525.0,2848,41.528867 33 | 600.0,4647,43.174772 34 | 770.0,4485,44.771373 35 | 485.0,2689,45.728803 36 | 605.0,3853,47.0837 37 | 480.0,3251,48.242046 38 | 395.0,2559,49.15564 39 | 550.0,3486,50.391175 40 | 395.0,2559,51.305544 41 | 470.0,3189,52.442224 42 | 395.0,2559,53.357437 43 | 1205.0,6295,55.603836 44 | 605.0,3041,56.676831 45 | 600.0,3563,57.931883 46 | 775.0,4513,59.52345 47 | 485.0,3935,60.921994 48 | 600.0,3349,62.106138 49 | 570.0,3127,63.205454 50 | 
570.0,3536,64.455716 51 | 465.0,3911,65.845823 52 | 610.0,4385,67.388742 53 | 545.0,3415,68.59359 54 | 525.0,3900,69.970141 55 | 480.0,3377,71.175281 56 | 570.0,3422,72.396504 57 | 570.0,4113,73.83675 58 | 445.0,2557,74.757191 59 | 530.0,2628,75.687501 60 | 395.0,2559,76.603561 61 | 450.0,4293,78.111617 62 | 550.0,3052,79.200124 63 | 800.0,4829,80.877832 64 | 540.0,3731,82.18373 65 | 1200.0,7364,84.811868 66 | 525.0,3062,85.919444 67 | 600.0,3921,87.297699 68 | 600.0,3675,88.588161 69 | 900.0,6521,90.903198 70 | 775.0,2945,91.956404 71 | 525.0,2912,92.994804 72 | 395.0,2559,93.915735 73 | 580.0,3116,95.02866 74 | 1130.0,5943,97.1432 75 | 775.0,6133,99.324602 76 | 575.0,3140,100.432 77 | 405.0,2711,101.396831 78 | 545.0,3581,102.666251 79 | 570.0,3307,103.841793 80 | 455.0,3815,105.192782 81 | 570.0,3029,106.258779 82 | 600.0,3971,107.646472 83 | 770.0,3308,108.822203 84 | 485.0,2625,109.757926 85 | 755.0,5625,111.753243 86 | 395.0,2559,112.669612 87 | 555.0,3192,113.804427 88 | 705.0,3009,114.880053 89 | 570.0,3796,116.211087 90 | 575.0,3643,117.496128 91 | 545.0,3017,118.564473 92 | 600.0,4729,120.210074 93 | 535.0,3305,121.376765 94 | 570.0,4695,123.022804 95 | 515.0,3699,124.317297 96 | 770.0,3763,125.638394 97 | 605.0,3841,126.9832 98 | 515.0,2848,127.989394 99 | 600.0,4197,129.467359 100 | 600.0,3801,130.805654 101 | 570.0,3796,132.143114 102 | 915.0,6533,134.45919 103 | 545.0,3844,135.80943 104 | 1085.0,7745,138.561323 105 | 405.0,3527,139.808429 106 | 520.0,2923,140.851297 107 | 550.0,3647,142.133963 108 | 630.0,4517,143.718867 109 | 515.0,3697,145.023435 110 | 600.0,4089,146.449435 111 | 405.0,3109,147.56174 112 | 520.0,3488,148.786294 113 | 700.0,3435,149.999451 114 | 580.0,3116,151.111986 115 | 660.0,4089,152.550673 116 | 395.0,2559,153.464829 117 | 570.0,3628,154.739216 118 | 975.0,7079,157.248444 119 | 575.0,3952,158.639437 120 | 1445.0,9755,162.092638 121 | 700.0,2947,163.148235 122 | 925.0,5439,165.057395 123 | 480.0,2897,166.090477 124 | 
575.0,3414,167.296689 125 | 1120.0,7605,169.986964 126 | 515.0,3697,171.291006 127 | 350.0,2758,172.281944 128 | 515.0,2987,173.335978 129 | 550.0,3742,174.657949 130 | 1345.0,6667,177.009458 131 | 575.0,3514,178.250407 132 | 655.0,3745,179.576672 133 | 510.0,3217,180.717338 134 | 485.0,3217,181.861857 135 | 570.0,3796,183.193002 136 | 495.0,3169,184.315878 137 | 745.0,3692,185.618728 138 | 1010.0,7547,188.308126 139 | 575.0,3406,189.514064 140 | 395.0,2559,190.430743 141 | 520.0,3545,191.687078 142 | 500.0,3345,192.894131 143 | 715.0,3435,194.115859 144 | 520.0,3289,195.27842 145 | 600.0,4399,196.819674 146 | 1225.0,7181,199.368081 147 | 645.0,4529,200.957543 148 | 420.0,3121,202.073241 149 | 1180.0,6825,204.4855 150 | 545.0,3841,205.829714 151 | 1110.0,7663,208.533523 152 | 680.0,5273,210.396127 153 | 545.0,2817,211.400691 154 | 520.0,4191,212.877653 155 | 535.0,3305,214.050907 156 | 670.0,3757,215.378709 157 | 600.0,3675,216.67445 158 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-AsteroidsNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615282346.0657709, "env_id": "AsteroidsNoFrameskip-v4"} 2 | r,l,t 3 | 2100.0,3140,4.222781 4 | 2150.0,5416,6.706464 5 | 3100.0,4628,8.867571 6 | 1880.0,2628,10.130192 7 | 2430.0,2560,11.29486 8 | 2700.0,5986,14.025688 9 | 1540.0,2844,15.321319 10 | 1350.0,3142,16.800679 11 | 2800.0,5302,19.29634 12 | 1300.0,2948,20.63113 13 | 2400.0,2892,21.965651 14 | 2030.0,3832,23.704556 15 | 2180.0,3728,25.185869 16 | 1950.0,4020,26.805397 17 | 2530.0,2228,27.717258 18 | 1780.0,3134,29.029559 19 | 1630.0,3020,30.50439 20 | 1470.0,2142,31.474842 21 | 2500.0,3398,33.044588 22 | 3330.0,5052,35.419518 23 | 1830.0,3190,36.876289 24 | 2750.0,5234,39.277096 25 | 1390.0,3112,40.718593 26 | 1280.0,2428,41.852868 27 | 1610.0,3018,43.288413 28 | 1760.0,2242,44.305527 29 | 2350.0,6784,47.503645 30 | 
2400.0,4238,49.467506 31 | 2080.0,2898,50.785973 32 | 1950.0,2936,52.131212 33 | 3000.0,4662,54.34914 34 | 2130.0,2802,55.651604 35 | 1980.0,3042,57.03654 36 | 1780.0,3176,58.489574 37 | 2850.0,4772,60.665724 38 | 1710.0,3336,62.201884 39 | 2200.0,4294,64.166347 40 | 4770.0,6812,67.312451 41 | 1590.0,5984,70.017936 42 | 930.0,1372,70.625459 43 | 2950.0,5328,73.043853 44 | 1800.0,3476,74.64306 45 | 1300.0,3418,76.182155 46 | 1080.0,1678,76.929373 47 | 1800.0,2962,78.262535 48 | 1650.0,3306,79.759297 49 | 2800.0,5370,82.260684 50 | 1910.0,2682,83.461249 51 | 1930.0,3740,85.154804 52 | 2150.0,4872,87.459444 53 | 2300.0,4200,89.39806 54 | 1300.0,3444,90.949188 55 | 2150.0,5580,93.4792 56 | 1490.0,2158,94.444278 57 | 1530.0,3430,96.019706 58 | 2500.0,5000,98.31589 59 | 2180.0,4054,100.149905 60 | 3000.0,6512,103.194933 61 | 1800.0,3770,104.926376 62 | 1280.0,2132,105.897955 63 | 1850.0,7126,109.109429 64 | 1930.0,4274,111.140121 65 | 2180.0,3110,112.584334 66 | 3480.0,7728,116.169647 67 | 2230.0,3396,117.747863 68 | 980.0,2250,118.789643 69 | 1950.0,4102,120.655639 70 | 880.0,1532,121.345101 71 | 1320.0,2418,122.527914 72 | 4820.0,7680,126.119789 73 | 2350.0,3574,127.748252 74 | 1610.0,4640,129.888136 75 | 2350.0,3484,131.468878 76 | 1280.0,2376,132.534037 77 | 1300.0,3206,133.973154 78 | 2400.0,6146,136.792883 79 | 1390.0,4482,138.916353 80 | 1300.0,3828,140.728047 81 | 1280.0,4782,142.936024 82 | 2400.0,4924,145.211154 83 | 4470.0,6594,148.311394 84 | 3510.0,6092,151.181304 85 | 2900.0,3804,152.932375 86 | 1730.0,3090,154.344851 87 | 2600.0,4410,156.450595 88 | 2450.0,3180,157.89357 89 | 1280.0,3298,159.373876 90 | 3100.0,7616,162.852006 91 | 2450.0,3494,164.390613 92 | 2130.0,5722,167.096026 93 | 2600.0,4060,168.987177 94 | 1280.0,2130,170.001644 95 | 2000.0,3638,171.701467 96 | 2950.0,4134,173.57986 97 | 780.0,1532,174.299056 98 | 2150.0,2936,175.649798 99 | 2750.0,2776,176.935084 100 | 2900.0,5568,179.56799 101 | 630.0,1532,180.246592 102 | 3410.0,5974,183.051007 
103 | 1830.0,5428,185.546254 104 | 1930.0,6056,188.33652 105 | 1930.0,3252,189.853228 106 | 3000.0,5410,192.396713 107 | 1490.0,3532,193.986223 108 | 2700.0,5472,196.570051 109 | 1030.0,2348,197.62768 110 | 3000.0,4760,199.796481 111 | 3100.0,5448,202.300959 112 | 2700.0,4180,204.198598 113 | 3220.0,7278,207.60019 114 | 1950.0,4532,209.646224 115 | 1180.0,2006,210.611366 116 | 2050.0,3532,212.217238 117 | 2010.0,4226,214.121241 118 | 1730.0,4034,215.952861 119 | 2400.0,3260,217.4612 120 | 1080.0,1516,218.132377 121 | 1750.0,2932,219.454946 122 | 1420.0,4148,221.331489 123 | 2100.0,3928,223.107199 124 | 1300.0,3846,224.866148 125 | 2500.0,3884,226.630986 126 | 2200.0,3220,228.092993 127 | 2900.0,5000,230.362537 128 | 2550.0,5290,232.791035 129 | 1650.0,3432,234.413234 130 | 1440.0,2356,235.4701 131 | 3260.0,7632,239.029046 132 | 2550.0,3822,240.787463 133 | 2500.0,4572,242.955574 134 | 4020.0,6168,245.781019 135 | 1300.0,2792,247.0935 136 | 2150.0,4436,249.139655 137 | 1950.0,4634,251.331867 138 | 2200.0,4092,253.184912 139 | 2250.0,5500,255.666379 140 | 3200.0,5960,258.392801 141 | 2330.0,4604,260.466561 142 | 2500.0,4170,262.367958 143 | 2900.0,3674,264.053509 144 | 2450.0,3360,265.599392 145 | 3100.0,6872,268.700958 146 | 1860.0,3258,270.17276 147 | 1280.0,3534,271.760526 148 | 1760.0,2262,272.81702 149 | 3000.0,3806,274.614552 150 | 2280.0,4826,276.806577 151 | 2550.0,5690,279.494636 152 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-BeamRiderNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614793322.1209238, "env_id": "BeamRiderNoFrameskip-v4"} 2 | r,l,t 3 | 4080.0,14525,8.79847 4 | 5666.0,18681,16.575911 5 | 6236.0,17193,23.729032 6 | 2160.0,9873,27.818356 7 | 5154.0,17049,34.863509 8 | 1380.0,7291,37.89434 9 | 3300.0,14357,43.820789 10 | 3000.0,13109,49.238173 11 | 4798.0,17485,56.472782 12 | 3420.0,13915,62.209094 13 | 
1796.0,8485,65.697914 14 | 2160.0,9735,69.714388 15 | 1332.0,7213,72.670604 16 | 1284.0,6343,75.278639 17 | 996.0,5281,77.437288 18 | 3900.0,14931,83.591233 19 | 3420.0,12495,88.744709 20 | 1380.0,7236,91.72037 21 | 2056.0,9511,95.644122 22 | 4732.0,17507,102.892356 23 | 2160.0,10658,107.295106 24 | 756.0,4491,109.137778 25 | 3308.0,12521,114.306549 26 | 4770.0,15463,120.695994 27 | 3900.0,16307,127.439384 28 | 4476.0,17315,134.610267 29 | 3240.0,15185,140.870411 30 | 4734.0,17433,148.066092 31 | 4110.0,16029,154.67535 32 | 3690.0,12429,159.81653 33 | 1284.0,6317,162.416749 34 | 2916.0,10475,166.734342 35 | 3000.0,13817,172.429865 36 | 3300.0,14313,178.328795 37 | 2328.0,11129,182.941328 38 | 1332.0,6787,185.724657 39 | 4764.0,18671,193.465057 40 | 1900.0,9041,197.198546 41 | 2160.0,10553,201.540245 42 | 2804.0,10999,206.083617 43 | 4380.0,15989,212.692215 44 | 3728.0,13888,218.42536 45 | 8358.0,21151,227.209689 46 | 5310.0,17458,234.420478 47 | 3900.0,15619,240.886745 48 | 7404.0,22673,250.270449 49 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-BipedalWalker-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1654205553.3965414, "env_id": "BipedalWalker-v3"} 2 | r,l,t 3 | 286.498863,1232,3.148969 4 | 290.03395,1193,3.942982 5 | 285.871843,1236,4.766715 6 | 291.850978,1198,5.564058 7 | 288.768871,1210,6.368981 8 | 288.157038,1212,7.176008 9 | 289.361869,1202,7.97352 10 | 287.842606,1225,8.786903 11 | 287.280252,1190,9.578171 12 | 284.324796,1247,10.404098 13 | 288.956237,1213,11.208744 14 | 288.669449,1191,11.998618 15 | 285.979899,1247,12.825689 16 | 289.639171,1190,13.618726 17 | 286.947732,1244,14.446376 18 | 287.606982,1188,15.235259 19 | 286.782178,1223,16.048814 20 | 288.687124,1216,16.856583 21 | 285.721313,1232,17.67472 22 | 287.000542,1216,18.483637 23 | 288.605325,1213,19.286785 24 | 291.318049,1207,20.087602 25 | 
289.566242,1193,20.877623 26 | 292.464293,1179,21.661781 27 | 290.672828,1192,22.457295 28 | 290.689305,1182,23.244957 29 | 287.682502,1206,24.045446 30 | 291.957954,1163,24.819003 31 | 282.784247,1263,25.659568 32 | 287.454218,1216,26.470194 33 | 285.451189,1228,27.283888 34 | 287.095007,1272,28.128968 35 | 287.733682,1228,28.945501 36 | 290.772435,1195,29.742402 37 | 288.365004,1236,30.56567 38 | 285.35095,1225,31.380461 39 | 289.413562,1208,32.184891 40 | 288.015387,1211,32.991751 41 | 288.731107,1205,33.791121 42 | 290.727256,1189,34.581942 43 | 289.43828,1207,35.3862 44 | 287.729774,1227,36.204048 45 | 285.253831,1260,37.041651 46 | 289.768125,1185,37.826303 47 | 285.958823,1233,38.646922 48 | 287.200568,1222,39.46325 49 | 290.043164,1185,40.250997 50 | 289.101907,1211,41.05368 51 | 286.462372,1243,41.87645 52 | 285.64948,1221,42.688791 53 | 287.519667,1187,43.476118 54 | 284.971831,1267,44.316454 55 | 288.742228,1214,45.120938 56 | 285.628617,1255,45.954174 57 | 284.757907,1250,46.783232 58 | 284.347205,1245,47.609643 59 | 289.301662,1211,48.409139 60 | 284.519905,1255,49.241136 61 | 283.713437,1242,50.06516 62 | 289.766618,1189,50.851352 63 | 282.590464,1273,51.692035 64 | 289.451637,1175,52.468679 65 | 282.330197,1285,53.320945 66 | 290.373129,1201,54.118149 67 | 285.502483,1222,54.927324 68 | 290.665951,1163,55.698771 69 | 289.852728,1213,56.503432 70 | 287.244561,1205,57.306046 71 | 286.817512,1223,58.116854 72 | 291.118836,1183,58.904925 73 | 289.975692,1205,59.704293 74 | 291.492401,1159,60.475756 75 | 287.211862,1228,61.291739 76 | 284.231949,1244,62.120625 77 | 287.456086,1196,62.914544 78 | 286.782568,1231,63.733352 79 | 290.014788,1182,64.517536 80 | 285.797936,1238,65.341084 81 | 286.128281,1239,66.164153 82 | 291.3156,1168,66.940698 83 | 285.707421,1227,67.7551 84 | 287.444993,1217,68.563281 85 | 287.287142,1235,69.380708 86 | 287.296313,1252,70.212887 87 | 285.059782,1244,71.037529 88 | 285.075845,1250,71.86698 89 | 289.072542,1172,72.643633 90 | 
286.401297,1247,73.470394 91 | 289.753727,1171,74.244725 92 | 288.401924,1186,75.030774 93 | 289.74776,1210,75.834194 94 | 290.13916,1177,76.612051 95 | 292.795935,1181,77.396483 96 | 285.884796,1247,78.222464 97 | 288.603669,1212,79.027307 98 | 289.873579,1212,79.832399 99 | 285.787775,1230,80.647568 100 | 290.36039,1202,81.441948 101 | 290.807051,1179,82.224544 102 | 285.749472,1245,83.048675 103 | 290.984798,1179,83.832279 104 | 288.575334,1226,84.645881 105 | 286.358977,1210,85.449784 106 | 287.503688,1221,86.257498 107 | 290.378601,1209,87.059999 108 | 289.137484,1223,87.869361 109 | 288.796009,1224,88.683386 110 | 287.783809,1249,89.514697 111 | 291.227006,1151,90.278997 112 | 280.305489,1277,91.127263 113 | 283.010979,1279,91.975935 114 | 286.370462,1259,92.809959 115 | 294.429234,1149,93.57178 116 | 286.001682,1213,94.377335 117 | 287.589849,1233,95.19614 118 | 290.317775,1200,95.992744 119 | 286.754871,1236,96.815568 120 | 289.788937,1205,97.617576 121 | 286.015599,1251,98.448357 122 | 287.574138,1193,99.242593 123 | 291.136559,1193,100.036958 124 | 291.611669,1221,100.848697 125 | 286.55367,1234,101.670848 126 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-BipedalWalkerHardcore-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615193532.8653934, "env_id": "BipedalWalkerHardcore-v3"} 2 | r,l,t 3 | 193.275095,2000,4.145592 4 | 278.431375,1216,5.233195 5 | -6.798583,356,5.553727 6 | 19.282857,935,6.39281 7 | 250.856491,1620,7.846617 8 | -65.818894,162,7.994552 9 | 221.508256,2000,9.782774 10 | 55.729149,629,10.340956 11 | 27.481974,749,11.012363 12 | -49.307592,2000,12.828135 13 | -53.150271,224,13.030152 14 | 274.377278,1245,14.144573 15 | 273.219748,1289,15.291531 16 | -1.475935,2000,17.097797 17 | 275.173584,1267,18.226109 18 | 268.651572,1333,19.420884 19 | 262.781362,1457,20.719656 20 | -55.50225,241,20.93576 21 | 
143.412661,2000,22.721537 22 | 56.459139,2000,24.502594 23 | 16.917754,2000,26.304311 24 | 264.020378,1410,27.561328 25 | -4.766662,458,27.972549 26 | 33.298734,2000,29.746628 27 | 43.781743,774,30.438653 28 | 232.481688,2000,32.22467 29 | 46.904917,2000,34.022865 30 | 260.045822,1422,35.285332 31 | 281.353043,1159,36.317426 32 | 117.040305,1037,37.234354 33 | -25.477002,361,37.557418 34 | 174.199961,2000,39.350821 35 | 200.445063,2000,41.149508 36 | 12.565995,2000,42.934273 37 | 84.100166,2000,44.760193 38 | -74.903722,2000,46.59088 39 | 271.058868,1269,47.739809 40 | 103.401041,979,48.615643 41 | 241.844216,2000,50.409846 42 | -34.435864,334,50.708726 43 | 271.148666,1302,51.86712 44 | 33.819655,2000,53.672669 45 | -9.80039,2000,55.49123 46 | 75.016011,2000,57.289543 47 | 146.260913,2000,59.093938 48 | 274.414427,1274,60.231836 49 | 128.360471,1076,61.19114 50 | 264.920659,1386,62.431464 51 | 9.68412,525,62.900779 52 | -62.625699,173,63.058635 53 | 78.21942,801,63.773142 54 | 192.980267,2000,65.549125 55 | 203.484802,2000,67.37258 56 | 50.604619,629,67.934108 57 | 220.073692,2000,69.73243 58 | 94.093388,2000,71.510735 59 | 105.880485,2000,73.284172 60 | 272.858588,1286,74.432985 61 | 8.159598,2000,76.250862 62 | 159.239759,997,77.138704 63 | 26.746829,2000,78.901894 64 | -62.071558,176,79.060033 65 | 25.428189,2000,80.884813 66 | 155.089809,1392,82.126809 67 | 143.705084,1270,83.257159 68 | 275.999284,1248,84.372922 69 | 276.390156,1222,85.456104 70 | 3.633458,2000,87.313531 71 | 43.690779,2000,89.103813 72 | -39.888648,337,89.406915 73 | 77.930317,756,90.083196 74 | 264.742158,1428,91.350063 75 | 268.022265,1343,92.542526 76 | 104.829176,2000,94.329187 77 | 271.579241,1272,95.462329 78 | 7.061693,437,95.852223 79 | 270.516807,1314,97.021596 80 | 25.466871,2000,98.8095 81 | 199.094501,2000,100.804588 82 | -77.270537,2000,102.760402 83 | 274.325678,1257,103.881253 84 | -47.70453,279,104.128048 85 | 269.782561,1325,105.312254 86 | 252.021391,1595,106.737116 87 | 
76.03425,2000,108.53695 88 | 268.948973,1338,109.724009 89 | -10.528409,2000,111.492292 90 | 58.413867,1108,112.472228 91 | 160.320388,2000,114.25012 92 | 91.350898,919,115.072932 93 | 271.164018,1321,116.245728 94 | -11.883781,2000,118.041878 95 | 105.827348,2000,119.822273 96 | 200.431321,2000,121.619765 97 | 265.226968,1407,122.871708 98 | 69.77737,733,123.528858 99 | 270.591673,1306,124.703248 100 | 141.358797,2000,126.497834 101 | 244.774291,1689,128.007964 102 | 21.732235,2000,129.789535 103 | -52.589515,346,130.100523 104 | 268.640395,2000,131.877187 105 | 148.517683,1315,133.04672 106 | 22.670106,528,133.518668 107 | 100.072615,2000,135.297825 108 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-BreakoutNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614770695.1439214, "env_id": "BreakoutNoFrameskip-v4"} 2 | r,l,t 3 | 417.0,12603,7.91641 4 | 396.0,9007,11.486543 5 | 431.0,12730,16.501665 6 | 420.0,9783,20.353318 7 | 393.0,10255,24.399123 8 | 414.0,35543,38.466367 9 | 409.0,10094,42.434626 10 | 372.0,7074,45.223526 11 | 414.0,9194,48.837086 12 | 418.0,9150,52.421882 13 | 421.0,7848,55.500098 14 | 387.0,7913,58.610305 15 | 397.0,7294,61.475845 16 | 397.0,9214,65.099294 17 | 405.0,9027,68.644321 18 | 391.0,14176,74.201695 19 | 380.0,7386,77.108295 20 | 421.0,8567,80.466578 21 | 389.0,7161,83.282692 22 | 413.0,6880,85.992517 23 | 405.0,8422,89.307504 24 | 411.0,8550,92.671517 25 | 415.0,23746,102.003397 26 | 396.0,9498,105.736333 27 | 415.0,7582,108.703916 28 | 390.0,9083,112.268808 29 | 409.0,8339,115.536329 30 | 288.0,7289,118.406694 31 | 415.0,9489,122.139135 32 | 406.0,9650,125.932637 33 | 401.0,13799,131.324341 34 | 397.0,7327,134.206139 35 | 425.0,11805,138.796122 36 | 412.0,8117,141.983996 37 | 412.0,12291,146.784082 38 | 409.0,8779,150.232458 39 | 421.0,10680,154.413628 40 | 213.0,6196,156.854681 41 | 
404.0,9779,160.689395 42 | 413.0,16971,167.34178 43 | 398.0,10057,171.290612 44 | 404.0,9900,175.174741 45 | 423.0,8658,178.559094 46 | 373.0,8765,182.007169 47 | 392.0,8508,185.355729 48 | 372.0,7457,188.290559 49 | 421.0,13768,193.67705 50 | 367.0,7096,196.471112 51 | 398.0,7307,199.343706 52 | 415.0,9821,203.192525 53 | 417.0,8940,206.70469 54 | 396.0,9338,210.384888 55 | 421.0,9029,213.928488 56 | 421.0,10031,217.866087 57 | 419.0,9851,221.736578 58 | 395.0,8815,225.197859 59 | 397.0,8982,228.739918 60 | 389.0,11865,233.428803 61 | 399.0,7052,236.196647 62 | 323.0,6887,238.924023 63 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-EnduroNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614937615.306725, "env_id": "EnduroNoFrameskip-v4"} 2 | r,l,t 3 | 797.0,39936,19.760402 4 | 774.0,39936,36.865088 5 | 1259.0,66560,65.256766 6 | 1032.0,53248,87.962597 7 | 984.0,53248,110.666455 8 | 971.0,53248,133.364271 9 | 1093.0,53248,156.086944 10 | 961.0,53248,178.782423 11 | 975.0,53248,201.486932 12 | 772.0,39936,218.450948 13 | 1342.0,66560,246.65753 14 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-PongNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614770211.8584638, "env_id": "PongNoFrameskip-v4"} 2 | r,l,t 3 | 21.0,6621,5.645768 4 | 21.0,6619,8.278026 5 | 21.0,6619,10.91174 6 | 21.0,6695,13.5699 7 | 21.0,6701,16.227408 8 | 21.0,6695,18.884109 9 | 21.0,6621,21.507497 10 | 21.0,6695,24.158821 11 | 21.0,6619,26.782454 12 | 21.0,6619,29.404617 13 | 21.0,6701,32.060708 14 | 21.0,6701,34.718148 15 | 21.0,6701,37.365183 16 | 21.0,6695,40.013821 17 | 21.0,6695,42.648849 18 | 21.0,6619,45.254462 19 | 21.0,6701,47.898726 20 | 21.0,6621,50.505156 21 | 21.0,6701,53.146831 22 | 21.0,6619,55.753825 23 | 
21.0,6621,58.36678 24 | 21.0,6701,61.006375 25 | 21.0,6701,63.655262 26 | 21.0,6621,66.262585 27 | 21.0,6701,68.902122 28 | 21.0,6619,71.509826 29 | 21.0,6619,74.11505 30 | 21.0,6621,76.72574 31 | 21.0,6695,79.360951 32 | 21.0,6695,81.99979 33 | 21.0,6619,84.609866 34 | 21.0,6695,87.247657 35 | 21.0,6621,89.853352 36 | 21.0,6701,92.504749 37 | 21.0,6695,95.142408 38 | 21.0,6695,97.778445 39 | 21.0,6621,100.390172 40 | 21.0,6695,103.025162 41 | 21.0,6695,105.660685 42 | 21.0,6695,108.294654 43 | 21.0,6621,110.901377 44 | 21.0,6621,113.507766 45 | 21.0,6621,116.119072 46 | 21.0,6701,118.759252 47 | 21.0,6621,121.366972 48 | 21.0,6621,123.986761 49 | 21.0,6621,126.592144 50 | 21.0,6619,129.200434 51 | 21.0,6701,131.840272 52 | 21.0,6695,134.475858 53 | 21.0,6619,137.083183 54 | 21.0,6621,139.692369 55 | 21.0,6619,142.300647 56 | 21.0,6619,144.905658 57 | 21.0,6701,147.542496 58 | 21.0,6695,150.183867 59 | 21.0,6695,152.830488 60 | 21.0,6695,155.470538 61 | 21.0,6619,158.079579 62 | 21.0,6621,160.693677 63 | 21.0,6701,163.329427 64 | 21.0,6695,165.966489 65 | 21.0,6619,168.575866 66 | 21.0,6695,171.215574 67 | 21.0,6701,173.854388 68 | 21.0,6619,176.460269 69 | 21.0,6701,179.104645 70 | 21.0,6701,181.746303 71 | 21.0,6619,184.365571 72 | 21.0,6695,187.007439 73 | 21.0,6619,189.617918 74 | 21.0,6701,192.254169 75 | 21.0,6701,194.89255 76 | 20.0,6905,197.61044 77 | 21.0,6619,200.216051 78 | 21.0,6695,202.851995 79 | 21.0,6701,205.491178 80 | 21.0,6695,208.131151 81 | 21.0,6619,210.740392 82 | 21.0,6695,213.388927 83 | 21.0,6619,215.992427 84 | 21.0,6621,218.596376 85 | 21.0,6695,221.233237 86 | 21.0,6695,223.871111 87 | 21.0,6621,226.481205 88 | 21.0,6621,229.09101 89 | 21.0,6701,231.728724 90 | 21.0,6701,234.371095 91 | 21.0,6701,237.008596 92 | 21.0,6701,239.64919 93 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-QbertNoFrameskip-v4/0.monitor.csv: 
-------------------------------------------------------------------------------- 1 | #{"t_start": 1614770940.1790123, "env_id": "QbertNoFrameskip-v4"} 2 | r,l,t 3 | 12175.0,5927,5.415989 4 | 16150.0,7347,8.336347 5 | 15875.0,7725,11.41015 6 | 15525.0,6352,13.925117 7 | 15825.0,7957,17.082229 8 | 16725.0,7377,20.008731 9 | 15675.0,6632,22.6371 10 | 12375.0,5794,24.927961 11 | 12350.0,5852,27.250481 12 | 15550.0,6357,29.76556 13 | 15450.0,6159,32.201469 14 | 15375.0,5834,34.504873 15 | 16150.0,7342,37.405205 16 | 15875.0,7452,40.352528 17 | 15825.0,7452,43.2892 18 | 16100.0,6422,45.815806 19 | 15675.0,6844,48.517201 20 | 15750.0,7117,51.320857 21 | 19400.0,10657,55.528912 22 | 16325.0,7822,58.619442 23 | 19375.0,9303,62.316152 24 | 12000.0,4992,64.281549 25 | 19725.0,10072,68.251343 26 | 20000.0,10267,72.296274 27 | 15725.0,8032,75.460957 28 | 5050.0,3742,76.926301 29 | 15750.0,6782,79.59477 30 | 12125.0,5272,81.666575 31 | 16175.0,7664,84.682323 32 | 5025.0,3429,86.03526 33 | 8675.0,4537,87.817425 34 | 16075.0,7667,90.836693 35 | 4150.0,2947,91.988825 36 | 12300.0,5734,94.24798 37 | 15800.0,6792,96.923041 38 | 12325.0,5497,99.083754 39 | 19400.0,10132,103.085607 40 | 12225.0,5467,105.234322 41 | 16650.0,7732,108.279649 42 | 12000.0,4697,110.122053 43 | 15425.0,6732,112.775185 44 | 16375.0,7764,115.838148 45 | 19650.0,11002,120.187555 46 | 19350.0,10149,124.190546 47 | 19075.0,8989,127.733854 48 | 19450.0,8049,130.904344 49 | 15975.0,7953,134.032731 50 | 19925.0,9842,137.909475 51 | 15425.0,6194,140.345596 52 | 16850.0,8547,143.712478 53 | 16425.0,7277,146.584682 54 | 15675.0,6922,149.312712 55 | 15650.0,6542,151.884868 56 | 15550.0,6427,154.405081 57 | 11700.0,4314,156.093832 58 | 15750.0,6877,158.806255 59 | 19550.0,9387,162.504466 60 | 19925.0,10057,166.469106 61 | 11950.0,5177,168.502842 62 | 12275.0,4624,170.320909 63 | 16250.0,7293,173.19079 64 | 16275.0,6862,175.896776 65 | 15700.0,6622,178.511129 66 | 20200.0,10903,182.807991 67 | 15725.0,7232,185.657379 68 | 
15975.0,7397,188.565925 69 | 12375.0,6042,190.947371 70 | 19175.0,8742,194.39362 71 | 15375.0,5537,196.569389 72 | 19325.0,8714,200.009571 73 | 19775.0,9317,203.681354 74 | 15625.0,7582,206.672691 75 | 16700.0,8847,210.160283 76 | 19400.0,9017,213.709782 77 | 15625.0,6912,216.445144 78 | 19125.0,8424,219.868027 79 | 20250.0,10263,223.909721 80 | 19775.0,9034,227.471911 81 | 12425.0,6132,229.885228 82 | 15525.0,6687,232.511261 83 | 15575.0,6562,235.091669 84 | 11900.0,4512,236.867217 85 | 16325.0,7612,239.867593 86 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-RoadRunnerNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615282634.0171642, "env_id": "RoadRunnerNoFrameskip-v4"} 2 | r,l,t 3 | 39500.0,3693,4.607107 4 | 42300.0,4249,6.678031 5 | 38200.0,3762,8.513952 6 | 38400.0,4290,10.56961 7 | 38900.0,3916,12.442074 8 | 31300.0,3952,14.360368 9 | 41500.0,3863,16.242541 10 | 47500.0,4178,18.267558 11 | 37300.0,3890,20.1607 12 | 37500.0,4419,22.307001 13 | 43400.0,3720,24.117626 14 | 48000.0,3867,26.001089 15 | 35600.0,3650,27.768714 16 | 38500.0,3747,29.5914 17 | 47600.0,3980,31.512643 18 | 31300.0,3889,33.384604 19 | 39500.0,3870,35.2572 20 | 37600.0,3865,37.131667 21 | 33600.0,3860,38.998168 22 | 43100.0,4141,40.998706 23 | 35200.0,3828,42.850133 24 | 40500.0,3728,44.656435 25 | 43800.0,4114,46.647759 26 | 29300.0,3889,48.519863 27 | 44500.0,3985,50.438046 28 | 40800.0,3793,52.266009 29 | 40900.0,3967,54.170856 30 | 47100.0,3569,55.890799 31 | 52100.0,4196,57.898864 32 | 26100.0,3795,59.748093 33 | 39000.0,3916,61.674686 34 | 43800.0,3882,63.658678 35 | 36000.0,3988,65.747784 36 | 43400.0,3696,67.774647 37 | 42500.0,3893,69.948568 38 | 44700.0,4360,72.333062 39 | 38500.0,4063,74.596394 40 | 36300.0,3477,76.517315 41 | 53500.0,3976,78.678528 42 | 41500.0,3792,80.803227 43 | 53000.0,3939,82.964355 44 | 34500.0,3861,85.102964 45 | 
48000.0,3914,87.266721 46 | 51500.0,4083,89.521037 47 | 49100.0,4784,91.993401 48 | 39700.0,3874,93.879836 49 | 40400.0,4117,95.889661 50 | 46600.0,4149,97.911088 51 | 38000.0,4318,100.008145 52 | 45500.0,3557,101.736813 53 | 37000.0,3897,103.628461 54 | 48500.0,4347,105.697235 55 | 41600.0,3288,107.307808 56 | 37700.0,3728,109.106499 57 | 31800.0,3679,110.892347 58 | 48700.0,4237,112.95045 59 | 37000.0,3764,114.778509 60 | 39600.0,3549,116.505887 61 | 35000.0,3707,118.302071 62 | 48400.0,4000,120.253019 63 | 46000.0,3853,122.118019 64 | 34200.0,3754,123.945879 65 | 49700.0,4783,126.269761 66 | 49700.0,3980,128.216163 67 | 42700.0,4097,130.196395 68 | 42100.0,3925,132.096195 69 | 43100.0,4014,134.051753 70 | 23900.0,3929,135.957948 71 | 37700.0,3763,137.782687 72 | 31100.0,3873,139.667927 73 | 34000.0,3632,141.440248 74 | 40500.0,4260,143.502083 75 | 38100.0,3936,145.421263 76 | 40500.0,3955,147.332393 77 | 37400.0,4160,149.320792 78 | 41500.0,3893,151.196564 79 | 34700.0,3881,153.079257 80 | 40600.0,4371,155.196352 81 | 43100.0,3695,156.991861 82 | 53000.0,3920,158.900923 83 | 40300.0,3983,160.830663 84 | 49000.0,4309,162.918265 85 | 44300.0,3935,164.83192 86 | 40500.0,3844,166.70039 87 | 45500.0,4016,168.653216 88 | 39100.0,4385,170.787611 89 | 49200.0,3573,172.518745 90 | 48500.0,4030,174.477761 91 | 38500.0,3678,176.263532 92 | 43600.0,4257,178.334697 93 | 40500.0,4068,180.297519 94 | 34400.0,3653,182.074754 95 | 25400.0,3632,183.833901 96 | 33600.0,3697,185.627832 97 | 39200.0,3708,187.426724 98 | 29600.0,3637,189.190862 99 | 37400.0,3731,190.998438 100 | 32900.0,3810,192.850824 101 | 40100.0,3997,194.788953 102 | 47300.0,3844,196.605136 103 | 50000.0,4189,198.47715 104 | 36200.0,3759,200.078743 105 | 39800.0,3849,201.763661 106 | 34700.0,3828,203.485893 107 | 41500.0,4269,205.557709 108 | 43500.0,4585,207.78251 109 | 49000.0,4020,209.736035 110 | 33500.0,3650,211.500322 111 | 51500.0,4124,213.506651 112 | 49000.0,3750,215.330003 113 | 33400.0,3792,217.170306 
114 | 38100.0,3677,218.956944 115 | 44000.0,4000,220.899899 116 | 42200.0,3680,222.686688 117 | 51800.0,4168,224.711082 118 | 29100.0,3644,226.473148 119 | 27200.0,3710,228.278477 120 | 37400.0,3824,230.125029 121 | 35100.0,3704,231.929292 122 | 31700.0,3988,233.857676 123 | 46400.0,3954,235.780364 124 | 53500.0,3864,237.65512 125 | 26000.0,3665,239.42042 126 | 42700.0,4071,241.389238 127 | 58500.0,3992,243.31737 128 | 50600.0,4053,245.288382 129 | 45600.0,4159,247.281524 130 | 38400.0,3943,249.153082 131 | 31300.0,2612,250.421511 132 | 43700.0,3935,252.326953 133 | 48600.0,3988,254.250969 134 | 47500.0,4095,256.223561 135 | 39500.0,3757,258.030014 136 | 34000.0,3803,259.870713 137 | 36800.0,3904,261.759659 138 | 41400.0,3565,263.491798 139 | 41300.0,3886,265.365845 140 | 44800.0,3611,267.119321 141 | 46000.0,3735,268.930228 142 | 45500.0,3790,270.756583 143 | 36300.0,4301,272.847211 144 | 38600.0,3605,274.5775 145 | 46000.0,3698,276.370366 146 | 55100.0,4186,278.385095 147 | 35200.0,3785,280.205292 148 | 41200.0,4124,282.194699 149 | 31900.0,3867,284.070051 150 | 23900.0,3808,285.909715 151 | 38700.0,3597,287.647842 152 | 44300.0,3903,289.53886 153 | 40100.0,3967,291.45401 154 | 31200.0,3711,293.213691 155 | 41300.0,3908,295.099524 156 | 34900.0,3716,296.898536 157 | 41800.0,4057,298.8676 158 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-SeaquestNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615282041.2912836, "env_id": "SeaquestNoFrameskip-v4"} 2 | r,l,t 3 | 1780.0,9082,7.165651 4 | 1780.0,9082,11.51325 5 | 1780.0,9081,15.896254 6 | 1780.0,9081,20.358567 7 | 1800.0,9082,24.865566 8 | 1800.0,9082,29.322846 9 | 1800.0,9081,33.796309 10 | 1800.0,9082,38.233089 11 | 1780.0,9082,42.75617 12 | 1760.0,9082,47.221684 13 | 1800.0,9082,51.683366 14 | 1780.0,9082,56.105346 15 | 1780.0,9082,60.600484 16 | 1760.0,9081,65.0473 17 | 
1800.0,9082,69.466815 18 | 1800.0,9082,73.904019 19 | 1760.0,9082,78.412824 20 | 1780.0,9082,82.912151 21 | 1800.0,9082,87.340143 22 | 1820.0,9081,91.749042 23 | 1760.0,9082,96.161275 24 | 1800.0,9082,100.642852 25 | 1760.0,9081,105.134592 26 | 1780.0,9081,109.578357 27 | 1820.0,9081,114.028118 28 | 1820.0,9081,118.46495 29 | 1800.0,9082,122.987751 30 | 1820.0,9081,127.326344 31 | 1800.0,9082,131.776265 32 | 1760.0,9081,136.280469 33 | 1760.0,9082,140.79302 34 | 1780.0,9081,145.268835 35 | 1700.0,8762,149.484477 36 | 1800.0,9082,153.923026 37 | 1800.0,9082,158.353423 38 | 1820.0,9082,162.787202 39 | 1820.0,9082,167.212062 40 | 1780.0,9081,171.657153 41 | 1760.0,9081,176.071074 42 | 1780.0,9082,180.550466 43 | 1760.0,9081,185.043164 44 | 1760.0,9081,189.465687 45 | 1760.0,9082,193.88748 46 | 1800.0,9082,198.281321 47 | 1800.0,9082,202.796558 48 | 1780.0,9082,207.248083 49 | 1760.0,9082,211.688853 50 | 1780.0,9082,216.104586 51 | 1780.0,9081,220.443628 52 | 1800.0,9082,224.857749 53 | 1840.0,9082,229.29361 54 | 1760.0,9082,233.74382 55 | 1800.0,9082,238.00834 56 | 1800.0,9082,242.521467 57 | 1760.0,9082,247.000363 58 | 1820.0,9082,251.488832 59 | 1800.0,9082,255.921816 60 | 1760.0,9082,260.386473 61 | 1800.0,9082,264.896535 62 | 1800.0,9082,269.336106 63 | 1780.0,9082,273.768382 64 | 1780.0,9082,278.173506 65 | 1820.0,9082,282.615731 66 | 1800.0,9082,287.072851 67 | 1780.0,9082,291.512813 68 | 1580.0,8250,295.521151 69 | -------------------------------------------------------------------------------- /logs/benchmark/ppo-SpaceInvadersNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614770456.7153661, "env_id": "SpaceInvadersNoFrameskip-v4"} 2 | r,l,t 3 | 600.0,2405,3.914749 4 | 600.0,2539,4.87522 5 | 570.0,2327,5.757872 6 | 1230.0,5673,7.896288 7 | 600.0,2315,8.769479 8 | 600.0,2857,9.845313 9 | 1590.0,7541,12.730985 10 | 2090.0,8591,15.996526 11 | 1585.0,6093,18.324354 12 | 
600.0,2449,19.250391 13 | 540.0,3282,20.471908 14 | 545.0,2405,21.384644 15 | 570.0,2469,22.318601 16 | 1630.0,6275,24.687005 17 | 800.0,3021,25.82658 18 | 600.0,2687,26.835624 19 | 1770.0,8099,29.917496 20 | 800.0,2893,31.008491 21 | 600.0,2843,32.087468 22 | 600.0,2573,33.062917 23 | 1380.0,5372,35.103718 24 | 570.0,2520,36.052045 25 | 1235.0,5669,38.191427 26 | 1175.0,5531,40.29176 27 | 1230.0,6129,42.596846 28 | 970.0,5423,44.657641 29 | 1230.0,5965,46.896973 30 | 600.0,2485,47.833416 31 | 570.0,3545,49.162466 32 | 555.0,2327,50.046039 33 | 575.0,4107,51.585598 34 | 1205.0,7102,54.245788 35 | 600.0,2805,55.292747 36 | 1125.0,5237,57.280273 37 | 600.0,2777,58.334494 38 | 1230.0,5557,60.431282 39 | 575.0,2708,61.452153 40 | 800.0,3143,62.625328 41 | 570.0,2956,63.728158 42 | 1370.0,6813,66.294163 43 | 1370.0,6286,68.644167 44 | 775.0,3116,69.814711 45 | 1330.0,7499,72.653976 46 | 1430.0,6605,75.134255 47 | 575.0,4397,76.772299 48 | 2030.0,10489,80.723449 49 | 510.0,3408,81.992183 50 | 600.0,2173,82.812686 51 | 580.0,2668,83.820999 52 | 1205.0,6060,86.097588 53 | 830.0,3351,87.356693 54 | 1230.0,5519,89.442007 55 | 600.0,2433,90.360981 56 | 570.0,2574,91.325186 57 | 1170.0,6277,93.682385 58 | 570.0,2241,94.533334 59 | 515.0,2731,95.565007 60 | 1435.0,5895,97.790861 61 | 1175.0,5492,99.851393 62 | 545.0,2853,100.930767 63 | 1430.0,5589,103.028106 64 | 740.0,3943,104.515226 65 | 705.0,3711,105.905905 66 | 570.0,2841,106.97071 67 | 1175.0,5545,109.064234 68 | 1175.0,5465,111.119168 69 | 1175.0,6038,113.395968 70 | 600.0,2439,114.312117 71 | 570.0,3541,115.633128 72 | 600.0,2327,116.509693 73 | 570.0,2398,117.417454 74 | 600.0,2861,118.497781 75 | 1370.0,5490,120.557487 76 | 575.0,3375,121.821491 77 | 1210.0,4932,123.689292 78 | 570.0,2206,124.524383 79 | 570.0,2277,125.382965 80 | 1175.0,6184,127.712865 81 | 1945.0,8509,130.918364 82 | 1280.0,5293,132.918554 83 | 950.0,5249,134.904678 84 | 1185.0,5208,136.870933 85 | 1200.0,6206,139.196474 86 | 545.0,3272,140.421652 
87 | 600.0,2695,141.433939 88 | 570.0,2334,142.317909 89 | 1435.0,6601,144.789252 90 | 570.0,2305,145.66184 91 | 970.0,4711,147.444192 92 | 575.0,3055,148.5987 93 | 2065.0,9483,152.164299 94 | 1320.0,5597,154.271958 95 | 1230.0,5921,156.488366 96 | 540.0,2938,157.592431 97 | 1120.0,5389,159.620881 98 | 1145.0,5839,161.825858 99 | 600.0,2533,162.775769 100 | 2200.0,8533,166.009311 101 | 515.0,3115,167.179821 102 | 745.0,3351,168.435554 103 | 570.0,2192,169.262837 104 | 1435.0,5819,171.446572 105 | 600.0,2885,172.526852 106 | 1780.0,6768,175.07971 107 | 570.0,2648,176.069265 108 | 600.0,2621,177.049405 109 | 570.0,2092,177.843376 110 | 1380.0,7781,180.783677 111 | 600.0,2345,181.672377 112 | 1360.0,5950,183.912733 113 | 575.0,2639,184.90189 114 | 1230.0,6295,187.263022 115 | 540.0,2561,188.228392 116 | 1705.0,9517,191.814498 117 | 580.0,1978,192.566855 118 | 600.0,2501,193.508208 119 | 1230.0,6693,196.031862 120 | 1205.0,6623,198.51251 121 | 570.0,3149,199.688567 122 | 1400.0,5209,201.64975 123 | 950.0,4733,203.450315 124 | 570.0,2149,204.261615 125 | 1205.0,5603,206.371998 126 | 1385.0,5767,208.560025 127 | 570.0,2170,209.385584 128 | 570.0,2495,210.324781 129 | 1260.0,5779,212.495315 130 | 1230.0,5477,214.555492 131 | 1800.0,8253,217.678085 132 | 1160.0,5875,219.905236 133 | 1200.0,5733,222.082316 134 | 1465.0,7059,224.754595 135 | 600.0,2269,225.608018 136 | 1230.0,5747,227.770797 137 | 605.0,2377,228.674616 138 | 1290.0,5145,230.616475 139 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-AsteroidsNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615284368.635039, "env_id": "AsteroidsNoFrameskip-v4"} 2 | r,l,t 3 | 2030.0,5280,4.917788 4 | 1280.0,7246,7.733405 5 | 1280.0,7744,10.735203 6 | 2110.0,5418,12.840264 7 | 1580.0,9792,16.635882 8 | 2700.0,14442,22.269402 9 | 580.0,2602,23.28045 10 | 1410.0,8022,26.392479 11 | 
1470.0,9992,30.243479 12 | 1300.0,4820,32.112589 13 | 880.0,3728,33.556423 14 | 1850.0,6712,36.16707 15 | 2300.0,18852,43.480112 16 | 2000.0,7066,46.231887 17 | 1880.0,6666,48.825108 18 | 3100.0,14344,54.417897 19 | 1300.0,5528,56.570307 20 | 1340.0,9790,60.490703 21 | 2080.0,8110,63.656714 22 | 1660.0,6686,66.265376 23 | 4270.0,15394,72.397557 24 | 1370.0,6100,74.749058 25 | 1470.0,5336,76.823082 26 | 1760.0,13178,81.991044 27 | 6890.0,27672,92.853137 28 | 1760.0,14066,98.439594 29 | 2900.0,8700,101.827892 30 | 1410.0,5754,104.055313 31 | 1320.0,4634,105.837138 32 | 3970.0,12582,110.797729 33 | 2080.0,7168,113.588531 34 | 1080.0,5372,115.669907 35 | 1430.0,10506,119.765551 36 | 1480.0,7942,122.972187 37 | 1440.0,4668,124.785599 38 | 2000.0,6472,127.308509 39 | 3920.0,10786,131.527477 40 | 2750.0,9510,135.345486 41 | 2150.0,6788,137.990966 42 | 2000.0,7614,140.951383 43 | 3220.0,13132,146.187673 44 | 3730.0,13448,151.436091 45 | 1580.0,7324,154.288391 46 | 1370.0,6314,156.804605 47 | 3220.0,11014,161.160282 48 | 2350.0,7398,164.036948 49 | 3000.0,9886,167.901782 50 | 2950.0,9504,171.705262 51 | 830.0,4964,173.63178 52 | 1880.0,6698,176.241872 53 | 3270.0,11406,180.728887 54 | 3220.0,15342,186.772055 55 | 2280.0,9198,190.361094 56 | 2150.0,6422,192.914549 57 | 3310.0,12340,197.784235 58 | 1880.0,8594,201.132567 59 | 1180.0,5834,203.409548 60 | 1130.0,5844,205.782184 61 | 1910.0,6416,208.304251 62 | 5540.0,15882,214.497507 63 | 1590.0,5052,216.464622 64 | 2260.0,12434,221.384965 65 | 3530.0,14708,227.130663 66 | 2050.0,8244,230.450806 67 | 1390.0,8756,233.848895 68 | 1830.0,10548,237.956959 69 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-BeamRiderNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614852096.4742026, "env_id": "BeamRiderNoFrameskip-v4"} 2 | r,l,t 3 | 51584.0,71433,30.11881 4 | 22170.0,44639,48.369793 5 | 
30514.0,48741,68.151076 6 | 16020.0,32883,80.800972 7 | 6232.0,17303,87.346454 8 | 4412.0,17059,93.873198 9 | 20790.0,38463,112.188127 10 | 16820.0,36761,128.455022 11 | 16110.0,36277,144.132732 12 | 19380.0,38563,160.682819 13 | 5884.0,18413,168.312466 14 | 6988.0,19351,176.849476 15 | 14294.0,34023,191.423923 16 | 12660.0,31657,205.309299 17 | 12960.0,32987,219.264733 18 | 15690.0,38991,236.003012 19 | 18582.0,38939,252.629304 20 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-BreakoutNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614779697.375159, "env_id": "BreakoutNoFrameskip-v4"} 2 | r,l,t 3 | 421.0,13759,7.717858 4 | 382.0,8885,11.008912 5 | 368.0,7080,13.641344 6 | 360.0,6621,16.082456 7 | 450.0,46707,33.102802 8 | 287.0,8744,36.332456 9 | 393.0,8224,39.367732 10 | 395.0,8256,42.396542 11 | 432.0,9497,45.875642 12 | 373.0,8425,48.983618 13 | 408.0,13715,54.01905 14 | 400.0,12775,58.683662 15 | 408.0,17495,65.088949 16 | 773.0,21958,73.152582 17 | 423.0,22762,81.513918 18 | 349.0,8182,84.536757 19 | 373.0,21159,92.287416 20 | 320.0,8244,95.330242 21 | 210.0,6587,97.765068 22 | 327.0,9053,101.105664 23 | 438.0,11200,105.217838 24 | 425.0,28962,115.746121 25 | 290.0,5949,117.950181 26 | 407.0,21378,125.79268 27 | 375.0,13241,130.658171 28 | 451.0,16014,136.544134 29 | 423.0,12583,141.160334 30 | 403.0,7056,143.759736 31 | 421.0,20190,151.097538 32 | 252.0,7425,153.834893 33 | 407.0,19564,160.988797 34 | 364.0,28839,171.57312 35 | 383.0,8248,174.622104 36 | 411.0,13522,179.58879 37 | 385.0,10836,183.585816 38 | 410.0,9478,187.070337 39 | 373.0,7982,190.026134 40 | 431.0,28878,200.540685 41 | 428.0,29933,211.338071 42 | 415.0,10305,215.104128 43 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-EnduroNoFrameskip-v4/0.monitor.csv: 
-------------------------------------------------------------------------------- 1 | #{"t_start": 1615798755.7603855, "env_id": "EnduroNoFrameskip-v4"} 2 | r,l,t 3 | 4693.0,133120,54.108688 4 | 2582.0,119808,99.796636 5 | 4931.0,146432,155.525393 6 | 1989.0,93184,190.736086 7 | 1961.0,93184,226.100894 8 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-PongNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614779925.3269277, "env_id": "PongNoFrameskip-v4"} 2 | r,l,t 3 | 21.0,7947,5.600472 4 | 21.0,8442,8.701368 5 | 20.0,9634,12.226275 6 | 20.0,9762,15.790657 7 | 20.0,9609,19.300387 8 | 21.0,10585,23.16411 9 | 21.0,7947,26.067608 10 | 21.0,10205,29.791635 11 | 21.0,7941,32.686685 12 | 21.0,7941,35.583624 13 | 21.0,9895,39.196352 14 | 21.0,9949,42.81496 15 | 21.0,9895,46.414241 16 | 19.0,12324,50.903394 17 | 19.0,11473,55.076255 18 | 21.0,7878,57.940445 19 | 21.0,9895,61.542388 20 | 21.0,8279,64.549082 21 | 20.0,9625,68.059175 22 | 21.0,8678,71.209772 23 | 21.0,7947,74.102408 24 | 21.0,8817,77.327215 25 | 20.0,10751,81.23354 26 | 21.0,7947,84.124312 27 | 21.0,10029,87.772565 28 | 21.0,7941,90.660588 29 | 21.0,8096,93.601612 30 | 21.0,7947,96.499756 31 | 20.0,8959,99.759803 32 | 20.0,12466,104.28981 33 | 20.0,8357,107.329129 34 | 21.0,11809,111.624942 35 | 20.0,8408,114.678773 36 | 19.0,10698,118.573543 37 | 21.0,10572,122.417779 38 | 20.0,12662,127.027967 39 | 20.0,8111,129.980286 40 | 21.0,12685,134.595775 41 | 21.0,10857,138.545573 42 | 19.0,10857,142.492337 43 | 21.0,7947,145.377651 44 | 20.0,8323,148.40299 45 | 21.0,7947,151.294838 46 | 20.0,9177,154.633734 47 | 21.0,7947,157.525611 48 | 19.0,8704,160.690106 49 | 21.0,8279,163.702176 50 | 20.0,8077,166.637348 51 | 21.0,9895,170.237087 52 | 21.0,12037,174.608734 53 | 20.0,8077,177.542576 54 | 21.0,7947,180.432275 55 | 20.0,8486,183.514008 56 | 21.0,7941,186.413429 57 | 
21.0,9895,190.02161 58 | 21.0,12037,194.4052 59 | 19.0,12802,199.05999 60 | 19.0,12353,203.547561 61 | 20.0,8618,206.684987 62 | 21.0,7947,209.578497 63 | 20.0,9505,213.026868 64 | 21.0,11741,217.296149 65 | 21.0,7941,220.184218 66 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-QbertNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614780377.3798313, "env_id": "QbertNoFrameskip-v4"} 2 | r,l,t 3 | 15600.0,6377,5.057885 4 | 15550.0,7312,7.795092 5 | 15875.0,8987,11.144577 6 | 15600.0,6372,13.514814 7 | 15400.0,6394,15.885376 8 | 15600.0,6372,18.256008 9 | 16250.0,7487,21.048109 10 | 15600.0,6372,23.41627 11 | 12175.0,5163,25.33254 12 | 7750.0,3302,26.55398 13 | 15400.0,6334,28.906239 14 | 16050.0,6422,31.29228 15 | 4300.0,3159,32.456307 16 | 15825.0,5479,34.494781 17 | 15600.0,6372,36.869116 18 | 15400.0,6339,39.219169 19 | 8400.0,4895,41.03438 20 | 15450.0,6025,43.265694 21 | 15900.0,7189,45.924935 22 | 15400.0,6339,48.269106 23 | 16250.0,7487,51.050018 24 | 14925.0,6852,53.588706 25 | 16325.0,8247,56.64451 26 | 16300.0,8242,59.700643 27 | 15400.0,6334,62.046261 28 | 15400.0,6339,64.393149 29 | 15400.0,6399,66.765665 30 | 16250.0,7059,69.384406 31 | 16000.0,7414,72.127738 32 | 16000.0,7317,74.832424 33 | 16275.0,8557,78.012088 34 | 15650.0,7473,80.778527 35 | 15600.0,6377,83.135277 36 | 16700.0,8209,86.173364 37 | 15600.0,6372,88.537448 38 | 4425.0,3129,89.688693 39 | 16425.0,7340,92.412909 40 | 3975.0,2033,93.155878 41 | 15600.0,6372,95.515862 42 | 16050.0,6442,97.906537 43 | 16250.0,7487,100.682827 44 | 16025.0,6742,103.176655 45 | 16300.0,8042,106.157331 46 | 15475.0,6850,108.689824 47 | 16175.0,7827,111.588518 48 | 8550.0,6072,113.839271 49 | 16475.0,6543,116.257065 50 | 15400.0,6339,118.600999 51 | 16050.0,7100,121.234627 52 | 16150.0,7077,123.855452 53 | 15500.0,6623,126.313986 54 | 16475.0,6357,128.661455 55 | 
15400.0,6339,131.005288 56 | 12100.0,5664,133.09717 57 | 15400.0,6334,135.439019 58 | 7750.0,3307,136.661919 59 | 16075.0,7510,139.44304 60 | 15450.0,6194,141.736086 61 | 15400.0,6339,144.08686 62 | 17275.0,9532,147.619866 63 | 15550.0,6025,149.842855 64 | 15600.0,6372,152.203281 65 | 15475.0,6731,154.696078 66 | 15500.0,6002,156.923521 67 | 16475.0,7882,159.837396 68 | 15550.0,6807,162.35231 69 | 16975.0,6733,164.847731 70 | 15400.0,6334,167.194746 71 | 15400.0,6399,169.56363 72 | 15500.0,6814,172.090505 73 | 15400.0,6339,174.44393 74 | 16250.0,7492,177.215289 75 | 15075.0,7007,179.807077 76 | 15650.0,6239,182.109103 77 | 7550.0,3708,183.476477 78 | 15600.0,6372,185.836382 79 | 15725.0,6467,188.226367 80 | 15600.0,6372,190.592282 81 | 15400.0,6339,192.935986 82 | 15650.0,6927,195.50159 83 | 16900.0,6274,197.820424 84 | 16250.0,7487,200.593243 85 | 16075.0,7007,203.18213 86 | 15525.0,6727,205.669727 87 | 16275.0,7587,208.47724 88 | 15600.0,6377,210.842808 89 | 15400.0,6334,213.186167 90 | 15400.0,6334,215.530586 91 | 15400.0,6334,217.876995 92 | 15400.0,6334,220.218556 93 | 4600.0,6289,222.542634 94 | 16075.0,7365,225.269416 95 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-RoadRunnerNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615284101.0187874, "env_id": "RoadRunnerNoFrameskip-v4"} 2 | r,l,t 3 | 45700.0,4195,4.675652 4 | 36400.0,10379,9.093378 5 | 42500.0,4318,10.943592 6 | 40000.0,4409,12.819046 7 | 40100.0,18894,20.841767 8 | 48000.0,16250,27.781744 9 | 31400.0,4342,29.634131 10 | 29100.0,3676,31.203189 11 | 43100.0,7511,34.405609 12 | 29500.0,3953,36.088055 13 | 39700.0,6818,38.997077 14 | 38500.0,5285,41.252269 15 | 49500.0,4064,43.000126 16 | 46600.0,10519,47.530509 17 | 29200.0,4526,49.451559 18 | 39100.0,5894,51.960575 19 | 53500.0,11215,56.743758 20 | 42100.0,4457,58.650866 21 | 30500.0,3748,60.247491 22 | 
56500.0,19033,68.358962 23 | 48500.0,25777,79.503964 24 | 33400.0,6918,82.571893 25 | 47600.0,29355,95.159505 26 | 56500.0,14117,101.289444 27 | 54000.0,3714,102.91013 28 | 39900.0,27591,114.183431 29 | 45100.0,9823,118.347341 30 | 31900.0,8528,122.001608 31 | 51000.0,5418,124.284162 32 | 39400.0,3984,125.976956 33 | 49600.0,5012,128.104257 34 | 42500.0,4851,130.154958 35 | 40900.0,4812,132.180582 36 | 49000.0,10090,136.482913 37 | 23900.0,4297,138.380678 38 | 41600.0,26677,149.766945 39 | 37900.0,55625,173.340621 40 | 54500.0,8922,177.154951 41 | 36000.0,4763,179.190263 42 | 40100.0,4745,181.219405 43 | 32200.0,6252,183.88833 44 | 47700.0,5602,186.280746 45 | 34800.0,4502,188.199655 46 | 53100.0,5188,190.433815 47 | 48600.0,7978,193.853717 48 | 44500.0,20618,202.602421 49 | 45100.0,10767,207.15412 50 | 30700.0,3959,208.843217 51 | 42600.0,9351,212.814273 52 | 28100.0,4686,214.808829 53 | 48900.0,5631,217.231784 54 | 56000.0,5127,219.426613 55 | 48000.0,11301,224.260313 56 | 30300.0,7920,227.654343 57 | 42500.0,8656,231.329389 58 | 34600.0,4871,233.404373 59 | 58500.0,34210,247.963593 60 | 49000.0,8831,251.734886 61 | 47700.0,7061,254.753482 62 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-SeaquestNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615283846.1270072, "env_id": "SeaquestNoFrameskip-v4"} 2 | r,l,t 3 | 2600.0,9082,6.484057 4 | 2580.0,9082,10.181498 5 | 2560.0,9081,13.884617 6 | 2580.0,9081,17.589159 7 | 2560.0,9082,21.302521 8 | 2580.0,9070,25.006688 9 | 2560.0,9081,28.729359 10 | 2560.0,9081,32.440299 11 | 2580.0,9082,36.154031 12 | 2580.0,9082,39.827491 13 | 2600.0,9081,43.539016 14 | 2560.0,9082,47.245576 15 | 2600.0,9082,50.952624 16 | 2580.0,9082,54.671462 17 | 2560.0,9082,58.392135 18 | 2560.0,9082,62.102942 19 | 2560.0,9050,65.804434 20 | 2600.0,9082,69.522391 21 | 2540.0,9114,73.254171 22 | 
2460.0,8694,76.806014 23 | 2540.0,9082,80.52209 24 | 2460.0,8665,84.067281 25 | 2540.0,8954,87.693721 26 | 2500.0,8794,91.284982 27 | 2580.0,9082,94.997058 28 | 2600.0,9081,98.713351 29 | 2560.0,9082,102.426905 30 | 2600.0,9081,106.259488 31 | 2580.0,9082,110.101429 32 | 2580.0,9082,113.837194 33 | 2580.0,9082,117.564113 34 | 2580.0,9082,121.291342 35 | 2580.0,9082,125.017416 36 | 2580.0,9082,128.74763 37 | 2560.0,9082,132.486106 38 | 2560.0,9081,136.265387 39 | 2500.0,9081,140.045696 40 | 2560.0,9082,143.820506 41 | 2560.0,9082,147.595112 42 | 2560.0,9113,151.388052 43 | 2560.0,9082,155.165497 44 | 1980.0,7418,158.271334 45 | 2580.0,9081,162.070482 46 | 2560.0,9082,165.861288 47 | 2600.0,9082,169.653452 48 | 2560.0,9081,173.442111 49 | 2560.0,9082,177.22763 50 | 2560.0,9082,180.942908 51 | 2600.0,9082,184.682519 52 | 2540.0,9081,188.42108 53 | 2580.0,9082,192.16452 54 | 2560.0,9081,195.902184 55 | 2580.0,9082,199.642456 56 | 2580.0,9082,203.379417 57 | 2560.0,9082,207.112618 58 | 2600.0,9081,210.860535 59 | 2560.0,9082,214.599277 60 | 2600.0,9082,218.338832 61 | 2580.0,9082,222.068007 62 | 2540.0,8794,225.65806 63 | 2580.0,9082,229.372201 64 | 2580.0,9082,233.098182 65 | 2560.0,9081,236.829339 66 | 2600.0,9082,240.560709 67 | 2560.0,9082,244.280681 68 | 2560.0,9114,248.011547 69 | -------------------------------------------------------------------------------- /logs/benchmark/qrdqn-SpaceInvadersNoFrameskip-v4/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614780157.951553, "env_id": "SpaceInvadersNoFrameskip-v4"} 2 | r,l,t 3 | 1860.0,9433,6.017921 4 | 1175.0,5897,8.11461 5 | 2030.0,8071,10.982974 6 | 1205.0,5963,13.093197 7 | 545.0,2807,14.085371 8 | 2715.0,11574,18.185369 9 | 2360.0,10915,22.055546 10 | 2695.0,12400,26.42824 11 | 2150.0,10363,30.087275 12 | 1200.0,6444,32.357866 13 | 1230.0,6287,34.576591 14 | 1235.0,5855,36.641079 15 | 1775.0,8365,39.598783 16 | 1235.0,5743,41.63625 17 | 
1200.0,5490,43.575229 18 | 2615.0,14253,48.580082 19 | 545.0,3365,49.769976 20 | 1805.0,8265,52.685741 21 | 2030.0,9036,55.880196 22 | 1230.0,6311,58.104826 23 | 2200.0,9673,61.514868 24 | 1200.0,6015,63.640325 25 | 1860.0,8851,66.758578 26 | 1435.0,5937,68.859172 27 | 600.0,3857,70.211838 28 | 2145.0,10022,73.769417 29 | 1375.0,6239,75.980469 30 | 1745.0,8183,78.877634 31 | 1230.0,5879,80.963021 32 | 1400.0,5782,83.007614 33 | 1860.0,8973,86.166276 34 | 2925.0,12519,90.581527 35 | 1355.0,6386,92.840083 36 | 2465.0,12334,97.176396 37 | 1235.0,6153,99.345549 38 | 2910.0,12098,103.606352 39 | 1835.0,8619,106.660892 40 | 575.0,3337,107.830249 41 | 2060.0,9007,111.002099 42 | 2695.0,12860,115.521394 43 | 2490.0,12121,119.778012 44 | 1200.0,6887,122.210271 45 | 1860.0,8317,125.142263 46 | 545.0,3244,126.286159 47 | 1175.0,5886,128.380142 48 | 2810.0,11428,132.413715 49 | 2860.0,11611,136.527394 50 | 3235.0,12789,141.018401 51 | 1145.0,5983,143.135476 52 | 1860.0,8419,146.100608 53 | 3920.0,17237,152.142236 54 | 1860.0,8593,155.161895 55 | 3725.0,14361,160.22655 56 | 2690.0,11517,164.291665 57 | 1435.0,5895,166.378403 58 | 3565.0,13548,171.137103 59 | 2540.0,10619,174.901788 60 | 2435.0,11811,179.077891 61 | 3380.0,14197,184.075026 62 | 570.0,3009,185.147563 63 | 1205.0,6201,187.337598 64 | 2720.0,12506,191.72243 65 | 1170.0,6345,193.966073 66 | 2915.0,11154,197.895742 67 | 1175.0,6241,200.099791 68 | 1805.0,9273,203.379786 69 | 2320.0,10262,206.994567 70 | 1430.0,6411,209.251219 71 | 3115.0,11792,213.408668 72 | -------------------------------------------------------------------------------- /logs/benchmark/sac-BipedalWalker-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1614772839.3377283, "env_id": "BipedalWalker-v3"} 2 | r,l,t 3 | 302.360601,1114,3.36505 4 | 299.982386,1097,4.165927 5 | 301.305306,1118,4.981788 6 | 300.755528,1109,5.789752 7 | 300.41319,1103,6.591837 8 | 300.136167,1112,7.402884 9 | 
301.942391,1111,8.212199 10 | 300.705809,1109,9.019499 11 | 302.139217,1109,9.826885 12 | 300.911729,1104,10.629605 13 | 302.272265,1109,11.437745 14 | 299.604268,1104,12.241354 15 | 299.278248,1100,13.040194 16 | 300.414958,1101,13.841523 17 | 300.574915,1113,14.650683 18 | 299.422525,1108,15.457727 19 | 301.058969,1113,16.265585 20 | 300.929906,1111,17.070907 21 | 301.217599,1112,17.876746 22 | 299.808359,1099,18.676308 23 | 301.97646,1103,19.475271 24 | 299.383641,1105,20.276059 25 | 301.048902,1119,21.089404 26 | 300.719316,1104,21.890113 27 | 300.35663,1104,22.689351 28 | 299.585902,1102,23.49109 29 | 300.387684,1101,24.289257 30 | 301.02301,1105,25.091187 31 | 299.283324,1103,25.888853 32 | 298.797811,1101,26.6852 33 | 300.889551,1105,27.484889 34 | 301.312607,1117,28.293692 35 | 298.839066,1104,29.090828 36 | 301.465511,1105,29.88746 37 | 299.999362,1107,30.688471 38 | 300.343653,1119,31.501015 39 | 300.794821,1105,32.298504 40 | 301.566184,1122,33.106802 41 | 300.154061,1104,33.904654 42 | 300.601982,1109,34.707381 43 | 299.783959,1116,35.512661 44 | 299.249841,1103,36.308428 45 | 300.459917,1103,37.104169 46 | 301.997507,1113,37.908109 47 | 299.710068,1101,38.703565 48 | 301.021391,1097,39.493932 49 | 302.55917,1113,40.297005 50 | 300.073168,1107,41.096599 51 | 303.078654,1119,41.897959 52 | 298.265251,1104,42.68975 53 | 300.982828,1104,43.482014 54 | 300.353754,1110,44.280113 55 | 300.748211,1112,45.076373 56 | 300.661172,1118,45.876885 57 | 300.173002,1106,46.671299 58 | 302.059827,1106,47.467817 59 | 302.151632,1115,48.265848 60 | 298.925516,1097,49.052199 61 | 300.456355,1118,49.853958 62 | 300.882569,1105,50.647162 63 | 301.373377,1115,51.445441 64 | 301.108553,1104,52.237467 65 | 298.812307,1104,53.032318 66 | 298.587068,1092,53.822424 67 | 296.778464,1099,54.612579 68 | 299.83619,1094,55.396447 69 | 301.041658,1104,56.188702 70 | 298.44367,1095,56.975703 71 | 301.994168,1104,57.765858 72 | 299.021788,1102,58.555475 73 | 301.347008,1109,59.351158 74 
| 300.822325,1101,60.142673 75 | -86.211804,114,60.227066 76 | 302.564616,1114,61.024435 77 | 299.492492,1107,61.81947 78 | 301.447796,1108,62.615735 79 | 302.675531,1117,63.414827 80 | 301.229215,1103,64.204417 81 | 300.875007,1114,65.002628 82 | 300.933166,1094,65.787878 83 | 301.602342,1113,66.584519 84 | 298.375637,1112,67.380561 85 | 299.737012,1103,68.171237 86 | 299.803846,1105,68.962979 87 | 300.758485,1108,69.756849 88 | 301.953926,1109,70.549286 89 | 299.530853,1110,71.343521 90 | 300.314862,1108,72.137488 91 | 299.606724,1106,72.927233 92 | 298.734268,1100,73.712741 93 | 298.709114,1092,74.494166 94 | 298.423564,1095,75.279155 95 | 302.788395,1110,76.072666 96 | 298.820547,1098,76.859971 97 | 298.282516,1108,77.653742 98 | 298.552512,1109,78.449211 99 | 300.992863,1104,79.239589 100 | 299.893245,1100,80.026569 101 | 299.645259,1095,80.809745 102 | 300.726465,1106,81.601357 103 | 301.481949,1110,82.394461 104 | 300.000249,1109,83.191714 105 | 300.958623,1128,84.001832 106 | 300.926692,1123,84.806707 107 | 301.136796,1103,85.595889 108 | 300.421307,1104,86.386729 109 | 301.304972,1102,87.177112 110 | 299.196795,1107,87.971708 111 | 300.282078,1105,88.7635 112 | 298.923465,1110,89.557497 113 | 301.749845,1121,90.359199 114 | 300.179531,1102,91.150525 115 | 301.675692,1115,91.949993 116 | 298.664999,1098,92.738292 117 | 302.259756,1104,93.530223 118 | 300.335136,1109,94.327485 119 | 302.262681,1108,95.121866 120 | 301.742746,1102,95.910671 121 | 301.690168,1118,96.714285 122 | 299.771082,1106,97.508253 123 | 300.023118,1103,98.298843 124 | 297.838527,1092,99.080485 125 | 300.998053,1116,99.881017 126 | 300.246662,1101,100.672354 127 | 300.242191,1105,101.463338 128 | 300.060263,1099,102.250881 129 | 301.302396,1107,103.04414 130 | 301.973059,1114,103.8426 131 | 301.272366,1108,104.634917 132 | 302.632714,1108,105.426627 133 | 300.551611,1100,106.214734 134 | 302.824584,1116,107.015751 135 | 300.550722,1109,107.810142 136 | 300.636237,1107,108.603951 137 | 
300.656654,1114,109.403951 138 | 299.35787,1095,110.189639 139 | -------------------------------------------------------------------------------- /logs/benchmark/sac-BipedalWalkerHardcore-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615282937.308309, "env_id": "BipedalWalkerHardcore-v3"} 2 | r,l,t 3 | 41.778326,953,3.662442 4 | 180.467412,2000,5.824946 5 | 97.295092,1363,7.285249 6 | -81.182201,346,7.643032 7 | -40.802484,2000,9.825659 8 | -99.934007,2000,11.965935 9 | -42.94785,2000,14.116859 10 | 282.487894,1556,15.766352 11 | -2.732902,779,16.615764 12 | -17.791582,2000,18.759749 13 | -118.597024,2000,20.948429 14 | 279.944892,1562,22.609763 15 | -37.321344,2000,24.767509 16 | -55.572132,441,25.237352 17 | 65.808814,2000,27.385929 18 | 54.506992,2000,29.527135 19 | -30.193249,2000,31.700151 20 | 262.237829,1844,33.642516 21 | -135.306954,2000,35.582859 22 | 32.545857,2000,37.762973 23 | -96.304213,2000,39.912227 24 | -62.419513,2000,42.052971 25 | -10.491988,2000,44.205731 26 | 3.009945,2000,46.351385 27 | -41.232128,2000,48.469136 28 | -25.112829,2000,50.622787 29 | 1.402057,604,51.264819 30 | 113.538035,2000,53.445968 31 | 279.57376,1561,55.120767 32 | -11.387091,512,55.665265 33 | 2.594991,2000,57.829058 34 | -46.001328,2000,59.981994 35 | -111.158098,209,60.211232 36 | 5.653206,2000,62.355254 37 | -115.540607,2000,64.528579 38 | -29.031126,2000,66.686181 39 | -20.090611,2000,68.844233 40 | 25.867034,2000,71.026041 41 | 68.224921,2000,73.172868 42 | -41.639959,2000,75.30538 43 | -63.43205,314,75.639336 44 | -55.897433,2000,77.824094 45 | -79.076376,348,78.183614 46 | -75.219312,336,78.550389 47 | 166.120098,2000,80.70091 48 | -6.885657,2000,82.591767 49 | 67.274203,2000,84.728624 50 | -100.736684,2000,86.88388 51 | 9.361807,2000,89.031049 52 | -104.43978,2000,91.185034 53 | 175.253622,2000,93.32884 54 | 72.826926,1502,94.950952 55 | -107.557122,426,95.420961 56 | 34.350763,2000,97.562223 
57 | 118.684665,2000,99.711555 58 | 18.436068,918,100.70732 59 | -72.023005,512,101.245563 60 | -94.057267,2000,103.430716 61 | -14.339757,2000,105.585078 62 | 276.646473,1682,107.405251 63 | -93.379379,2000,109.552183 64 | -32.890574,2000,111.734328 65 | -19.358966,2000,113.875139 66 | -97.591226,2000,116.016107 67 | -104.305948,2000,118.203043 68 | 268.461755,1756,120.0877 69 | -83.482728,2000,122.255662 70 | 59.102619,2000,124.415739 71 | 31.694314,2000,126.600275 72 | -24.754214,2000,128.495678 73 | -61.835281,2000,130.675213 74 | -35.282332,2000,132.834678 75 | -33.850913,2000,134.96645 76 | -111.449906,2000,137.133202 77 | -113.290239,263,137.41089 78 | 7.253,2000,139.565842 79 | 100.123289,1549,141.236544 80 | -35.315291,2000,143.384549 81 | -62.103672,2000,145.527463 82 | -55.579403,458,146.024764 83 | 176.048998,2000,148.188452 84 | -15.598691,2000,150.343705 85 | 100.101637,2000,152.481565 86 | 138.802199,2000,154.663462 87 | -108.792934,2000,156.829956 88 | -6.308218,2000,158.973216 89 | -91.763453,2000,161.134987 90 | 5.102684,2000,163.266236 91 | -------------------------------------------------------------------------------- /logs/benchmark/td3-BipedalWalkerHardcore-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 1615035716.3739204, "env_id": "BipedalWalkerHardcore-v3"} 2 | r,l,t 3 | -75.120605,2000,4.864824 4 | -132.737428,2000,6.84062 5 | -75.439565,2000,8.801577 6 | -76.590578,2000,10.690364 7 | -77.46551,2000,12.538607 8 | -118.239931,2000,14.351909 9 | -104.317353,2000,16.067962 10 | -69.326927,2000,17.797764 11 | -117.754794,2000,19.485572 12 | -72.893132,2000,21.180301 13 | -100.400577,2000,22.862713 14 | -115.384812,2000,24.573101 15 | -111.380875,2000,26.278326 16 | -113.021511,2000,27.959434 17 | -72.503946,2000,29.715913 18 | -118.90079,2000,31.564067 19 | -104.994169,2000,33.428279 20 | -112.627715,2000,35.262558 21 | -76.1824,2000,36.904961 22 | -76.263312,2000,38.549288 
23 | -115.804972,2000,40.212135 24 | -112.246159,2000,41.872432 25 | -91.741549,2000,43.504618 26 | -82.195084,2000,45.10595 27 | -79.176994,2000,46.691015 28 | -99.303968,2000,48.336355 29 | -87.31807,2000,49.946664 30 | -95.103121,2000,51.592625 31 | -106.846239,2000,53.230053 32 | -113.287192,2000,54.865682 33 | -102.919775,2000,56.480147 34 | -90.229798,2000,58.131267 35 | -115.558105,2000,59.771812 36 | -108.654203,2000,61.383754 37 | -115.18619,2000,63.12032 38 | -76.182868,2000,64.965528 39 | -81.919436,2000,66.800929 40 | -94.051451,2000,68.635713 41 | -100.343992,2000,70.541423 42 | -107.29148,2000,72.423547 43 | -89.732649,2000,74.275149 44 | -101.208089,2000,76.139937 45 | -73.837888,2000,78.020883 46 | -108.810423,2000,79.869502 47 | -90.368393,2000,81.699364 48 | -118.587039,2000,83.560536 49 | -90.45015,2000,85.259661 50 | -111.38551,2000,86.979527 51 | -90.475702,2000,88.684398 52 | -115.950619,2000,90.399352 53 | -82.306352,2000,92.135506 54 | -73.167066,2000,93.870667 55 | -116.350767,2000,95.585823 56 | -104.899976,2000,97.168645 57 | -109.348082,2000,98.77277 58 | -89.763986,2000,100.360732 59 | -75.626801,2000,101.942616 60 | -114.163178,2000,103.571591 61 | -108.562208,2000,105.200625 62 | -81.079522,2000,106.821184 63 | -94.88129,2000,108.5505 64 | -95.688965,2000,110.409605 65 | -96.025015,2000,112.25252 66 | -115.966983,2000,114.110633 67 | -104.212579,2000,115.954515 68 | -108.756676,2000,117.801634 69 | -77.533709,2000,119.665398 70 | -108.630552,2000,121.521044 71 | -90.906734,2000,123.2634 72 | -132.124042,2000,124.980735 73 | -82.543561,2000,126.690589 74 | -110.444789,2000,128.422873 75 | -81.163665,2000,130.135998 76 | -116.381794,2000,131.861476 77 | -100.473055,2000,133.715868 78 | -------------------------------------------------------------------------------- /logs/benchmark/tqc-BipedalWalkerHardcore-v3/0.monitor.csv: -------------------------------------------------------------------------------- 1 | #{"t_start": 
1614972944.323092, "env_id": "BipedalWalkerHardcore-v3"} 2 | r,l,t 3 | 158.985985,844,4.102602 4 | 300.32726,1050,5.244834 5 | 300.535558,1038,6.388168 6 | 296.875619,1085,7.575825 7 | 302.358559,1013,8.64315 8 | 25.923265,577,9.246357 9 | 228.283257,2000,11.337982 10 | 311.316454,975,12.353261 11 | 52.706594,568,12.944439 12 | -23.643158,330,13.288158 13 | 298.875393,1063,14.3944 14 | 307.395613,1004,15.440466 15 | 291.257043,1082,16.563598 16 | 302.321319,1016,17.622731 17 | 292.55956,1117,18.783862 18 | 300.52161,1030,19.861118 19 | 305.402259,1032,20.934168 20 | 305.712174,960,21.97389 21 | 15.765116,441,22.457574 22 | 286.893706,1176,23.768661 23 | 292.588987,1106,25.062059 24 | 303.161458,1006,26.238271 25 | 124.72623,2000,28.58553 26 | 299.188807,1067,29.837964 27 | 297.265926,1055,31.059305 28 | 299.246128,1058,32.225142 29 | 138.067304,2000,34.423318 30 | 158.101962,2000,36.611668 31 | 307.149411,982,37.682646 32 | 301.376617,1033,38.810155 33 | 52.525784,2000,41.012082 34 | -5.957255,2000,43.22972 35 | 303.222906,1042,44.368109 36 | -30.104262,296,44.690454 37 | 302.856284,1033,45.814348 38 | 299.417344,1052,46.969577 39 | 299.568715,1052,48.122539 40 | 302.167447,1038,49.255569 41 | 300.055837,1071,50.428728 42 | 304.075581,974,51.535719 43 | 300.606989,1064,52.697526 44 | 21.185638,2000,54.917194 45 | 293.851598,1069,56.089131 46 | 35.995669,2000,58.279191 47 | 218.588658,2000,60.470027 48 | 44.859679,2000,62.676093 49 | 298.081841,1028,63.770969 50 | 291.965119,1100,64.95215 51 | 288.8617,1160,66.225818 52 | 65.046742,883,67.195506 53 | 303.598842,1029,68.311282 54 | 298.681165,1122,69.500515 55 | 134.920099,816,70.363624 56 | 303.944964,963,71.381186 57 | 261.650718,2000,73.633541 58 | 298.924762,1050,74.818844 59 | 178.425483,2000,77.085008 60 | 301.952066,1029,78.24571 61 | 297.590377,1096,79.475244 62 | 308.256382,999,80.60499 63 | 303.66264,1048,81.782448 64 | 233.439754,2000,83.977091 65 | -43.841983,314,84.32413 66 | 291.240187,1248,85.687433 67 
| 303.171049,967,86.73503 68 | 307.011786,993,87.805815 69 | 307.201248,1034,88.926074 70 | 92.128525,2000,91.102643 71 | 298.651352,1050,92.212756 72 | 296.614816,1089,93.362487 73 | 195.222046,2000,95.562203 74 | 305.238567,1023,96.697325 75 | 303.564229,1010,97.819911 76 | 291.643838,1115,99.053748 77 | 295.809282,1120,100.295977 78 | 303.4209,967,101.361445 79 | 307.599352,973,102.432886 80 | 296.079821,1059,103.588525 81 | 296.641486,1042,104.726761 82 | 304.632718,974,105.792635 83 | 300.684866,1019,106.913619 84 | 300.337727,1037,108.04645 85 | 294.32521,1111,109.264762 86 | 299.984938,1028,110.397997 87 | 288.785893,1167,111.689689 88 | 303.119877,1011,112.865363 89 | 298.647242,1058,114.10566 90 | 48.931804,2000,116.453665 91 | 187.262679,2000,118.781733 92 | 291.822427,1128,120.056754 93 | 303.411501,1061,121.223087 94 | 297.410528,1064,122.394384 95 | 45.52435,545,122.994992 96 | 298.206199,1059,124.1576 97 | 302.624397,1032,125.287297 98 | 237.922429,2000,127.487648 99 | 12.073966,514,128.050607 100 | 307.005817,971,129.119031 101 | 217.421274,2000,131.331841 102 | -54.266479,274,131.637203 103 | -45.189495,272,131.948989 104 | 301.654401,983,133.04844 105 | 305.29741,1005,134.153365 106 | 110.310658,2000,136.342585 107 | 304.438778,963,137.356552 108 | 299.639912,1068,138.473265 109 | -61.984761,317,138.806689 110 | 296.903281,1087,139.96187 111 | 311.947863,989,141.033978 112 | 305.173788,968,142.080554 113 | -24.936318,449,142.561262 114 | 293.318206,1155,143.815958 115 | 302.575292,1039,144.945063 116 | 301.656432,1026,146.112909 117 | 298.843551,1037,147.290129 118 | 295.248605,1068,148.497394 119 | 305.653108,1032,149.650847 120 | 169.665353,2000,151.820901 121 | 295.730421,1071,152.976426 122 | 41.452605,2000,155.148677 123 | 304.44089,1024,156.245353 124 | 305.12927,1002,157.330209 125 | 300.891507,1032,158.455373 126 | 121.746389,867,159.394768 127 | 300.548733,1040,160.525973 128 | 97.863702,758,161.330169 129 | 300.743761,1009,162.391677 130 
| 301.44053,1067,163.515258 131 | 57.961807,713,164.271593 132 | 290.951927,1117,165.47083 133 | 299.077015,1095,166.676866 134 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | # Same as Black. 3 | line-length = 127 4 | # Assume Python 3.9 5 | target-version = "py39" 6 | 7 | [tool.ruff.lint] 8 | # See https://beta.ruff.rs/docs/rules/ 9 | select = ["E", "F", "B", "UP", "C90", "RUF"] 10 | # Ignore missing explicit `stacklevel` 11 | ignore = ["B028"] 12 | 13 | [tool.ruff.lint.per-file-ignores] 14 | "./rl_zoo3/import_envs.py"= ["F401"] 15 | # "./rl_zoo3/plots/plot_train.py"= ["E501"] 16 | 17 | 18 | [tool.ruff.lint.mccabe] 19 | # Ruff defaults to a complexity level of 10; allow up to 15 here. 20 | max-complexity = 15 21 | 22 | [tool.black] 23 | line-length = 127 24 | 25 | [tool.mypy] 26 | ignore_missing_imports = true 27 | follow_imports = "silent" 28 | show_error_codes = true 29 | exclude = """(?x)( 30 | tests/dummy_env/*$ 31 | )""" 32 | 33 | [tool.pytest.ini_options] 34 | # Deterministic ordering for tests; useful for pytest-xdist. 
35 | env = [ 36 | "PYTHONHASHSEED=0" 37 | ] 38 | 39 | filterwarnings = [ 40 | # Tensorboard warnings 41 | "ignore::DeprecationWarning:tensorboard", 42 | # Gym warnings 43 | "ignore::UserWarning:gym", 44 | ] 45 | markers = [ 46 | "slow: marks tests as slow (deselect with '-m \"not slow\"')" 47 | ] 48 | 49 | [tool.coverage.run] 50 | disable_warnings = ["couldnt-parse"] 51 | branch = false 52 | omit = [ 53 | "tests/*", 54 | "setup.py", 55 | "rl_zoo3/plots/*", 56 | "rl_zoo3/push_to_hub.py", 57 | "scripts/*", 58 | ] 59 | 60 | [tool.coverage.report] 61 | exclude_lines = [ "pragma: no cover", "raise NotImplementedError()", "if typing.TYPE_CHECKING:"] 62 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.26.2 2 | stable-baselines3[extra,tests,docs]>=2.6.1a1,<3.0 3 | box2d-py==2.3.8 4 | pybullet_envs_gymnasium>=0.6.0 5 | # minigrid 6 | cloudpickle>=2.2.1 7 | # Optuna auto 8 | optunahub>=0.2.0 9 | # optuna plots: 10 | plotly 11 | # need to upgrade to gymnasium: 12 | # panda-gym~=3.0.1 13 | wandb 14 | moviepy>=1.0.0 15 | -------------------------------------------------------------------------------- /rl_zoo3/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # isort: off 4 | 5 | import rl_zoo3.gym_patches # noqa: F401 6 | 7 | # isort: on 8 | 9 | from rl_zoo3.utils import ( 10 | ALGOS, 11 | create_test_env, 12 | get_latest_run_id, 13 | get_saved_hyperparams, 14 | get_trained_models, 15 | get_wrapper_class, 16 | linear_schedule, 17 | ) 18 | 19 | # Read version from file 20 | version_file = os.path.join(os.path.dirname(__file__), "version.txt") 21 | with open(version_file) as file_handler: 22 | __version__ = file_handler.read().strip() 23 | 24 | __all__ = [ 25 | "ALGOS", 26 | "create_test_env", 27 | "get_latest_run_id", 28 | "get_saved_hyperparams", 29 | 
"get_trained_models", 30 | "get_wrapper_class", 31 | "linear_schedule", 32 | ] 33 | -------------------------------------------------------------------------------- /rl_zoo3/cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from rl_zoo3.enjoy import enjoy 4 | from rl_zoo3.plots import all_plots, plot_from_file, plot_train 5 | from rl_zoo3.train import train 6 | 7 | 8 | def main(): 9 | script_name = sys.argv[1] 10 | # Remove script name 11 | del sys.argv[1] 12 | # Execute known script 13 | known_scripts = { 14 | "train": train, 15 | "enjoy": enjoy, 16 | "plot_train": plot_train, 17 | "plot_from_file": plot_from_file, 18 | "all_plots": all_plots, 19 | } 20 | if script_name not in known_scripts.keys(): 21 | raise ValueError(f"The script {script_name} is unknown, please use one of {known_scripts.keys()}") 22 | known_scripts[script_name]() 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /rl_zoo3/gym_patches.py: -------------------------------------------------------------------------------- 1 | """ 2 | Patches for gym 0.26+ so RL Zoo3 keeps working as before 3 | (notably TimeLimit wrapper and Pybullet envs) 4 | """ 5 | 6 | import numpy as np 7 | 8 | # Deprecation warning with gym 0.26 and numpy 1.24 9 | np.bool8 = np.bool_ # type: ignore[attr-defined] 10 | 11 | import gymnasium # noqa: E402 12 | 13 | 14 | class PatchedTimeLimit(gymnasium.wrappers.TimeLimit): 15 | """ 16 | See https://github.com/openai/gym/issues/3102 17 | and https://github.com/Farama-Foundation/Gymnasium/pull/101: 18 | keep the behavior as before and provide additional info 19 | that the episode reached a timeout, but only 20 | when the episode is over because of that. 
21 | """ 22 | 23 | def step(self, action): 24 | observation, reward, terminated, truncated, info = self.env.step(action) 25 | self._elapsed_steps += 1 26 | 27 | if self._elapsed_steps >= self._max_episode_steps: 28 | done = truncated or terminated 29 | # TimeLimit.truncated key may have been already set by the environment 30 | # do not overwrite it 31 | # only set it when the episode is not over for other reasons 32 | episode_truncated = not done or info.get("TimeLimit.truncated", False) 33 | info["TimeLimit.truncated"] = episode_truncated 34 | # truncated may have been set by the env too 35 | truncated = truncated or episode_truncated 36 | 37 | return observation, reward, terminated, truncated, info 38 | 39 | 40 | # Patch Gymnasium TimeLimit 41 | gymnasium.wrappers.TimeLimit = PatchedTimeLimit # type: ignore[misc] 42 | try: 43 | gymnasium.wrappers.time_limit.TimeLimit = PatchedTimeLimit # type: ignore[misc] 44 | except AttributeError: 45 | gymnasium.wrappers.common.TimeLimit = PatchedTimeLimit # type: ignore 46 | gymnasium.envs.registration.TimeLimit = PatchedTimeLimit # type: ignore[misc,attr-defined] 47 | -------------------------------------------------------------------------------- /rl_zoo3/import_envs.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Optional 2 | 3 | import gymnasium as gym 4 | from gymnasium.envs.registration import register, register_envs 5 | 6 | from rl_zoo3.wrappers import MaskVelocityWrapper 7 | 8 | try: 9 | import pybullet_envs_gymnasium 10 | except ImportError: 11 | pass 12 | 13 | try: 14 | import ale_py 15 | 16 | # no-op 17 | gym.register_envs(ale_py) 18 | except ImportError: 19 | pass 20 | 21 | try: 22 | import highway_env 23 | except ImportError: 24 | pass 25 | else: 26 | # hotfix for highway_env 27 | import numpy as np 28 | 29 | np.float = np.float32 # type: ignore[attr-defined] 30 | 31 | try: 32 | import custom_envs 33 | except ImportError: 34 | pass 35 | 36 | 
try: 37 | import gym_donkeycar 38 | except ImportError: 39 | pass 40 | 41 | try: 42 | import panda_gym 43 | except ImportError: 44 | pass 45 | 46 | try: 47 | import rocket_lander_gym 48 | except ImportError: 49 | pass 50 | 51 | try: 52 | import minigrid 53 | except ImportError: 54 | pass 55 | 56 | 57 | # Register no vel envs 58 | def create_no_vel_env(env_id: str) -> Callable[[Optional[str]], gym.Env]: 59 | def make_env(render_mode: Optional[str] = None) -> gym.Env: 60 | env = gym.make(env_id, render_mode=render_mode) 61 | env = MaskVelocityWrapper(env) 62 | return env 63 | 64 | return make_env 65 | 66 | 67 | for env_id in MaskVelocityWrapper.velocity_indices.keys(): 68 | name, version = env_id.split("-v") 69 | register( 70 | id=f"{name}NoVel-v{version}", 71 | entry_point=create_no_vel_env(env_id), # type: ignore[arg-type] 72 | ) 73 | -------------------------------------------------------------------------------- /rl_zoo3/plots/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_zoo3.plots.all_plots import all_plots 2 | from rl_zoo3.plots.plot_from_file import plot_from_file 3 | from rl_zoo3.plots.plot_train import plot_train 4 | 5 | __all__ = [ 6 | "all_plots", 7 | "plot_from_file", 8 | "plot_train", 9 | ] 10 | -------------------------------------------------------------------------------- /rl_zoo3/plots/plot_train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plot training reward/success rate 3 | """ 4 | 5 | import argparse 6 | import os 7 | 8 | import numpy as np 9 | import seaborn 10 | from matplotlib import pyplot as plt 11 | from stable_baselines3.common.monitor import LoadMonitorResultsError, load_results 12 | from stable_baselines3.common.results_plotter import X_EPISODES, X_TIMESTEPS, X_WALLTIME, ts2xy, window_func 13 | 14 | # Activate seaborn 15 | seaborn.set() 16 | 17 | 18 | def plot_train(): 19 | parser = argparse.ArgumentParser("Gather results, 
plot training reward/success") 20 | parser.add_argument("-a", "--algo", help="Algorithm to include", type=str, required=True) 21 | parser.add_argument("-e", "--env", help="Environment(s) to include", nargs="+", type=str, required=True) 22 | parser.add_argument("-f", "--exp-folder", help="Folders to include", type=str, required=True) 23 | parser.add_argument("--figsize", help="Figure size, width, height in inches.", nargs=2, type=int, default=[6.4, 4.8]) 24 | parser.add_argument("--fontsize", help="Font size", type=int, default=14) 25 | parser.add_argument("-max", "--max-timesteps", help="Max number of timesteps to display", type=int) 26 | parser.add_argument("-x", "--x-axis", help="X-axis", choices=["steps", "episodes", "time"], type=str, default="steps") 27 | parser.add_argument("-y", "--y-axis", help="Y-axis", choices=["success", "reward", "length"], type=str, default="reward") 28 | parser.add_argument("-w", "--episode-window", help="Rolling window size", type=int, default=100) 29 | 30 | args = parser.parse_args() 31 | 32 | algo = args.algo 33 | envs = args.env 34 | log_path = os.path.join(args.exp_folder, algo) 35 | 36 | x_axis = { 37 | "steps": X_TIMESTEPS, 38 | "episodes": X_EPISODES, 39 | "time": X_WALLTIME, 40 | }[args.x_axis] 41 | x_label = { 42 | "steps": "Timesteps", 43 | "episodes": "Episodes", 44 | "time": "Walltime (in hours)", 45 | }[args.x_axis] 46 | 47 | y_axis = { 48 | "success": "is_success", 49 | "reward": "r", 50 | "length": "l", 51 | }[args.y_axis] 52 | y_label = { 53 | "success": "Training Success Rate", 54 | "reward": "Training Episodic Reward", 55 | "length": "Training Episode Length", 56 | }[args.y_axis] 57 | 58 | dirs = [] 59 | 60 | for env in envs: 61 | # Sort by last modification 62 | entries = sorted(os.scandir(log_path), key=lambda entry: entry.stat().st_mtime) 63 | dirs.extend(entry.path for entry in entries if env in entry.name and entry.is_dir()) 64 | 65 | plt.figure(y_label, figsize=args.figsize) 66 | plt.title(y_label, 
fontsize=args.fontsize) 67 | plt.xlabel(f"{x_label}", fontsize=args.fontsize) 68 | plt.ylabel(y_label, fontsize=args.fontsize) 69 | for folder in dirs: 70 | try: 71 | data_frame = load_results(folder) 72 | except LoadMonitorResultsError: 73 | continue 74 | if args.max_timesteps is not None: 75 | data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps] 76 | try: 77 | y = np.array(data_frame[y_axis]) 78 | except KeyError: 79 | print(f"No data available for {folder}") 80 | continue 81 | x, _ = ts2xy(data_frame, x_axis) 82 | 83 | # Do not plot the smoothed curve at all if the timeseries is shorter than window size. 84 | if x.shape[0] >= args.episode_window: 85 | # Compute and plot rolling mean with window of size args.episode_window 86 | x, y_mean = window_func(x, y, args.episode_window, np.mean) 87 | plt.plot(x, y_mean, linewidth=2, label=folder.split("/")[-1]) 88 | 89 | plt.legend() 90 | plt.tight_layout() 91 | plt.show() 92 | 93 | 94 | if __name__ == "__main__": 95 | plot_train() 96 | -------------------------------------------------------------------------------- /rl_zoo3/plots/score_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Min and Max score for each env for normalization when plotting. 3 | Min score corresponds to random agent. 4 | Max score corresponds to acceptable performance, for instance 5 | human level performance in the case of Atari games. 
6 | """ 7 | 8 | from typing import NamedTuple 9 | 10 | import numpy as np 11 | 12 | 13 | class ReferenceScore(NamedTuple): 14 | env_id: str 15 | min: float 16 | max: float 17 | 18 | 19 | reference_scores = [ 20 | # PyBullet Envs 21 | ReferenceScore("HalfCheetahBulletEnv-v0", -1400, 3000), 22 | ReferenceScore("AntBulletEnv-v0", 300, 3500), 23 | ReferenceScore("HopperBulletEnv-v0", 20, 2500), 24 | ReferenceScore("Walker2DBulletEnv-v0", 200, 2500), 25 | ] 26 | 27 | # Alternative scaling 28 | # Min is a poorly optimized algorithm 29 | # reference_scores = [ 30 | # ReferenceScore("HalfCheetahBulletEnv-v0", 1000, 3000), 31 | # ReferenceScore("AntBulletEnv-v0", 1000, 3500), 32 | # ReferenceScore("HopperBulletEnv-v0", 1000, 2500), 33 | # ReferenceScore("Walker2DBulletEnv-v0", 500, 2500), 34 | # ] 35 | 36 | min_max_score_per_env = {reference_score.env_id: reference_score for reference_score in reference_scores} 37 | 38 | 39 | def normalize_score(score: np.ndarray, env_id: str) -> np.ndarray: 40 | """ 41 | Normalize score to be in [0, 1] where 1 is maximal performance. 
42 | 43 | :param score: unnormalized score 44 | :param env_id: environment id 45 | :return: normalized score 46 | """ 47 | if env_id not in min_max_score_per_env: 48 | raise KeyError(f"No reference score for {env_id}") 49 | reference_score = min_max_score_per_env[env_id] 50 | return (score - reference_score.min) / (reference_score.max - reference_score.min) 51 | 52 | 53 | # From rliable, for atari games: 54 | # 55 | # RANDOM_SCORES = { 56 | # 'Alien': 227.8, 57 | # 'Amidar': 5.8, 58 | # 'Assault': 222.4, 59 | # 'Asterix': 210.0, 60 | # 'BankHeist': 14.2, 61 | # 'BattleZone': 2360.0, 62 | # 'Boxing': 0.1, 63 | # 'Breakout': 1.7, 64 | # 'ChopperCommand': 811.0, 65 | # 'CrazyClimber': 10780.5, 66 | # 'DemonAttack': 152.1, 67 | # 'Freeway': 0.0, 68 | # 'Frostbite': 65.2, 69 | # 'Gopher': 257.6, 70 | # 'Hero': 1027.0, 71 | # 'Jamesbond': 29.0, 72 | # 'Kangaroo': 52.0, 73 | # 'Krull': 1598.0, 74 | # 'KungFuMaster': 258.5, 75 | # 'MsPacman': 307.3, 76 | # 'Pong': -20.7, 77 | # 'PrivateEye': 24.9, 78 | # 'Qbert': 163.9, 79 | # 'RoadRunner': 11.5, 80 | # 'Seaquest': 68.4, 81 | # 'UpNDown': 533.4 82 | # } 83 | # 84 | # HUMAN_SCORES = { 85 | # 'Alien': 7127.7, 86 | # 'Amidar': 1719.5, 87 | # 'Assault': 742.0, 88 | # 'Asterix': 8503.3, 89 | # 'BankHeist': 753.1, 90 | # 'BattleZone': 37187.5, 91 | # 'Boxing': 12.1, 92 | # 'Breakout': 30.5, 93 | # 'ChopperCommand': 7387.8, 94 | # 'CrazyClimber': 35829.4, 95 | # 'DemonAttack': 1971.0, 96 | # 'Freeway': 29.6, 97 | # 'Frostbite': 4334.7, 98 | # 'Gopher': 2412.5, 99 | # 'Hero': 30826.4, 100 | # 'Jamesbond': 302.8, 101 | # 'Kangaroo': 3035.0, 102 | # 'Krull': 2665.5, 103 | # 'KungFuMaster': 22736.3, 104 | # 'MsPacman': 6951.6, 105 | # 'Pong': 14.6, 106 | # 'PrivateEye': 69571.3, 107 | # 'Qbert': 13455.0, 108 | # 'RoadRunner': 7845.0, 109 | # 'Seaquest': 42054.7, 110 | # 'UpNDown': 11693.2 111 | # } 112 | -------------------------------------------------------------------------------- /rl_zoo3/py.typed: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/rl_zoo3/py.typed -------------------------------------------------------------------------------- /rl_zoo3/version.txt: -------------------------------------------------------------------------------- 1 | 2.6.1a1 2 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/all_plots.py: -------------------------------------------------------------------------------- 1 | from rl_zoo3.plots.all_plots import all_plots 2 | 3 | if __name__ == "__main__": 4 | all_plots() 5 | -------------------------------------------------------------------------------- /scripts/build_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PARENT=stablebaselines/stable-baselines3 4 | 5 | TAG=stablebaselines/rl-baselines3-zoo 6 | VERSION=$(cat ./rl_zoo3/version.txt) 7 | 8 | if [[ ${USE_GPU} == "True" ]]; then 9 | PARENT="${PARENT}:${VERSION}" 10 | else 11 | PARENT="${PARENT}-cpu:${VERSION}" 12 | TAG="${TAG}-cpu" 13 | fi 14 | 15 | docker build --build-arg PARENT_IMAGE=${PARENT} -t ${TAG}:${VERSION} . 
-f docker/Dockerfile 16 | docker tag ${TAG}:${VERSION} ${TAG}:latest 17 | 18 | if [[ ${RELEASE} == "True" ]]; then 19 | docker push ${TAG}:${VERSION} 20 | docker push ${TAG}:latest 21 | fi 22 | -------------------------------------------------------------------------------- /scripts/create_cluster_jobs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Send multiple jobs to the cluster. 3 | """ 4 | 5 | import os 6 | import subprocess 7 | import time 8 | 9 | import numpy as np 10 | 11 | ALGOS = ["sac"] 12 | ENVS = ["HalfCheetahBulletEnv-v0"] 13 | N_SEEDS = 5 14 | N_EVAL_EPISODES = 10 15 | LOG_STD_INIT = [-6, -5, -4, -3, -2, -1, 0, 1] 16 | 17 | os.makedirs(os.path.join("logs", "slurm"), exist_ok=True) 18 | 19 | for algo in ALGOS: 20 | for env_id in ENVS: 21 | for log_std_init in LOG_STD_INIT: 22 | log_folder = f"logs_std_{np.exp(log_std_init):.4f}" 23 | for _ in range(N_SEEDS): 24 | args = [ 25 | "--algo", 26 | algo, 27 | "--env", 28 | env_id, 29 | "--hyperparams", 30 | f'policy_kwargs:"dict(log_std_init={log_std_init}, net_arch=[400, 300])"', 31 | "--eval-episodes", 32 | N_EVAL_EPISODES, 33 | "-f", 34 | log_folder, 35 | "-uuid", 36 | ] 37 | arg_str_list: list[str] = list(map(str, args)) 38 | 39 | command = " ".join(["python", "-u", "train.py", *arg_str_list]) 40 | 41 | ok = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command]) 42 | time.sleep(0.05) 43 | -------------------------------------------------------------------------------- /scripts/create_mujoco_jobs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | 5 | import numpy as np 6 | 7 | ALGOS = ["sac", "td3", "tqc"] 8 | # "Humanoid-v3", 9 | ENVS = ["HalfCheetah-v3", "Ant-v3", "Hopper-v3", "Walker2d-v3", "Swimmer-v3"] 10 | N_SEEDS = 1 11 | EVAL_FREQ = 25000 12 | N_EVAL_EPISODES = 20 13 | N_EVAL_ENVS = 5 14 | np.random.seed(8) 15 | SEEDS = 
np.random.randint(2**20, size=(N_SEEDS,)) 16 | # N_TIMESTEPS = int(1e6) 17 | 18 | os.makedirs(os.path.join("logs", "slurm"), exist_ok=True) 19 | log_folder = "logs/" 20 | 21 | 22 | for algo in ALGOS: 23 | for env_id in ENVS: 24 | for seed in SEEDS: 25 | args = [ 26 | "--algo", 27 | algo, 28 | "--env", 29 | env_id, 30 | # "--hyperparams", 31 | # "use_sde:False", 32 | "--eval-episodes", 33 | N_EVAL_EPISODES, 34 | "--eval-freq", 35 | EVAL_FREQ, 36 | "--n-eval-envs", 37 | N_EVAL_ENVS, 38 | "-f", 39 | log_folder, 40 | "--seed", 41 | seed, 42 | "--log-interval", 43 | 10, 44 | "--num-threads", 45 | 2, 46 | # "-n", 47 | # N_TIMESTEPS, 48 | "-uuid", 49 | ] 50 | args = list(map(str, args)) 51 | 52 | command = " ".join(["python", "-u", "train.py", *args]) 53 | 54 | ok = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command]) 55 | time.sleep(0.05) 56 | -------------------------------------------------------------------------------- /scripts/migrate_to_hub.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | from rl_zoo3.utils import get_hf_trained_models, get_trained_models 4 | 5 | folder = "rl-trained-agents" 6 | orga = "sb3" 7 | trained_models_local = get_trained_models(folder) 8 | trained_models_hub = get_hf_trained_models(orga) 9 | remaining_models = set(trained_models_local.keys()) - set(trained_models_hub.keys()) 10 | 11 | for trained_model in list(remaining_models): 12 | algo, env_id = trained_models_local[trained_model] 13 | args = ["-orga", orga, "-f", folder, "--algo", algo, "--env", env_id] 14 | 15 | # Since SB3 >= 1.1.0, HER is no longer an algorithm but a replay buffer class 16 | if algo == "her": 17 | continue 18 | 19 | return_code = subprocess.call(["python", "-m", "rl_zoo3.push_to_hub", *args]) 20 | -------------------------------------------------------------------------------- /scripts/parse_study.py: 
-------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | from pprint import pprint 6 | 7 | import optuna 8 | from optuna.trial import FrozenTrial 9 | 10 | 11 | def value_key(trial: FrozenTrial) -> float: 12 | # Returns value of trial object for sorting 13 | if trial.value is None: 14 | return float("-inf") 15 | else: 16 | return trial.value 17 | 18 | 19 | print( 20 | "DEPRECATED: `parse_study.py` is deprecated, please use optuna-dashboard " 21 | "together with the `--trial-id` argument in the train script." 22 | ) 23 | 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument("-i", "--study-file", help="Path to a pickle file contained a saved study", type=str) 27 | parser.add_argument( 28 | "-f", 29 | "--folder", 30 | help="Folder where the best hyperparameter json files will be written", 31 | type=str, 32 | default="logs/hyperparameter_jsons", 33 | ) 34 | parser.add_argument("--study-name", help="Study name used during hyperparameter optimization", type=str) 35 | parser.add_argument("--storage", help="Database storage path used during hyperparameter optimization", type=str) 36 | parser.add_argument("--print-n-best-trials", help="Show final return values for n best trials", type=int, default=0) 37 | parser.add_argument( 38 | "--save-n-best-hyperparameters", 39 | help="Save the hyperparameters for the n best trials that resulted in the best returns", 40 | type=int, 41 | default=0, 42 | ) 43 | args = parser.parse_args() 44 | 45 | if args.study_name is None: 46 | assert args.study_file is not None, "No --study-file, nor --study-name were provided." 47 | with open(args.study_file, "rb") as f: 48 | study = pickle.load(f) 49 | 50 | else: 51 | assert args.storage is not None, "No storage was specified." 
52 | 53 | study = optuna.create_study( 54 | study_name=args.study_name, 55 | storage=args.storage, 56 | load_if_exists=True, 57 | direction="maximize", 58 | ) 59 | 60 | trials = study.trials 61 | trials.sort(key=value_key, reverse=True) 62 | 63 | for idx, trial in enumerate(trials): 64 | if idx < args.print_n_best_trials: 65 | print(f"# Top {idx + 1} - value: {trial.value:.2f}") 66 | print() 67 | pprint(trial.params) 68 | print() 69 | 70 | if args.save_n_best_hyperparameters > 0: 71 | os.makedirs(f"{args.folder}", exist_ok=True) 72 | for i in range(min(args.save_n_best_hyperparameters, len(trials))): 73 | params = trials[i].params 74 | with open(f"{args.folder}/hyperparameters_{i + 1}.json", "w+") as json_file: 75 | json_file.write(json.dumps(trials[i].params, indent=4)) 76 | print(f"Saved best hyperparameters to {args.folder}") 77 | -------------------------------------------------------------------------------- /scripts/plot_from_file.py: -------------------------------------------------------------------------------- 1 | from rl_zoo3.plots.plot_from_file import plot_from_file 2 | 3 | if __name__ == "__main__": 4 | plot_from_file() 5 | -------------------------------------------------------------------------------- /scripts/plot_train.py: -------------------------------------------------------------------------------- 1 | from rl_zoo3.plots.plot_train import plot_train 2 | 3 | if __name__ == "__main__": 4 | plot_train() 5 | -------------------------------------------------------------------------------- /scripts/run_docker_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Launch an experiment using the docker cpu image 3 | 4 | cmd_line="$@" 5 | 6 | echo "Executing in the docker (cpu image):" 7 | echo $cmd_line 8 | 9 | # Note: --user=root is needed, as the current user id/group id will be used 10 | # to mount the log directory (and $MAMBAUSER is not root) 11 | docker run -it --user=root --rm --network 
host --ipc=host \ 12 | --mount src=$(pwd),target=/home/mambauser/code/rl_zoo3,type=bind stablebaselines/rl-baselines3-zoo-cpu:latest\ 13 | bash -c "cd /home/mambauser/code/rl_zoo3/ && $cmd_line" 14 | -------------------------------------------------------------------------------- /scripts/run_docker_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Launch an experiment using the docker gpu image 3 | 4 | cmd_line="$@" 5 | 6 | echo "Executing in the docker (gpu image):" 7 | echo $cmd_line 8 | 9 | # Note: --user=root is needed, as the current user id/group id will be used 10 | # to mount the log directory (and $MAMBAUSER is not root) 11 | docker run -it --user=root --gpus all --rm --network host --ipc=host \ 12 | --mount src=$(pwd),target=/home/mambauser/code/rl_zoo3,type=bind stablebaselines/rl-baselines3-zoo:latest\ 13 | bash -c "cd /home/mambauser/code/rl_zoo3/ && $cmd_line" 14 | -------------------------------------------------------------------------------- /scripts/run_jobs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run multiple experiments on a single machine. 
3 | """ 4 | 5 | import subprocess 6 | 7 | import numpy as np 8 | 9 | ALGOS = ["sac"] 10 | ENVS = ["MountainCarContinuous-v0"] 11 | N_SEEDS = 10 12 | EVAL_FREQ = 5000 13 | N_EVAL_EPISODES = 10 14 | LOG_STD_INIT = [-6, -5, -4, -3, -2, -1, 0, 1] 15 | 16 | for algo in ALGOS: 17 | for env_id in ENVS: 18 | for log_std_init in LOG_STD_INIT: 19 | log_folder = f"logs_std_{np.exp(log_std_init):.4f}" 20 | for _ in range(N_SEEDS): 21 | args = [ 22 | "--algo", 23 | algo, 24 | "--env", 25 | env_id, 26 | "--hyperparams", 27 | f"policy_kwargs:dict(log_std_init={log_std_init}, net_arch=[64, 64])", 28 | "--eval-episodes", 29 | N_EVAL_EPISODES, 30 | "--eval-freq", 31 | EVAL_FREQ, 32 | "-f", 33 | log_folder, 34 | ] 35 | arg_str_list: list[str] = list(map(str, args)) 36 | 37 | ok = subprocess.call(["python", "train.py", *arg_str_list]) 38 | -------------------------------------------------------------------------------- /scripts/run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -m pytest --cov-config .coveragerc --cov-report html --cov-report term --cov=. 
-v -m "not slow" --color=yes 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | from setuptools import setup 5 | 6 | with open(os.path.join("rl_zoo3", "version.txt")) as file_handler: 7 | __version__ = file_handler.read().strip() 8 | 9 | # Copy hyperparams files for packaging 10 | shutil.copytree("hyperparams", os.path.join("rl_zoo3", "hyperparams")) 11 | 12 | long_description = """ 13 | # RL Baselines3 Zoo: A Training Framework for Stable Baselines3 Reinforcement Learning Agents 14 | 15 | See https://github.com/DLR-RM/rl-baselines3-zoo 16 | """ 17 | install_requires = [ 18 | "sb3_contrib>=2.6.1a1,<3.0", 19 | "gymnasium>=0.29.1,<1.2.0", 20 | "huggingface_sb3>=3.0,<4.0", 21 | "tqdm", 22 | "rich", 23 | "optuna>=3.0", 24 | "pyyaml>=5.1", 25 | "pytablewriter~=1.2", 26 | "shimmy~=2.0", 27 | ] 28 | plots_requires = ["seaborn", "rliable~=1.2.0", "scipy~=1.10"] 29 | test_requires = [ 30 | # for MuJoCo envs v4: 31 | "mujoco>=2.3,<4", 32 | # install parking-env to test HER 33 | "highway-env>=1.10.1,<1.11.0", 34 | ] 35 | 36 | setup( 37 | name="rl_zoo3", 38 | packages=["rl_zoo3", "rl_zoo3.plots"], 39 | package_data={ 40 | "rl_zoo3": [ 41 | "py.typed", 42 | "version.txt", 43 | "hyperparams/*.yml", 44 | ] 45 | }, 46 | entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]}, 47 | install_requires=install_requires, 48 | extras_require={"plots": plots_requires, "tests": test_requires}, 49 | description="A Training Framework for Stable Baselines3 Reinforcement Learning Agents", 50 | author="Antonin Raffin", 51 | url="https://github.com/DLR-RM/rl-baselines3-zoo", 52 | author_email="antonin.raffin@dlr.de", 53 | keywords="reinforcement-learning-algorithms reinforcement-learning machine-learning " 54 | "gymnasium openai stable baselines sb3 toolbox python data-science", 55 | license="MIT", 56 | 
long_description=long_description, 57 | long_description_content_type="text/markdown", 58 | version=__version__, 59 | python_requires=">=3.9", 60 | # PyPI package information. 61 | project_urls={ 62 | "Code": "https://github.com/DLR-RM/rl-baselines3-zoo", 63 | "Documentation": "https://rl-baselines3-zoo.readthedocs.io/en/master/", 64 | "Changelog": "https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md", 65 | "Stable-Baselines3": "https://github.com/DLR-RM/stable-baselines3", 66 | "RL-Zoo": "https://github.com/DLR-RM/rl-baselines3-zoo", 67 | "SBX": "https://github.com/araffin/sbx", 68 | }, 69 | classifiers=[ 70 | "Programming Language :: Python :: 3", 71 | "Programming Language :: Python :: 3.9", 72 | "Programming Language :: Python :: 3.10", 73 | "Programming Language :: Python :: 3.11", 74 | "Programming Language :: Python :: 3.12", 75 | ], 76 | ) 77 | 78 | # Remove copied files after packaging 79 | shutil.rmtree(os.path.join("rl_zoo3", "hyperparams")) 80 | -------------------------------------------------------------------------------- /tests/dummy_env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name="test_env") 4 | -------------------------------------------------------------------------------- /tests/dummy_env/test_env/__init__.py: -------------------------------------------------------------------------------- 1 | from gymnasium.envs.registration import register 2 | 3 | register( 4 | id="TestEnv-v0", 5 | entry_point="test_env.test_env:TestEnv", 6 | ) 7 | -------------------------------------------------------------------------------- /tests/dummy_env/test_env/config.py: -------------------------------------------------------------------------------- 1 | hyperparams = { 2 | "TestEnv-v0": { 3 | "policy": "MlpPolicy", 4 | "n_timesteps": 20000, 5 | } 6 | } 7 | -------------------------------------------------------------------------------- 
/tests/dummy_env/test_env/test_env.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar 2 | 3 | import gymnasium as gym 4 | import numpy as np 5 | from gymnasium import spaces 6 | 7 | 8 | class TestEnv(gym.Env): 9 | metadata: ClassVar[dict] = {"render_modes": ["human"], "render_fps": 4} 10 | __test__ = False 11 | 12 | def __init__(self, render_mode=None): 13 | super().__init__() 14 | 15 | self.action_space = spaces.Discrete(2) 16 | self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32) 17 | 18 | def step(self, action): 19 | return self.observation_space.sample(), 0.0, self.np_random.random() > 0.5, False, {} 20 | 21 | def reset(self, *, seed=None, options=None): 22 | super().reset(seed=seed) 23 | if seed is not None: 24 | self.observation_space.seed(seed) 25 | return self.observation_space.sample(), {} 26 | 27 | def render(self, mode="human"): 28 | pass 29 | 30 | 31 | if __name__ == "__main__": 32 | from gymnasium.utils.env_checker import check_env as gym_check 33 | from stable_baselines3.common.env_checker import check_env 34 | 35 | check_env(TestEnv()) 36 | gym_check(TestEnv()) 37 | -------------------------------------------------------------------------------- /tests/test_callbacks.py: -------------------------------------------------------------------------------- 1 | import shlex 2 | import subprocess 3 | 4 | import pytest 5 | import stable_baselines3 as sb3 6 | 7 | from rl_zoo3.utils import get_callback_list 8 | 9 | 10 | def _assert_eq(left, right): 11 | assert left == right, f"{left} != {right}" 12 | 13 | 14 | def test_raw_stat_callback(tmp_path): 15 | cmd = ( 16 | f"python train.py -n 200 --algo ppo --env CartPole-v1 --log-folder {tmp_path} " 17 | f"--tensorboard-log {tmp_path} -params callback:\"'rl_zoo3.callbacks.RawStatisticsCallback'\"" 18 | ) 19 | return_code = subprocess.call(shlex.split(cmd)) 20 | _assert_eq(return_code, 0) 21 | 22 | 23 | 
@pytest.mark.parametrize(
    "callback",
    [
        None,
        "rl_zoo3.callbacks.RawStatisticsCallback",
        [
            {"stable_baselines3.common.callbacks.StopTrainingOnMaxEpisodes": {"max_episodes": 3}},
            "rl_zoo3.callbacks.RawStatisticsCallback",
        ],
        [sb3.common.callbacks.StopTrainingOnMaxEpisodes(3)],
    ],
)
def test_get_callback(callback):
    """Check how many callbacks ``get_callback_list`` builds for each spec.

    ``None`` must give an empty list, a single string exactly one callback,
    and a list one callback per entry.
    """
    built = get_callback_list({"callback": callback})

    if callback is None:
        expected_count = 0
    elif isinstance(callback, str):
        expected_count = 1
    else:
        expected_count = len(callback)

    assert len(built) == expected_count
@pytest.mark.parametrize(
    "vec_env_wrapper",
    [
        None,
        {"stable_baselines3.common.vec_env.VecFrameStack": {"n_stack": 2}},
        {sb3.common.vec_env.VecFrameStack: {"n_stack": 2}},
        [
            {"stable_baselines3.common.vec_env.VecFrameStack": {"n_stack": 3}},
            "stable_baselines3.common.vec_env.VecMonitor",
        ],
    ],
)
def test_get_vec_env_wrapper(vec_env_wrapper):
    """Build a vectorized-env wrapper from each spec and train briefly on it.

    The wrapper factory returned by ``get_wrapper_class`` (looked up under the
    ``"vec_env_wrapper"`` key) is applied when it is not ``None``; A2C then
    learns for 16 timesteps on the resulting environment.
    """
    base_env = gym.make("Ant-v4")
    vec_env = DummyVecEnv([lambda: base_env])

    make_wrapped = get_wrapper_class({"vec_env_wrapper": vec_env_wrapper}, "vec_env_wrapper")
    if make_wrapped is not None:
        vec_env = make_wrapped(vec_env)

    model = A2C("MlpPolicy", vec_env)
    model.learn(16)