├── .dockerignore
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.yml
    │   ├── documentation.yml
    │   ├── feature_request.yml
    │   └── question.yml
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   ├── ci.yml
    │   └── trained_agents.yml
├── .gitignore
├── .gitmodules
├── .readthedocs.yml
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── benchmark.md
├── docker
    └── Dockerfile
├── docs
    ├── Makefile
    ├── README.md
    ├── _static
    │   ├── css
    │   │   └── baselines_theme.css
    │   └── img
    │   │   ├── colab-badge.svg
    │   │   └── colab.svg
    ├── conda_env.yml
    ├── conf.py
    ├── guide
    │   ├── config.rst
    │   ├── custom_env.rst
    │   ├── enjoy.rst
    │   ├── install.rst
    │   ├── integrations.rst
    │   ├── plot.rst
    │   ├── quickstart.rst
    │   ├── sbx.rst
    │   ├── train.rst
    │   └── tuning.rst
    ├── index.rst
    ├── make.bat
    ├── misc
    │   └── changelog.rst
    ├── modules
    │   ├── callbacks.rst
    │   ├── exp_manager.rst
    │   ├── utils.rst
    │   └── wrappers.rst
    └── spelling_wordlist.txt
├── enjoy.py
├── hyperparams
    ├── a2c.yml
    ├── ars.yml
    ├── crossq.yml
    ├── ddpg.yml
    ├── dqn.yml
    ├── her.yml
    ├── ppo.yml
    ├── ppo_lstm.yml
    ├── python
    │   └── ppo_config_example.py
    ├── qrdqn.yml
    ├── sac.yml
    ├── td3.yml
    ├── tqc.yml
    └── trpo.yml
├── images
    └── car.jpg
├── logs
    └── benchmark
    │   ├── a2c-Acrobot-v1
    │       └── 0.monitor.csv
    │   ├── a2c-Ant-v3
    │       └── 0.monitor.csv
    │   ├── a2c-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── a2c-AsteroidsNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-BeamRiderNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── a2c-BipedalWalkerHardcore-v3
    │       └── 0.monitor.csv
    │   ├── a2c-BreakoutNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-CartPole-v1
    │       └── 0.monitor.csv
    │   ├── a2c-EnduroNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── a2c-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── a2c-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── a2c-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── a2c-Humanoid-v3
    │       └── 0.monitor.csv
    │   ├── a2c-LunarLander-v2
    │       └── 0.monitor.csv
    │   ├── a2c-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── a2c-MountainCar-v0
    │       └── 0.monitor.csv
    │   ├── a2c-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── a2c-MsPacmanNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── a2c-PongNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-QbertNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── a2c-RoadRunnerNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-SeaquestNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-SpaceInvadersNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── a2c-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── a2c-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── a2c-Walker2d-v3
    │       └── 0.monitor.csv
    │   ├── ars-Acrobot-v1
    │       └── 0.monitor.csv
    │   ├── ars-Ant-v3
    │       └── 0.monitor.csv
    │   ├── ars-CartPole-v1
    │       └── 0.monitor.csv
    │   ├── ars-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── ars-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── ars-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── ars-MountainCar-v0
    │       └── 0.monitor.csv
    │   ├── ars-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── ars-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── ars-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── ars-Walker2d-v3
    │       └── 0.monitor.csv
    │   ├── benchmark.md
    │   ├── ddpg-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ddpg-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── ddpg-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ddpg-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ddpg-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── ddpg-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── ddpg-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── ddpg-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ddpg-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── dqn-Acrobot-v1
    │       └── 0.monitor.csv
    │   ├── dqn-AsteroidsNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-BeamRiderNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-BreakoutNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-CartPole-v1
    │       └── 0.monitor.csv
    │   ├── dqn-EnduroNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-LunarLander-v2
    │       └── 0.monitor.csv
    │   ├── dqn-MountainCar-v0
    │       └── 0.monitor.csv
    │   ├── dqn-MsPacmanNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-PongNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-QbertNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-RoadRunnerNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-SeaquestNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── dqn-SpaceInvadersNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── her-FetchPickAndPlace-v1
    │       └── 0.monitor.csv
    │   ├── her-FetchPush-v1
    │       └── 0.monitor.csv
    │   ├── her-FetchReach-v1
    │       └── 0.monitor.csv
    │   ├── her-FetchSlide-v1
    │       └── 0.monitor.csv
    │   ├── her-parking-v0
    │       └── 0.monitor.csv
    │   ├── ppo-Acrobot-v1
    │       └── 0.monitor.csv
    │   ├── ppo-Ant-v3
    │       └── 0.monitor.csv
    │   ├── ppo-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ppo-AsteroidsNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-BeamRiderNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── ppo-BipedalWalkerHardcore-v3
    │       └── 0.monitor.csv
    │   ├── ppo-BreakoutNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-CarRacing-v0
    │       └── 0.monitor.csv
    │   ├── ppo-CartPole-v1
    │       └── 0.monitor.csv
    │   ├── ppo-EnduroNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── ppo-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ppo-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── ppo-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ppo-LunarLander-v2
    │       └── 0.monitor.csv
    │   ├── ppo-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-DoorKey-5x5-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-Empty-Random-5x5-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-FourRooms-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-GoToDoor-5x5-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-KeyCorridorS3R1-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-LockedRoom-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-MultiRoom-N4-S5-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-ObstructedMaze-2Dlh-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-PutNear-6x6-N2-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-RedBlueDoors-6x6-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MiniGrid-Unlock-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MountainCar-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── ppo-MsPacmanNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── ppo-PongNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-QbertNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ppo-RoadRunnerNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-SeaquestNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-SpaceInvadersNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── ppo-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── ppo-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── ppo-Walker2d-v3
    │       └── 0.monitor.csv
    │   ├── ppo_lstm-CarRacing-v0
    │       └── 0.monitor.csv
    │   ├── ppo_lstm-CartPoleNoVel-v1
    │       └── 0.monitor.csv
    │   ├── ppo_lstm-MountainCarContinuousNoVel-v0
    │       └── 0.monitor.csv
    │   ├── ppo_lstm-PendulumNoVel-v1
    │       └── 0.monitor.csv
    │   ├── qrdqn-Acrobot-v1
    │       └── 0.monitor.csv
    │   ├── qrdqn-AsteroidsNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-BeamRiderNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-BreakoutNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-CartPole-v1
    │       └── 0.monitor.csv
    │   ├── qrdqn-EnduroNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-LunarLander-v2
    │       └── 0.monitor.csv
    │   ├── qrdqn-MountainCar-v0
    │       └── 0.monitor.csv
    │   ├── qrdqn-MsPacmanNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-PongNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-QbertNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-RoadRunnerNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-SeaquestNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── qrdqn-SpaceInvadersNoFrameskip-v4
    │       └── 0.monitor.csv
    │   ├── sac-Ant-v3
    │       └── 0.monitor.csv
    │   ├── sac-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── sac-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── sac-BipedalWalkerHardcore-v3
    │       └── 0.monitor.csv
    │   ├── sac-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── sac-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── sac-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── sac-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── sac-Humanoid-v3
    │       └── 0.monitor.csv
    │   ├── sac-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── sac-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── sac-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── sac-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── sac-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── sac-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── sac-Walker2d-v3
    │       └── 0.monitor.csv
    │   ├── td3-Ant-v3
    │       └── 0.monitor.csv
    │   ├── td3-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── td3-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── td3-BipedalWalkerHardcore-v3
    │       └── 0.monitor.csv
    │   ├── td3-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── td3-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── td3-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── td3-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── td3-Humanoid-v3
    │       └── 0.monitor.csv
    │   ├── td3-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── td3-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── td3-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── td3-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── td3-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── td3-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── td3-Walker2d-v3
    │       └── 0.monitor.csv
    │   ├── tqc-Ant-v3
    │       └── 0.monitor.csv
    │   ├── tqc-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── tqc-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── tqc-BipedalWalkerHardcore-v3
    │       └── 0.monitor.csv
    │   ├── tqc-FetchPickAndPlace-v1
    │       └── 0.monitor.csv
    │   ├── tqc-FetchPush-v1
    │       └── 0.monitor.csv
    │   ├── tqc-FetchReach-v1
    │       └── 0.monitor.csv
    │   ├── tqc-FetchSlide-v1
    │       └── 0.monitor.csv
    │   ├── tqc-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── tqc-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── tqc-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── tqc-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── tqc-Humanoid-v3
    │       └── 0.monitor.csv
    │   ├── tqc-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── tqc-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── tqc-PandaPickAndPlace-v1
    │       └── 0.monitor.csv
    │   ├── tqc-PandaPush-v1
    │       └── 0.monitor.csv
    │   ├── tqc-PandaReach-v1
    │       └── 0.monitor.csv
    │   ├── tqc-PandaSlide-v1
    │       └── 0.monitor.csv
    │   ├── tqc-PandaStack-v1
    │       └── 0.monitor.csv
    │   ├── tqc-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── tqc-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── tqc-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── tqc-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── tqc-Walker2d-v3
    │       └── 0.monitor.csv
    │   ├── tqc-parking-v0
    │       └── 0.monitor.csv
    │   ├── trpo-Acrobot-v1
    │       └── 0.monitor.csv
    │   ├── trpo-Ant-v3
    │       └── 0.monitor.csv
    │   ├── trpo-AntBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── trpo-BipedalWalker-v3
    │       └── 0.monitor.csv
    │   ├── trpo-CartPole-v1
    │       └── 0.monitor.csv
    │   ├── trpo-HalfCheetah-v3
    │       └── 0.monitor.csv
    │   ├── trpo-HalfCheetahBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── trpo-Hopper-v3
    │       └── 0.monitor.csv
    │   ├── trpo-HopperBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── trpo-LunarLander-v2
    │       └── 0.monitor.csv
    │   ├── trpo-LunarLanderContinuous-v2
    │       └── 0.monitor.csv
    │   ├── trpo-MountainCar-v0
    │       └── 0.monitor.csv
    │   ├── trpo-MountainCarContinuous-v0
    │       └── 0.monitor.csv
    │   ├── trpo-Pendulum-v1
    │       └── 0.monitor.csv
    │   ├── trpo-ReacherBulletEnv-v0
    │       └── 0.monitor.csv
    │   ├── trpo-Swimmer-v3
    │       └── 0.monitor.csv
    │   ├── trpo-Walker2DBulletEnv-v0
    │       └── 0.monitor.csv
    │   └── trpo-Walker2d-v3
    │       └── 0.monitor.csv
├── pyproject.toml
├── requirements.txt
├── rl_zoo3
    ├── __init__.py
    ├── benchmark.py
    ├── callbacks.py
    ├── cli.py
    ├── enjoy.py
    ├── exp_manager.py
    ├── gym_patches.py
    ├── hyperparams_opt.py
    ├── import_envs.py
    ├── load_from_hub.py
    ├── plots
    │   ├── __init__.py
    │   ├── all_plots.py
    │   ├── plot_from_file.py
    │   ├── plot_train.py
    │   └── score_normalization.py
    ├── push_to_hub.py
    ├── py.typed
    ├── record_training.py
    ├── record_video.py
    ├── train.py
    ├── utils.py
    ├── version.txt
    └── wrappers.py
├── scripts
    ├── __init__.py
    ├── all_plots.py
    ├── build_docker.sh
    ├── create_cluster_jobs.py
    ├── create_mujoco_jobs.py
    ├── migrate_to_hub.py
    ├── parse_study.py
    ├── plot_from_file.py
    ├── plot_train.py
    ├── run_docker_cpu.sh
    ├── run_docker_gpu.sh
    ├── run_jobs.py
    └── run_tests.sh
├── setup.py
├── tests
    ├── dummy_env
    │   ├── setup.py
    │   └── test_env
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   └── test_env.py
    ├── test_callbacks.py
    ├── test_enjoy.py
    ├── test_hyperparams_opt.py
    ├── test_train.py
    └── test_wrappers.py
└── train.py


/.dockerignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | logs
 3 | .pytest_cache/
 4 | .coverage
 5 | .coverage.*
 6 | .idea/
 7 | cluster_sbatch.sh
 8 | cluster_sbatch_mpi.sh
 9 | cluster_torchy.sh
10 | logs/
11 | .pytype/
12 | htmlcov/
13 | git_rewrite_commit_history.sh
14 | .vscode/
15 | # ignore for docker builds
16 | rl-trained-agents/
17 | .git/
18 | .mypy_cache/
19 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F41B Bug Report"
 2 | description: If you encounter an unexpected behavior, software crash, or other bug.
 3 | title: "[Bug]: bug title"
 4 | labels: ["bug"]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
10 |         Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
11 |   - type: textarea
12 |     id: description
13 |     attributes:
14 |       label: 🐛 Bug
15 |       description: A clear and concise description of what the bug is.
16 |     validations:
17 |       required: true
18 |   - type: textarea
19 |     id: reproduce
20 |     attributes:
21 |       label: To Reproduce
22 |       description: |
23 |         Steps to reproduce the behavior. Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
24 |         Please use the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
25 |       value: |
26 |         ```shell
27 |         python train.py --algo ...
28 |         ```
29 | 
30 |   - type: textarea
31 |     id: traceback
32 |     attributes:
33 |       label: Relevant log output / Error message
34 |       description: Please copy and paste any relevant log output / error message. This will be automatically formatted into code, so no need for backticks.
35 |       placeholder: "Traceback (most recent call last): File ..."
36 |       render: shell
37 | 
38 |   - type: textarea
39 |     id: system-info
40 |     attributes:
41 |       label: System Info
42 |       description: |
43 |         Describe the characteristics of your environment:
44 |          * Describe how the library was installed (pip, docker, source, ...)
45 |          * GPU models and configuration
46 |          * Python version
47 |          * PyTorch version
48 |          * Gymnasium version
49 |          * (if installed) OpenAI Gym version
50 |          * Versions of any other relevant libraries
51 | 
52 |         You can use `sb3.get_system_info()` to print relevant packages info:
53 |         ```sh
54 |         python -c 'import stable_baselines3 as sb3; sb3.get_system_info()'
55 |         ```
56 |   - type: checkboxes
57 |     id: terms
58 |     attributes:
59 |       label: Checklist
60 |       options:
61 |         - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
62 |           required: true
63 |         - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
64 |           required: true
65 |         - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
66 |           required: true
67 |         - label: I have provided a [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014) example to reproduce the bug
68 |           required: true
69 |         - label: I've used the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
70 |           required: true
71 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F4DA Documentation"
 2 | description: If you want to improve the documentation by reporting errors, inconsistencies, or missing information.
 3 | labels: ["documentation"]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
 9 |         Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
10 |   - type: textarea
11 |     id: description
12 |     attributes:
13 |       label: 📚 Documentation
14 |       description: A clear and concise description of what should be improved in the documentation.
15 |     validations:
16 |       required: true
17 |   - type: checkboxes
18 |     id: terms
19 |     attributes:
20 |       label: Checklist
21 |       options:
22 |         - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
23 |           required: true
24 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F680 Feature Request"
 2 | description: How to create an issue for requesting a feature
 3 | title: "[Feature Request] request title"
 4 | labels: ["enhancement"]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
10 |         Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
11 |   - type: textarea
12 |     id: description
13 |     attributes:
14 |       label: 🚀 Feature
15 |       description: A clear and concise description of the feature proposal.
16 |     validations:
17 |       required: true
18 |   - type: textarea
19 |     id: motivation
20 |     attributes:
21 |       label: Motivation
22 |       description: Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., "I'm always frustrated when [...]". If this is related to another GitHub issue, please link it here too.
23 |   - type: textarea
24 |     id: pitch
25 |     attributes:
26 |       label: Pitch
27 |       description: A clear and concise description of what you want to happen.
28 |   - type: textarea
29 |     id: alternatives
30 |     attributes:
31 |       label: Alternatives
32 |       description: A clear and concise description of any alternative solutions or features you've considered, if any.
33 |   - type: textarea
34 |     id: additional-context
35 |     attributes:
36 |       label: Additional context
37 |       description: Add any other context or screenshots about the feature request here.
38 |   - type: checkboxes
39 |     id: terms
40 |     attributes:
41 |       label: Checklist
42 |       options:
43 |         - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
44 |           required: true
45 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.yml:
--------------------------------------------------------------------------------
 1 | name: "❓ Question"
 2 | description: If you have a general question about RL Baselines3 Zoo
 3 | title: "[Question] question title"
 4 | labels: ["question"]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         **Important Note: We do not do technical support, nor consulting** and don't answer personal questions by email.
10 |         Please post your question on the [RL Discord](https://discord.com/invite/xhfNqQv), [Reddit](https://www.reddit.com/r/reinforcementlearning/) or [Stack Overflow](https://stackoverflow.com/) in that case.
11 |   - type: textarea
12 |     id: question
13 |     attributes:
14 |       label: ❓ Question
15 |       description: |
16 |         Your question. This can be e.g. a question about confusing or unclear behaviour of functions, or about whether X can be done using stable-baselines3. Make sure to check out the documentation first.
17 |         **Important Note: If your question is anything like "Why is my code generating this error?", you must [submit a bug report](https://github.com/DLR-RM/rl-baselines3-zoo/issues/new?assignees=&labels=bug&projects=&template=bug_report.yml&title=%5BBug%5D%3A+bug+title) instead.**
18 |     validations:
19 |       required: true
20 |   - type: checkboxes
21 |     id: terms
22 |     attributes:
23 |       label: Checklist
24 |       options:
25 |         - label: I have checked that there is no similar [issue](https://github.com/DLR-RM/rl-baselines3-zoo/issues) in the repo
26 |           required: true
27 |         - label: I have read the [SB3 documentation](https://stable-baselines3.readthedocs.io/en/master/)
28 |           required: true
29 |         - label: I have read the [RL Zoo documentation](https://rl-baselines3-zoo.readthedocs.io)
30 |           required: true
31 |         - label: If code there is, it is [minimal and working](https://github.com/DLR-RM/stable-baselines3/issues/982#issuecomment-1197044014)
32 |           required: true
33 |         - label: If code there is, it is formatted using the [markdown code blocks](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) for both code and stack traces.
34 |           required: true
35 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--- Provide a general summary of your changes in the Title above -->
 2 | 
 3 | ## Description
 4 | <!--- Describe your changes in detail -->
 5 | 
 6 | ## Motivation and Context
 7 | <!--- Why is this change required? What problem does it solve? -->
 8 | <!--- If it fixes an open issue, please link to the issue here. -->
 9 | <!--- You can use the syntax `closes #100` if this solves the issue #100 -->
10 | - [ ] I have raised an issue to propose this change ([required](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) for new features and bug fixes)
11 | 
12 | ## Types of changes
13 | <!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
14 | - [ ] Bug fix (non-breaking change which fixes an issue)
15 | - [ ] New feature (non-breaking change which adds functionality)
16 | - [ ] Breaking change (fix or feature that would cause existing functionality to change)
17 | - [ ] Documentation (update in the documentation)
18 | 
19 | ## Checklist:
20 | <!--- Go over all the following points, and put an `x` in all the boxes that apply. -->
21 | <!--- If you're unsure about any of these, don't hesitate to ask. We're here to help! -->
22 | - [ ] I've read the [CONTRIBUTION](https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md) guide (**required**)
23 | - [ ] I have updated the changelog accordingly (**required**).
24 | - [ ] My change requires a change to the documentation.
25 | - [ ] I have updated the tests accordingly (*required for a bug fix or a new feature*).
26 | - [ ] I have updated the documentation accordingly.
27 | - [ ] I have reformatted the code using `make format` (**required**)
28 | - [ ] I have checked the codestyle using `make check-codestyle` and `make lint` (**required**)
29 | - [ ] I have ensured `make pytest` and `make type` both pass. (**required**)
30 | 
31 | 
32 | Note: we are using a maximum length of 127 characters per line
33 | 
34 | <!--- This Template is an edited version of the one from https://github.com/evilsocket/pwnagotchi/ -->
35 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: CI
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [master]
 9 |   pull_request:
10 |     branches: [master]
11 | 
12 | jobs:
13 |   build:
14 |     env:
15 |       TERM: xterm-256color
16 |       FORCE_COLOR: 1
17 |       HF_TOKEN: ${{ secrets.HF_TOKEN }}
18 |     # Skip CI if [ci skip] in the commit message
19 |     if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')"
20 |     runs-on: ubuntu-latest
21 |     strategy:
22 |       matrix:
23 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
24 |         include:
25 |           # Default version
26 |           - gymnasium-version: "1.0.0"
27 |           # Add a new config to test gym<1.0
28 |           - python-version: "3.10"
29 |             gymnasium-version: "0.29.1"
30 |     steps:
31 |       - uses: actions/checkout@v3
32 |         with:
33 |           submodules: true
34 |       - name: Set up Python ${{ matrix.python-version }}
35 |         uses: actions/setup-python@v4
36 |         with:
37 |           python-version: ${{ matrix.python-version }}
38 |       - name: Install dependencies
39 |         run: |
40 |           python -m pip install --upgrade pip
41 |           # Use uv for faster downloads
42 |           pip install uv
43 |           # cpu version of pytorch
44 |           # See https://github.com/astral-sh/uv/issues/1497
45 |           uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
46 |           # Install full requirements (for additional envs and test tools)
47 |           uv pip install --system -r requirements.txt
48 |           # Use headless version
49 |           uv pip install --system opencv-python-headless
50 |           uv pip install --system -e .[plots,tests]
51 | 
52 |       - name: Install specific version of gym
53 |         run: |
54 |           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
55 |           uv pip install --system "numpy<2"
56 |         # Only run for python 3.10, downgrade gym to 0.29.1
57 |         if: matrix.gymnasium-version != '1.0.0'
58 | 
59 |       - name: Lint with ruff
60 |         run: |
61 |           make lint
62 |       - name: Check codestyle
63 |         run: |
64 |           make check-codestyle
65 |       - name: Build the doc
66 |         run: |
67 |           make doc
68 |       - name: Type check
69 |         run: |
70 |           make type
71 |       - name: Test with pytest
72 |         run: |
73 |           make pytest
74 | 


--------------------------------------------------------------------------------
/.github/workflows/trained_agents.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run checks on trained agents
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Check-Trained-Agents
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [master]
 9 |   pull_request:
10 |     branches: [master]
11 | 
12 | jobs:
13 |   build:
14 |     env:
15 |       TERM: xterm-256color
16 |       FORCE_COLOR: 1
17 |       HF_TOKEN: ${{ secrets.HF_TOKEN }}
18 |     # Skip CI if [ci skip] in the commit message
19 |     if: "! contains(toJSON(github.event.commits.*.message), '[ci skip]')"
20 |     runs-on: ubuntu-latest
21 |     strategy:
22 |       matrix:
23 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
24 |         include:
25 |           # Default version
26 |           - gymnasium-version: "1.0.0"
27 |           # Add a new config to test gym<1.0
28 |           - python-version: "3.10"
29 |             gymnasium-version: "0.29.1"
30 |     steps:
31 |       - uses: actions/checkout@v3
32 |         with:
33 |           submodules: true
34 |       - name: Set up Python ${{ matrix.python-version }}
35 |         uses: actions/setup-python@v4
36 |         with:
37 |           python-version: ${{ matrix.python-version }}
38 |       - name: Install dependencies
39 |         run: |
40 |           python -m pip install --upgrade pip
41 | 
42 |           # Use uv for faster downloads
43 |           pip install uv
44 |           # cpu version of pytorch
45 |           # See https://github.com/astral-sh/uv/issues/1497
46 |           uv pip install --system torch==2.4.1+cpu --index https://download.pytorch.org/whl/cpu
47 |           # Install full requirements (for additional envs and test tools)
48 |           uv pip install --system -r requirements.txt
49 |           # Use headless version
50 |           uv pip install --system opencv-python-headless
51 |           uv pip install --system -e .[plots,tests]
52 |           # Downgrade numpy to run pybullet agents
53 |           # See https://github.com/bulletphysics/bullet3/issues/4649
54 |           uv pip install --system "numpy<2"
55 | 
56 |       - name: Install specific version of gym
57 |         run: |
58 |           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
59 |           uv pip install --system "numpy<2"
60 |         # Only run for python 3.10, downgrade gym to 0.29.1
61 |         if: matrix.gymnasium-version != '1.0.0'
62 | 
63 |       - name: Check trained agents
64 |         run: |
65 |           make check-trained-agents
66 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | logs
 3 | .pytest_cache/
 4 | .coverage
 5 | .coverage.*
 6 | .idea/
 7 | cluster_sbatch.sh
 8 | cluster_sbatch_mpi.sh
 9 | cluster_torchy.sh
10 | logs/
11 | rl-trained_agents/
12 | .pytype/
13 | htmlcov/
14 | git_rewrite_commit_history.sh
15 | .vscode/
16 | wandb
17 | runs
18 | hub
19 | *.mp4
20 | *.json
21 | _build/
22 | 
23 | tests/dummy_env/build/
24 | 
25 | 
26 | # Setuptools distribution and build folders.
27 | /dist/
28 | /build
29 | keys/
30 | *.egg-info
31 | .cache
32 | *.lprof
33 | *.prof
34 | 
35 | # Environments
36 | .env
37 | .venv
38 | env/
39 | venv/
40 | ENV/
41 | env.bak/
42 | venv.bak/
43 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "rl-trained-agents"]
2 | 	path = rl-trained-agents
3 | 	url = https://github.com/DLR-RM/rl-trained-agents
4 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Build documentation in the docs/ directory with Sphinx
 8 | sphinx:
 9 |   configuration: docs/conf.py
10 | 
11 | # Optionally build your docs in additional formats such as PDF and ePub
12 | formats: all
13 | 
14 | # Set requirements using conda env
15 | conda:
16 |   environment: docs/conda_env.yml
17 | 
18 | build:
19 |   os: ubuntu-24.04
20 |   tools:
21 |     python: "mambaforge-23.11"
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Antonin RAFFIN
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | LINT_PATHS = *.py tests/ scripts/ rl_zoo3/ hyperparams/python/*.py docs/conf.py
 2 | 
 3 | # Run pytest and coverage report
 4 | pytest:
 5 | 	./scripts/run_tests.sh
 6 | 
 7 | # check all trained agents (slow)
 8 | check-trained-agents:
 9 | 	python -m pytest -v tests/test_enjoy.py -k trained_agent --color=yes
10 | 
11 | mypy:
12 | 	mypy ${LINT_PATHS} --install-types --non-interactive
13 | 
14 | type: mypy
15 | 
16 | lint:
17 | 	# stop the build if there are Python syntax errors or undefined names
18 | 	# see https://www.flake8rules.com/
19 | 	ruff check ${LINT_PATHS} --select=E9,F63,F7,F82 --output-format=full
20 | 	# exit-zero treats all errors as warnings.
21 | 	ruff check ${LINT_PATHS} --exit-zero --output-format=concise
22 | 
23 | format:
24 | 	# Sort imports
25 | 	ruff check --select I ${LINT_PATHS} --fix
26 | 	# Reformat using black
27 | 	black ${LINT_PATHS}
28 | 
29 | check-codestyle:
30 | 	# Check that imports are sorted (report only, no --fix so files are not modified)
31 | 	ruff check --select I ${LINT_PATHS}
32 | 	# Check formatting with black (--check reports differences without reformatting)
33 | 	black --check ${LINT_PATHS}
34 | 
35 | commit-checks: format type lint
36 | 
37 | doc:
38 | 	cd docs && make html
39 | 
40 | spelling:
41 | 	cd docs && make spelling
42 | 
43 | clean:
44 | 	cd docs && make clean
45 | 
46 | docker: docker-cpu docker-gpu
47 | 
48 | docker-cpu:
49 | 	./scripts/build_docker.sh
50 | 
51 | docker-gpu:
52 | 	USE_GPU=True ./scripts/build_docker.sh
53 | 
54 | # PyPi package release
55 | release:
56 | 	# rm -r build/* dist/*
57 | 	python -m build -s
58 | 	python -m build -w
59 | 	twine upload dist/*
60 | 
61 | # Test PyPi package release
62 | test-release:
63 | 	# rm -r build/* dist/*
64 | 	python -m build -s
65 | 	python -m build -w
66 | 	twine upload --repository-url https://test.pypi.org/legacy/ dist/*
67 | 
68 | .PHONY: lint format check-codestyle commit-checks doc spelling docker type pytest check-trained-agents mypy clean docker-cpu docker-gpu release test-release
69 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG PARENT_IMAGE=stablebaselines/stable-baselines3
 2 | FROM $PARENT_IMAGE
 3 | 
 4 | USER root
 5 | 
 6 | RUN apt-get -y update \
 7 |     && apt-get -y install \
 8 |     ffmpeg \
 9 |     swig \
10 |     # For building box2d
11 |     build-essential \
12 |     && apt-get clean \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | USER $MAMBA_USER
16 | 
17 | ENV CODE_DIR=/home/$MAMBA_USER/code
18 | COPY requirements.txt /tmp/
19 | 
20 | # Copy setup file only to install dependencies
21 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./setup.py ${CODE_DIR}/rl_zoo3/setup.py
22 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./rl_zoo3/version.txt ${CODE_DIR}/rl_zoo3/rl_zoo3/version.txt
23 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./hyperparams ${CODE_DIR}/rl_zoo3/hyperparams
24 | COPY --chown=$MAMBA_USER:$MAMBA_USER ./rl_zoo3/plots ${CODE_DIR}/rl_zoo3/rl_zoo3/plots
25 | 
26 | 
27 | RUN \
28 |     eval "$(micromamba shell hook --shell bash)" && \
29 |     micromamba activate && \
30 |     cd ${CODE_DIR}/rl_zoo3 && \
31 |     uv pip uninstall stable-baselines3 && \
32 |     uv pip install --system -r /tmp/requirements.txt && \
33 |     uv pip install --system -e ".[plots,tests]" && \
34 |     uv cache clean
35 | 
36 | CMD ["/bin/bash"]
37 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | # For debug: SPHINXOPTS = -nWT --keep-going -vvv
 6 | SPHINXOPTS    = -W  # make warnings fatal (disabled because of gym in the wrappers)
 7 | SPHINXBUILD   = sphinx-build
 8 | SPHINXPROJ    = RLZoo
 9 | SOURCEDIR     = .
10 | BUILDDIR      = _build
11 | 
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 | 
16 | .PHONY: help Makefile
17 | 
18 | # Catch-all target: route all unknown targets to Sphinx using the new
19 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
20 | %: Makefile
21 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | ## RL Zoo3 Documentation
 2 | 
 3 | This folder contains documentation for the RL Zoo.
 4 | 
 5 | 
 6 | ### Build the Documentation
 7 | 
 8 | #### Install Sphinx and Theme
 9 | Execute this command in the project root:
10 | ```
11 | pip install stable_baselines3[docs]
12 | pip install -e .
13 | ```
14 | 
15 | #### Building the Docs
16 | 
17 | In the `docs/` folder:
18 | ```
19 | make html
20 | ```
21 | 
22 | If you want to rebuild the docs each time a file is changed:
23 | 
24 | ```
25 | sphinx-autobuild . _build/html
26 | ```
27 | 


--------------------------------------------------------------------------------
/docs/_static/css/baselines_theme.css:
--------------------------------------------------------------------------------
 1 | /* Main colors  adapted from pytorch doc */
 2 | :root{
 3 |   --main-bg-color: #343A40;
 4 |   --link-color: #FD7E14;
 5 | }
 6 | 
 7 | /* Header fonts */
 8 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
 9 |     font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
10 | }
11 | 
12 | 
13 | /* Docs background */
14 | .wy-side-nav-search{
15 |   background-color: var(--main-bg-color);
16 | }
17 | 
18 | /* Mobile version */
19 | .wy-nav-top{
20 |   background-color: var(--main-bg-color);
21 | }
22 | 
23 | /* Change link colors (except for the menu) */
24 | a {
25 |     color: var(--link-color);
26 | }
27 | 
28 | a:hover {
29 |     color: #4F778F;
30 | }
31 | 
32 | .wy-menu a {
33 |     color: #b3b3b3;
34 | }
35 | 
36 | .wy-menu a:hover {
37 |     color: #b3b3b3;
38 | }
39 | 
40 | a.icon.icon-home {
41 |     color: #b3b3b3;
42 | }
43 | 
44 | .version{
45 |     color: var(--link-color) !important;
46 | }
47 | 
48 | 
49 | /* Make code blocks have a background */
50 | .codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] {
51 |         background: #f8f8f8;
52 | }
53 | 
54 | /* Change style of types in the docstrings .rst-content .field-list */
55 | .field-list .xref.py.docutils, .field-list code.docutils, .field-list .docutils.literal.notranslate
56 | {
57 |   border: None;
58 |   padding-left: 0;
59 |   padding-right: 0;
60 |   color: #404040;
61 | }
62 | 


--------------------------------------------------------------------------------
/docs/_static/img/colab-badge.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="117" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="117" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h30v20H0z"/><path fill="#007ec6" d="M30 0h87v20H30z"/><path fill="url(#b)" d="M0 0h117v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"><svg x="4px" y="0px" width="22px" height="20px" viewBox="-2 0 28 24" style="background-color: #fff;border-radius: 1px;"><path style="fill:#ef9008;" d="M1.977,16.77c-2.667-2.277-2.605-7.079,0-9.357C2.919,8.057,3.522,9.075,4.49,9.691c-1.152,1.6-1.146,3.201-0.004,4.803C3.522,15.111,2.918,16.126,1.977,16.77z"/><path style="fill:#fdba18;" d="M12.257,17.114c-1.767-1.633-2.485-3.658-2.118-6.02c0.451-2.91,2.139-4.893,4.946-5.678c2.565-0.718,4.964-0.217,6.878,1.819c-0.884,0.743-1.707,1.547-2.434,2.446C18.488,8.827,17.319,8.435,16,8.856c-2.404,0.767-3.046,3.241-1.494,5.644c-0.241,0.275-0.493,0.541-0.721,0.826C13.295,15.939,12.511,16.3,12.257,17.114z"/><path style="fill:#ef9008;" d="M19.529,9.682c0.727-0.899,1.55-1.703,2.434-2.446c2.703,2.783,2.701,7.031-0.005,9.764c-2.648,2.674-6.936,2.725-9.701,0.115c0.254-0.814,1.038-1.175,1.528-1.788c0.228-0.285,0.48-0.552,0.721-0.826c1.053,0.916,2.254,1.268,3.6,0.83C20.502,14.551,21.151,11.927,19.529,9.682z"/><path style="fill:#fdba18;" d="M4.49,9.691C3.522,9.075,2.919,8.057,1.977,7.413c2.209-2.398,5.721-2.942,8.476-1.355c0.555,0.32,0.719,0.606,0.285,1.128c-0.157,0.188-0.258,0.422-0.391,0.631c-0.299,0.47-0.509,1.067-0.929,1.371C8.933,9.539,8.523,8.847,8.021,8.746C6.673,8.475,5.509,8.787,4.49,9.691z"/><path style="fill:#fdba18;" 
d="M1.977,16.77c0.941-0.644,1.545-1.659,2.509-2.277c1.373,1.152,2.85,1.433,4.45,0.499c0.332-0.194,0.503-0.088,0.673,0.19c0.386,0.635,0.753,1.285,1.181,1.89c0.34,0.48,0.222,0.715-0.253,1.006C7.84,19.73,4.205,19.188,1.977,16.77z"/></svg><text x="245" y="140" transform="scale(.1)" textLength="30"> </text><text x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="770">Open in Colab</text><text x="725" y="140" transform="scale(.1)" textLength="770">Open in Colab</text></g> </svg>
2 | 


--------------------------------------------------------------------------------
/docs/_static/img/colab.svg:
--------------------------------------------------------------------------------
1 | <svg width="24px" height="15px" viewBox="0 0 24 15" version="1.1" xmlns="http://www.w3.org/2000/svg">
2 |   <path d="M1.977,11.77 C-0.69,9.493 -0.628,4.691 1.977,2.413 C2.919,3.057 3.522,4.075 4.49,4.691 C3.338,6.291 3.344,7.892 4.486,9.494 C3.522,10.111 2.918,11.126 1.977,11.77 Z" fill="#FDBA18"/>
3 |   <path d="M12.257,12.114 C10.49,10.481 9.772,8.456 10.139,6.094 C10.59,3.184 12.278,1.201 15.085,0.416 C17.65,-0.302 20.049,0.199 21.963,2.235 C21.079,2.978 20.256,3.782 19.529,4.681 C18.488,3.827 17.319,3.435 16,3.856 C13.596,4.623 12.954,7.097 14.506,9.5 C14.265,9.775 14.013,10.041 13.785,10.326 C13.295,10.939 12.511,11.3 12.257,12.114 Z" fill="#FCD93D"/>
4 |   <path d="M19.529,4.682 C20.256,3.783 21.079,2.979 21.963,2.236 C24.666,5.019 24.664,9.267 21.958,12 C19.31,14.674 15.022,14.725 12.257,12.115 C12.511,11.301 13.295,10.94 13.785,10.327 C14.013,10.042 14.265,9.775 14.506,9.501 C15.559,10.417 16.76,10.769 18.106,10.331 C20.502,9.551 21.151,6.927 19.529,4.682 Z" fill="#FDBA18"/>
5 |   <path d="M4.49,4.691 C3.522,4.075 2.919,3.057 1.977,2.413 C4.186,0.015 7.698,-0.529 10.453,1.058 C11.008,1.378 11.172,1.664 10.738,2.186 C10.581,2.374 10.48,2.608 10.347,2.817 C10.048,3.287 9.838,3.884 9.418,4.188 C8.933,4.539 8.523,3.847 8.021,3.746 C6.673,3.475 5.509,3.787 4.49,4.691 Z" fill="#FCD93D"/>
6 |   <path d="M1.977,11.77 C2.918,11.126 3.522,10.111 4.486,9.493 C5.859,10.645 7.336,10.926 8.936,9.992 C9.268,9.798 9.439,9.904 9.609,10.182 C9.995,10.817 10.362,11.467 10.79,12.072 C11.13,12.552 11.012,12.787 10.537,13.078 C7.84,14.73 4.205,14.188 1.977,11.77 Z" fill="#FCD93D"/>
7 | </svg>


--------------------------------------------------------------------------------
/docs/conda_env.yml:
--------------------------------------------------------------------------------
 1 | name: root
 2 | channels:
 3 |   - pytorch
 4 |   - conda-forge
 5 | dependencies:
 6 |   - cpuonly=1.0=0
 7 |   - pip=24.2
 8 |   - python=3.11
 9 |   - pytorch=2.5.0=py3.11_cpu_0
10 |   - pip:
11 |     - gymnasium>=0.28.1,<0.30
12 |     - cloudpickle
13 |     - opencv-python-headless
14 |     - pandas
15 |     - numpy
16 |     - matplotlib
17 |     - sphinx>=5,<8
18 |     - sphinx_rtd_theme>=1.3.0
19 |     - sphinx_copybutton
20 |     - sb3-contrib>=2.4.0a10,<3.0
21 |     - optuna>=3.0,<5.0
22 |     - huggingface_sb3>=3.0
23 |     - rich
24 |     - tqdm
25 |     - pyyaml>=5.1
26 |     - pytablewriter==1.2.0
27 | 


--------------------------------------------------------------------------------
/docs/guide/config.rst:
--------------------------------------------------------------------------------
  1 | .. _config:
  2 | 
  3 | =============
  4 | Configuration
  5 | =============
  6 | 
  7 | Hyperparameter yaml syntax
  8 | --------------------------
  9 | 
 10 | The syntax used in ``hyperparams/algo_name.yml`` for setting
 11 | hyperparameters (likewise the syntax to `overwrite
 12 | hyperparameters <https://github.com/DLR-RM/rl-baselines3-zoo#overwrite-hyperparameters>`__
 13 | on the cli) may be specialized if the argument is a function. See
 14 | examples in the ``hyperparams/`` directory. For example:
 15 | 
 16 | -  Specify a linear schedule for the learning rate:
 17 | 
 18 | .. code:: yaml
 19 | 
 20 |      learning_rate: lin_0.012486195510232303
 21 | 
 22 | Specify a different activation function for the network:
 23 | 
 24 | .. code:: yaml
 25 | 
 26 |      policy_kwargs: "dict(activation_fn=nn.ReLU)"
 27 | 
 28 | For a custom policy:
 29 | 
 30 | .. code:: yaml
 31 | 
 32 |      policy: my_package.MyCustomPolicy  # for instance stable_baselines3.ppo.MlpPolicy
 33 | 
 34 | Env Normalization
 35 | -----------------
 36 | 
 37 | In the hyperparameter file, ``normalize: True`` means that the training
 38 | environment will be wrapped in a
 39 | `VecNormalize <https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/vec_env/vec_normalize.py#L13>`__
 40 | wrapper.
 41 | 
 42 | `Normalization
 43 | uses <https://github.com/DLR-RM/rl-baselines3-zoo/issues/64>`__ the
 44 | default parameters of ``VecNormalize``, with the exception of ``gamma``
 45 | which is set to match that of the agent. This can be
 46 | `overridden <https://github.com/DLR-RM/rl-baselines3-zoo/blob/v0.10.0/hyperparams/sac.yml#L239>`__
 47 | using the appropriate ``hyperparams/algo_name.yml``, e.g.
 48 | 
 49 | .. code:: yaml
 50 | 
 51 |     normalize: "{'norm_obs': True, 'norm_reward': False}"
 52 | 
 53 | Env Wrappers
 54 | ------------
 55 | 
 56 | You can specify in the hyperparameter config one or more wrappers to use
 57 | around the environment:
 58 | 
 59 | for one wrapper:
 60 | 
 61 | .. code:: yaml
 62 | 
 63 |   env_wrapper: gym_minigrid.wrappers.FlatObsWrapper
 64 | 
 65 | for multiple, specify a list:
 66 | 
 67 | .. code:: yaml
 68 | 
 69 |   env_wrapper:
 70 |       - rl_zoo3.wrappers.TruncatedOnSuccessWrapper:
 71 |           reward_offset: 1.0
 72 |       - sb3_contrib.common.wrappers.TimeFeatureWrapper
 73 | 
 74 | Note that you can easily specify parameters too.
 75 | 
 76 | By default, the environment is wrapped with a ``Monitor`` wrapper to
 77 | record episode statistics. You can specify arguments to it using
 78 | ``monitor_kwargs`` parameter to log additional data. That data *must* be
 79 | present in the info dictionary at the last step of each episode.
 80 | 
 81 | For instance, for recording success with goal envs
 82 | (e.g. ``FetchReach-v1``):
 83 | 
 84 | .. code:: yaml
 85 | 
 86 |   monitor_kwargs: dict(info_keywords=('is_success',))
 87 | 
 88 | or recording final x position with ``Ant-v3``:
 89 | 
 90 | .. code:: yaml
 91 | 
 92 |   monitor_kwargs: dict(info_keywords=('x_position',))
 93 | 
 94 | Note: for known ``GoalEnv`` like ``FetchReach``,
 95 | ``info_keywords=('is_success',)`` is actually the default.
 96 | 
 97 | VecEnvWrapper
 98 | -------------
 99 | 
100 | You can specify which ``VecEnvWrapper`` to use in the config, the same
101 | way as for env wrappers (see above), using the ``vec_env_wrapper`` key:
102 | 
103 | For instance:
104 | 
105 | .. code:: yaml
106 | 
107 |   vec_env_wrapper: stable_baselines3.common.vec_env.VecMonitor
108 | 
109 | Note: ``VecNormalize`` is supported separately using ``normalize``
110 | keyword, and ``VecFrameStack`` has a dedicated keyword ``frame_stack``.
111 | 
112 | Callbacks
113 | ---------
114 | 
115 | Following the same syntax as env wrappers, you can also add custom
116 | callbacks to use during training.
117 | 
118 | .. code:: yaml
119 | 
120 |   callback:
121 |     - rl_zoo3.callbacks.ParallelTrainCallback:
122 |         gradient_steps: 256
123 | 


--------------------------------------------------------------------------------
/docs/guide/custom_env.rst:
--------------------------------------------------------------------------------
 1 | .. _custom:
 2 | 
 3 | ==================
 4 | Custom Environment
 5 | ==================
 6 | 
 7 | The easiest way to add support for a custom environment is to edit
 8 | ``rl_zoo3/import_envs.py`` and register your environment here. Then, you
 9 | need to add a section for it in the hyperparameters file
10 | (``hyperparams/algo.yml`` or a custom yaml file that you can specify
11 | using ``--conf-file`` argument).
12 | 


--------------------------------------------------------------------------------
/docs/guide/enjoy.rst:
--------------------------------------------------------------------------------
  1 | .. _enjoy:
  2 | 
  3 | =====================
  4 | Enjoy a Trained Agent
  5 | =====================
  6 | 
  7 | .. note::
  8 | 
  9 |   To download the repo with the trained agents, you must use
 10 |   ``git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo``
 11 |   in order to clone the submodule too.
 12 | 
 13 | 
 14 | Enjoy a trained agent
 15 | ---------------------
 16 | 
 17 | If the trained agent exists, then you can see it in action using:
 18 | 
 19 | ::
 20 | 
 21 |    python enjoy.py --algo algo_name --env env_id
 22 | 
 23 | For example, enjoy A2C on Breakout during 5000 timesteps:
 24 | 
 25 | ::
 26 | 
 27 |    python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
 28 | 
 29 | If you have trained an agent yourself, you need to do:
 30 | 
 31 | ::
 32 | 
 33 |    # exp-id 0 corresponds to the last experiment, otherwise, you can specify another ID
 34 |    python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 0
 35 | 
 36 | Load Checkpoints, Best Model
 37 | -----------------------------
 38 | 
 39 | To load the best model (when using evaluation environment):
 40 | 
 41 | ::
 42 | 
 43 |    python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-best
 44 | 
 45 | To load a checkpoint (here the checkpoint name is
 46 | ``rl_model_10000_steps.zip``):
 47 | 
 48 | ::
 49 | 
 50 |    python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-checkpoint 10000
 51 | 
 52 | To load the latest checkpoint:
 53 | 
 54 | ::
 55 | 
 56 |    python enjoy.py --algo algo_name --env env_id -f logs/ --exp-id 1 --load-last-checkpoint
 57 | 
 58 | 
 59 | Record a Video of a Trained Agent
 60 | ---------------------------------
 61 | 
 62 | Record 1000 steps with the latest saved model:
 63 | 
 64 | ::
 65 | 
 66 |   python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000
 67 | 
 68 | Use the best saved model instead:
 69 | 
 70 | ::
 71 | 
 72 |   python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-best
 73 | 
 74 | Record a video of a checkpoint saved during training (here the
 75 | checkpoint name is ``rl_model_10000_steps.zip``):
 76 | 
 77 | ::
 78 | 
 79 |   python -m rl_zoo3.record_video --algo ppo --env BipedalWalkerHardcore-v3 -n 1000 --load-checkpoint 10000
 80 | 
 81 | 
 82 | Record a Video of a Training Experiment
 83 | ---------------------------------------
 84 | 
 85 | Apart from recording videos of specific saved models, it is also
 86 | possible to record a video of a training experiment where checkpoints
 87 | have been saved.
 88 | 
 89 | Record 1000 steps for each checkpoint, latest and best saved models:
 90 | 
 91 | ::
 92 | 
 93 |   python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic
 94 | 
 95 | The previous command will create a ``mp4`` file. To convert this file to
 96 | ``gif`` format as well:
 97 | 
 98 | ::
 99 | 
100 |   python -m rl_zoo3.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif
101 | 


--------------------------------------------------------------------------------
/docs/guide/install.rst:
--------------------------------------------------------------------------------
 1 | .. _install:
 2 | 
 3 | Installation
 4 | ============
 5 | 
 6 | Prerequisites
 7 | -------------
 8 | 
 9 | RL Zoo requires python 3.8+ and PyTorch >= 1.13
10 | 
11 | 
12 | Minimal Installation
13 | --------------------
14 | 
15 | To install RL Zoo with pip, execute:
16 | 
17 | .. code-block:: bash
18 | 
19 |     pip install rl_zoo3
20 | 
21 | From source:
22 | 
23 | .. code-block:: bash
24 | 
25 | 	git clone https://github.com/DLR-RM/rl-baselines3-zoo
26 | 	cd rl-baselines3-zoo/
27 | 	pip install -e .
28 | 
29 | .. note::
30 | 
31 | 	You can do ``python -m rl_zoo3.train`` from any folder and you have access to ``rl_zoo3`` command line interface, for instance, ``rl_zoo3 train`` is equivalent to ``python train.py``
32 | 
33 | 
34 | 
35 | Full installation
36 | -----------------
37 | 
38 | With extra envs and test dependencies:
39 | 
40 | 
41 | .. note::
42 | 
43 |   If you want to use Atari games, you will need to do ``pip install "autorom[accept-rom-license]"``
44 |   additionally to download the ROMs
45 | 
46 | 
47 | .. code-block:: bash
48 | 
49 | 	apt-get install swig cmake ffmpeg
50 | 	pip install -r requirements.txt
51 | 	pip install -e .[plots,tests]
52 | 
53 | 
54 | Please see `Stable Baselines3 documentation <https://stable-baselines3.readthedocs.io/en/master/>`_ for alternatives to install stable baselines3.
55 | 
56 | 
57 | Docker Images
58 | -------------
59 | 
60 | Build docker image (CPU):
61 | 
62 | ::
63 | 
64 |    make docker-cpu
65 | 
66 | GPU:
67 | 
68 | ::
69 | 
70 |    USE_GPU=True make docker-gpu
71 | 
72 | Pull built docker image (CPU):
73 | 
74 | ::
75 | 
76 |    docker pull stablebaselines/rl-baselines3-zoo-cpu
77 | 
78 | GPU image:
79 | 
80 | ::
81 | 
82 |    docker pull stablebaselines/rl-baselines3-zoo
83 | 
84 | Run script in the docker image:
85 | 
86 | ::
87 | 
88 |    ./scripts/run_docker_cpu.sh python train.py --algo ppo --env CartPole-v1
89 | 


--------------------------------------------------------------------------------
/docs/guide/integrations.rst:
--------------------------------------------------------------------------------
 1 | .. _integrations:
 2 | 
 3 | ============
 4 | Integrations
 5 | ============
 6 | 
 7 | Huggingface Hub Integration
 8 | ---------------------------
 9 | 
10 | List and videos of trained agents can be found on our Huggingface page: https://huggingface.co/sb3
11 | 
12 | 
13 | Upload model to hub (same syntax as for ``enjoy.py``):
14 | 
15 | ::
16 | 
17 |    python -m rl_zoo3.push_to_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3 -m "Initial commit"
18 | 
19 | You can choose a custom ``repo-name`` (default: ``{algo}-{env_id}``) by
20 | passing a ``--repo-name`` argument.
21 | 
22 | Download model from hub:
23 | 
24 | ::
25 | 
26 |    python -m rl_zoo3.load_from_hub --algo ppo --env CartPole-v1 -f logs/ -orga sb3
27 | 
28 | 
29 | Experiment tracking
30 | -------------------
31 | 
32 | We support tracking experiment data such as learning curves and
33 | hyperparameters via `Weights and Biases <https://wandb.ai>`__.
34 | 
35 | The following command
36 | 
37 | ::
38 | 
39 |   python train.py --algo ppo --env CartPole-v1 --track --wandb-project-name sb3
40 | 
41 | yields a tracked experiment at this
42 | `URL <https://wandb.ai/openrlbenchmark/sb3/runs/1b65ldmh>`__.
43 | 
44 | To add a tag to the run (e.g. ``optimized``), use the argument
45 | ``--wandb-tags optimized``.
46 | 


--------------------------------------------------------------------------------
/docs/guide/plot.rst:
--------------------------------------------------------------------------------
 1 | .. _plot:
 2 | 
 3 | ============
 4 | Plot Scripts
 5 | ============
 6 | 
 7 | 
 8 | Plot scripts (to be documented, see "Results" sections in SB3
 9 | documentation):
10 | 
11 | - ``scripts/all_plots.py``/``scripts/plot_from_file.py`` for plotting evaluations
12 | 
13 | - ``scripts/plot_train.py`` for plotting training reward/success
14 | 
15 | 
16 | Examples
17 | --------
18 | 
19 | Plot training success (y-axis) w.r.t. timesteps (x-axis) with a moving
20 | window of 500 episodes for all the ``Fetch`` environments with ``HER``
21 | algorithm:
22 | 
23 | ::
24 | 
25 |    python scripts/plot_train.py -a her -e Fetch -y success -f rl-trained-agents/ -w 500 -x steps
26 | 
27 | Plot evaluation reward curve for TQC, SAC and TD3 on the HalfCheetah and
28 | Ant PyBullet environments:
29 | 
30 | ::
31 | 
32 |    python3 scripts/all_plots.py -a sac td3 tqc --env HalfCheetahBullet AntBullet -f rl-trained-agents/
33 | 
34 | Plot with the rliable library
35 | -----------------------------
36 | 
37 | The RL zoo integrates some of
38 | `rliable <https://agarwl.github.io/rliable/>`__ library features. You
39 | can find a visual explanation of the tools used by rliable in this `blog
40 | post <https://araffin.github.io/post/rliable/>`__.
41 | 
42 | First, you need to install
43 | `rliable <https://github.com/google-research/rliable>`__.
44 | 
45 | Note: Python 3.7+ is required in that case.
46 | 
47 | Then export your results to a file using the ``all_plots.py`` script
48 | (see above):
49 | 
50 | ::
51 | 
52 |    python scripts/all_plots.py -a sac td3 tqc --env Half Ant -f logs/ -o logs/offpolicy
53 | 
54 | You can now use the ``plot_from_file.py`` script with ``--rliable``,
55 | ``--versus`` and ``--iqm`` arguments:
56 | 
57 | ::
58 | 
59 |    python scripts/plot_from_file.py -i logs/offpolicy.pkl --skip-timesteps --rliable --versus -l SAC TD3 TQC
60 | 
61 | .. note::
62 | 
63 |   you may need to edit ``plot_from_file.py``, in particular the
64 |   ``env_key_to_env_id`` dictionary and the
65 |   ``scripts/score_normalization.py`` which stores min and max score for
66 |   each environment.
67 | 
68 | 
69 | Remark: plotting with the ``--rliable`` option is usually slow as
70 | confidence intervals need to be computed using bootstrap sampling.
71 | 


--------------------------------------------------------------------------------
/docs/guide/quickstart.rst:
--------------------------------------------------------------------------------
 1 | .. _quickstart:
 2 | 
 3 | ===============
 4 | Getting Started
 5 | ===============
 6 | 
 7 | .. note::
 8 | 
 9 |   You can try the following examples online using Google Colab |Colab|
10 |   notebook: `RL Baselines zoo notebook`_
11 | 
12 | 
13 | .. _RL Baselines zoo notebook: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/rl-baselines-zoo.ipynb
14 | .. |Colab| image:: ../_static/img/colab.svg
15 | 
16 | 
17 | The hyperparameters for each environment are defined in
18 | ``hyperparams/algo_name.yml``.
19 | 
20 | If the environment exists in this file, then you can train an agent
21 | using:
22 | 
23 | ::
24 | 
25 |  python -m rl_zoo3.train --algo algo_name --env env_id
26 | 
27 | Or if you are in the RL Zoo3 folder:
28 | 
29 | ::
30 | 
31 |   python train.py --algo algo_name --env env_id
32 | 
33 | For example (with evaluation and checkpoints):
34 | 
35 | ::
36 | 
37 |  python -m rl_zoo3.train --algo ppo --env CartPole-v1 --eval-freq 10000 --save-freq 50000
38 | 
39 | 
40 | 
41 | If the trained agent exists, then you can see it in action using:
42 | 
43 | ::
44 | 
45 |  python -m rl_zoo3.enjoy --algo algo_name --env env_id
46 | 
47 | For example, enjoy A2C on Breakout during 5000 timesteps:
48 | 
49 | ::
50 | 
51 |  python -m rl_zoo3.enjoy --algo a2c --env BreakoutNoFrameskip-v4 --folder rl-trained-agents/ -n 5000
52 | 


--------------------------------------------------------------------------------
/docs/guide/sbx.rst:
--------------------------------------------------------------------------------
 1 | .. _sbx:
 2 | 
 3 | ==========================
 4 | Stable Baselines Jax (SBX)
 5 | ==========================
 6 | 
 7 | `Stable Baselines Jax (SBX) <https://github.com/araffin/sbx>`_ is a proof of concept version of Stable-Baselines3 in Jax.
 8 | 
 9 | It provides a minimal number of features compared to SB3 but can be much faster (up to 20x faster!): https://twitter.com/araffin2/status/1590714558628253698
10 | 
11 | 
12 | It is also compatible with the RL Zoo.
13 | For that you will need to create two files.
14 | 
15 | ``train_sbx.py``:
16 | 
17 | .. code-block:: python
18 | 
19 |   import rl_zoo3
20 |   import rl_zoo3.train
21 |   from rl_zoo3.train import train
22 |   from sbx import DQN, PPO, SAC, TQC, DroQ
23 | 
24 | 
25 |   rl_zoo3.ALGOS["tqc"] = TQC
26 |   rl_zoo3.ALGOS["droq"] = DroQ
27 |   rl_zoo3.ALGOS["sac"] = SAC
28 |   rl_zoo3.ALGOS["ppo"] = PPO
29 |   rl_zoo3.ALGOS["dqn"] = DQN
30 |   rl_zoo3.train.ALGOS = rl_zoo3.ALGOS
31 |   rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
32 | 
33 |   if __name__ == "__main__":
34 |       train()
35 | 
36 | Then you can call ``python train_sbx.py --algo sac --env Pendulum-v1`` and use the RL Zoo CLI.
37 | 
38 | 
39 | ``enjoy_sbx.py``:
40 | 
41 | .. code-block:: python
42 | 
43 |   import rl_zoo3
44 |   import rl_zoo3.enjoy
45 |   from rl_zoo3.enjoy import enjoy
46 |   from sbx import DQN, PPO, SAC, TQC, DroQ
47 | 
48 | 
49 |   rl_zoo3.ALGOS["tqc"] = TQC
50 |   rl_zoo3.ALGOS["droq"] = DroQ
51 |   rl_zoo3.ALGOS["sac"] = SAC
52 |   rl_zoo3.ALGOS["ppo"] = PPO
53 |   rl_zoo3.ALGOS["dqn"] = DQN
54 |   rl_zoo3.enjoy.ALGOS = rl_zoo3.ALGOS
55 |   rl_zoo3.exp_manager.ALGOS = rl_zoo3.ALGOS
56 | 
57 |   if __name__ == "__main__":
58 |       enjoy()
59 | 


--------------------------------------------------------------------------------
/docs/guide/train.rst:
--------------------------------------------------------------------------------
  1 | .. _train:
  2 | 
  3 | ==============
  4 | Train an Agent
  5 | ==============
  6 | 
  7 | Basic Usage
  8 | -----------
  9 | 
 10 | The hyperparameters for each environment are defined in
 11 | ``hyperparams/algo_name.yml``.
 12 | 
 13 | 
 14 | .. note::
 15 | 
 16 | 	Once RL Zoo3 is installed, you can run ``python -m rl_zoo3.train`` from any folder; it is equivalent to ``python train.py``.
 17 | 
 18 | 
 19 | If the environment exists in this file, then you can train an agent using:
 20 | 
 21 | ::
 22 | 
 23 |   python train.py --algo algo_name --env env_id
 24 | 
 25 | 
 26 | .. note::
 27 | 
 28 | 	You can use ``-P`` (``--progress``) option to display a progress bar.
 29 | 
 30 | 
 31 | Custom Config File
 32 | ------------------
 33 | 
 34 | Using a custom config file when it is a YAML file which contains an ``env_id`` entry:
 35 | 
 36 | ::
 37 | 
 38 |   python train.py --algo algo_name --env env_id --conf-file my_yaml.yml
 39 | 
 40 | 
 41 | You can also use a python file that contains a dictionary called ``hyperparams`` with an entry for each ``env_id``.
 42 | (see ``hyperparams/python/ppo_config_example.py`` for an example)
 43 | 
 44 | ::
 45 | 
 46 |   # You can pass a path to a python file
 47 |   python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams/python/ppo_config_example.py
 48 |   # Or pass a path to a file from a module (for instance my_package.my_file)
 49 |   python train.py --algo ppo --env MountainCarContinuous-v0 --conf-file hyperparams.python.ppo_config_example
 50 | 
 51 | The advantage of this approach is that you can specify arbitrary python dictionaries
 52 | and ensure that all their dependencies are imported in the config file itself.
 53 | 
 54 | Tensorboard, Checkpoints, Evaluation
 55 | ------------------------------------
 56 | 
 57 | For example (with tensorboard support):
 58 | 
 59 | ::
 60 | 
 61 |   python train.py --algo ppo --env CartPole-v1 --tensorboard-log /tmp/stable-baselines/
 62 | 
 63 | 
 64 | Evaluate the agent every 10000 steps using 10 episodes for evaluation (using only one evaluation env):
 65 | 
 66 | ::
 67 | 
 68 |   python train.py --algo sac --env AntBulletEnv-v0 --eval-freq 10000 --eval-episodes 10 --n-eval-envs 1
 69 | 
 70 | 
 71 | Save a checkpoint of the agent every 100000 steps:
 72 | 
 73 | ::
 74 | 
 75 |   python train.py --algo td3 --env AntBulletEnv-v0 --save-freq 100000
 76 | 
 77 | Resume Training
 78 | ---------------
 79 | 
 80 | Continue training (here, load pretrained agent for Breakout and continue training for 5000 steps):
 81 | 
 82 | ::
 83 | 
 84 |   python train.py --algo a2c --env BreakoutNoFrameskip-v4 -i rl-trained-agents/a2c/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip -n 5000
 85 | 
 86 | Save Replay Buffer
 87 | ------------------
 88 | 
 89 | When using off-policy algorithms, you can also **save the replay buffer** after training:
 90 | 
 91 | ::
 92 | 
 93 |   python train.py --algo sac --env Pendulum-v1 --save-replay-buffer
 94 | 
 95 | It will be automatically loaded if present when continuing training.
 96 | 
 97 | 
 98 | Env keyword arguments
 99 | ---------------------
100 | 
101 | You can specify keyword arguments to pass to the env constructor in the
102 | command line, using ``--env-kwargs``:
103 | 
104 | ::
105 | 
106 |    python enjoy.py --algo ppo --env MountainCar-v0 --env-kwargs goal_velocity:10
107 | 
108 | 
109 | Overwrite hyperparameters
110 | -------------------------
111 | 
112 | You can easily overwrite hyperparameters in the command line, using
113 | ``--hyperparams``:
114 | 
115 | ::
116 | 
117 |    python train.py --algo a2c --env MountainCarContinuous-v0 --hyperparams learning_rate:0.001 policy_kwargs:"dict(net_arch=[64, 64])"
118 | 
119 | Note: if you want to pass a string, you need to escape it like this:
120 | ``my_string:"'value'"``
121 | 


--------------------------------------------------------------------------------
/docs/guide/tuning.rst:
--------------------------------------------------------------------------------
 1 | .. _tuning:
 2 | 
 3 | =====================
 4 | Hyperparameter Tuning
 5 | =====================
 6 | 
 7 | Automated hyperparameter optimization
 8 | -------------------------------------
 9 | 
10 | Blog post: `Automatic Hyperparameter Tuning - A Visual Guide <https://araffin.github.io/post/hyperparam-tuning/>`_
11 | 
12 | Video: https://www.youtube.com/watch?v=AidFTOdGNFQ
13 | 
14 | We use `Optuna <https://optuna.org/>`__ for optimizing the
15 | hyperparameters. Not all hyperparameters are tuned, and tuning enforces
16 | certain default hyperparameter settings that may be different from the
17 | official defaults. See
18 | `rl_zoo3/hyperparams_opt.py <https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py>`__
19 | for the current settings for each agent.
20 | 
21 | Hyperparameters not specified in
22 | `rl_zoo3/hyperparams_opt.py <https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py>`__
23 | are taken from the associated YAML file and fall back to the default
24 | values of SB3 if not present.
25 | 
26 | Note: when using SuccessiveHalvingPruner (“halving”), you must specify
27 | ``--n-jobs > 1``
28 | 
29 | Budget of 1000 trials with a maximum of 50000 steps:
30 | 
31 | ::
32 | 
33 |    python train.py --algo ppo --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
34 |      --sampler tpe --pruner median
35 | 
36 | Distributed optimization using a shared database is also possible (see
37 | the corresponding `Optuna
38 | documentation <https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/004_distributed.html>`__):
39 | 
40 | ::
41 | 
42 |    python train.py --algo ppo --env MountainCar-v0 -optimize --study-name test --storage logs/demo.log
43 | 
44 | 
45 | 
46 | Visualize live using `optuna-dashboard <https://optuna-dashboard.readthedocs.io/en/latest/getting-started.html>`__
47 | 
48 | .. code:: bash
49 | 
50 |    optuna-dashboard logs/demo.log
51 | 
52 | Load hyperparameters from trial number 21 and train an agent with it:
53 | 
54 | .. code:: bash
55 | 
56 |    python train.py --algo ppo --env MountainCar-v0 --study-name test --storage logs/demo.log --trial-id 21
57 | 
58 | 
59 | The default budget for hyperparameter tuning is 500 trials and there is
60 | one intermediate evaluation for pruning/early stopping per 100k time
61 | steps.
62 | 
63 | Hyperparameters search space
64 | ----------------------------
65 | 
66 | Note that the default hyperparameters used in the zoo when tuning are
67 | not always the same as the defaults provided in
68 | `stable-baselines3 <https://stable-baselines3.readthedocs.io/en/master/modules/base.html>`__.
69 | Consult the latest source code to be sure of these settings. For
70 | example:
71 | 
72 | -  PPO tuning assumes a network architecture with ``ortho_init = False``
73 |    when tuning, though it is ``True`` by
74 |    `default <https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#ppo-policies>`__.
75 |    You can change that by updating
76 |    `rl_zoo3/hyperparams_opt.py <https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/rl_zoo3/hyperparams_opt.py>`__.
77 | 
78 | -  Non-episodic rollout in TD3 and DDPG assumes
79 |    ``gradient_steps = train_freq`` and so tunes only ``train_freq`` to
80 |    reduce the search space.
81 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | RL Baselines3 Zoo Docs - A Training Framework for Stable Baselines3
 2 | ===================================================================
 3 | 
 4 | `RL Baselines3 Zoo <https://github.com/DLR-RM/rl-baselines3-zoo>`_ is a training framework for Reinforcement Learning (RL), using `Stable Baselines3 (SB3) <https://github.com/DLR-RM/stable-baselines3>`_,
 5 | reliable implementations of reinforcement learning algorithms in PyTorch.
 6 | 
 7 | Github repository: https://github.com/DLR-RM/rl-baselines3-zoo
 8 | 
 9 | It provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.
10 | 
11 | In addition, it includes a collection of tuned hyperparameters for common environments and RL algorithms, and agents trained with those settings.
12 | 
13 | .. toctree::
14 |    :maxdepth: 2
15 |    :caption: User Guide
16 | 
17 |    guide/install
18 |    guide/quickstart
19 |    guide/train
20 |    guide/plot
21 |    guide/enjoy
22 |    guide/custom_env
23 |    guide/config
24 |    guide/integrations
25 |    guide/tuning
26 |    guide/sbx
27 | 
28 | 
29 | .. toctree::
30 |   :maxdepth: 1
31 |   :caption: RL Zoo API
32 | 
33 |   modules/exp_manager
34 |   modules/wrappers
35 |   modules/callbacks
36 |   modules/utils
37 | 
38 | .. toctree::
39 |   :maxdepth: 1
40 |   :caption: Misc
41 | 
42 |   misc/changelog
43 | 
44 | 
45 | Citing RL Baselines3 Zoo
46 | ------------------------
47 | To cite this project in publications:
48 | 
49 | .. code-block:: bibtex
50 | 
51 |   @misc{rl-zoo3,
52 |     author = {Raffin, Antonin},
53 |     title = {RL Baselines3 Zoo},
54 |     year = {2020},
55 |     publisher = {GitHub},
56 |     journal = {GitHub repository},
57 |     howpublished = {\url{https://github.com/DLR-RM/rl-baselines3-zoo}},
58 |   }
59 | 
60 | Contributing
61 | ------------
62 | 
63 | To anyone interested in making the RL Zoo better, there are still some improvements
64 | that need to be done.
65 | You can check issues in the `repo <https://github.com/DLR-RM/rl-baselines3-zoo/issues>`_.
66 | 
67 | If you want to contribute, please read `CONTRIBUTING.md <https://github.com/DLR-RM/stable-baselines3/blob/master/CONTRIBUTING.md>`_ first.
68 | 
69 | Indices and tables
70 | -------------------
71 | 
72 | * :ref:`genindex`
73 | * :ref:`search`
74 | * :ref:`modindex`
75 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=StableBaselines
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 


--------------------------------------------------------------------------------
/docs/misc/changelog.rst:
--------------------------------------------------------------------------------
1 | .. _changelog:
2 | 
3 | Changelog
4 | ==========
5 | 
6 | 
7 | See https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md
8 | 


--------------------------------------------------------------------------------
/docs/modules/callbacks.rst:
--------------------------------------------------------------------------------
1 | .. _callbacks:
2 | 
3 | 
4 | Callbacks
5 | =========
6 | 
7 | .. automodule:: rl_zoo3.callbacks
8 |   :members:
9 | 


--------------------------------------------------------------------------------
/docs/modules/exp_manager.rst:
--------------------------------------------------------------------------------
 1 | .. _manager:
 2 | 
 3 | .. automodule:: rl_zoo3.exp_manager
 4 | 
 5 | 
 6 | Experiment Manager
 7 | ==================
 8 | 
 9 | 
10 | Parameters
11 | ----------
12 | 
13 | .. autoclass:: ExperimentManager
14 |   :members:
15 |   :inherited-members:
16 | 


--------------------------------------------------------------------------------
/docs/modules/utils.rst:
--------------------------------------------------------------------------------
1 | .. _utils:
2 | 
3 | 
4 | Utils
5 | =====
6 | 
7 | .. automodule:: rl_zoo3.utils
8 |   :members:
9 | 


--------------------------------------------------------------------------------
/docs/modules/wrappers.rst:
--------------------------------------------------------------------------------
1 | .. _wrappers:
2 | 
3 | 
4 | Wrappers
5 | ========
6 | 
7 | .. automodule:: rl_zoo3.wrappers
8 |   :members:
9 | 


--------------------------------------------------------------------------------
/docs/spelling_wordlist.txt:
--------------------------------------------------------------------------------
  1 | py
  2 | env
  3 | atari
  4 | argparse
  5 | Argparse
  6 | TensorFlow
  7 | feedforward
  8 | envs
  9 | VecEnv
 10 | pretrain
 11 | petrained
 12 | tf
 13 | th
 14 | nn
 15 | np
 16 | str
 17 | mujoco
 18 | cpu
 19 | ndarray
 20 | ndarrays
 21 | timestep
 22 | timesteps
 23 | stepsize
 24 | dataset
 25 | adam
 26 | fn
 27 | normalisation
 28 | Kullback
 29 | Leibler
 30 | boolean
 31 | deserialized
 32 | pretrained
 33 | minibatch
 34 | subprocesses
 35 | ArgumentParser
 36 | Tensorflow
 37 | Gaussian
 38 | approximator
 39 | minibatches
 40 | hyperparameters
 41 | hyperparameter
 42 | vectorized
 43 | rl
 44 | colab
 45 | dataloader
 46 | npz
 47 | datasets
 48 | vf
 49 | logits
 50 | num
 51 | Utils
 52 | backpropagate
 53 | prepend
 54 | NaN
 55 | preprocessing
 56 | Cloudpickle
 57 | async
 58 | multiprocess
 59 | tensorflow
 60 | mlp
 61 | cnn
 62 | neglogp
 63 | tanh
 64 | coef
 65 | repo
 66 | Huber
 67 | params
 68 | ppo
 69 | arxiv
 70 | Arxiv
 71 | func
 72 | DQN
 73 | Uhlenbeck
 74 | Ornstein
 75 | multithread
 76 | cancelled
 77 | Tensorboard
 78 | parallelize
 79 | customising
 80 | serializable
 81 | Multiprocessed
 82 | cartpole
 83 | toolset
 84 | lstm
 85 | rescale
 86 | ffmpeg
 87 | avconv
 88 | unnormalized
 89 | Github
 90 | pre
 91 | preprocess
 92 | backend
 93 | attr
 94 | preprocess
 95 | Antonin
 96 | Raffin
 97 | araffin
 98 | Homebrew
 99 | Numpy
100 | Theano
101 | rollout
102 | kfac
103 | Piecewise
104 | csv
105 | nvidia
106 | visdom
107 | tensorboard
108 | preprocessed
109 | namespace
110 | sklearn
111 | GoalEnv
112 | Torchy
113 | pytorch
114 | dicts
115 | optimizers
116 | Deprecations
117 | forkserver
118 | cuda
119 | Polyak
120 | gSDE
121 | rollouts
122 | Pyro
123 | softmax
124 | stdout
125 | Contrib
126 | Quantile
127 | Huggingface
128 | Jax
129 | Optuna
130 | 


--------------------------------------------------------------------------------
/enjoy.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.enjoy import enjoy
2 | 
3 | if __name__ == "__main__":
4 |     enjoy()
5 | 


--------------------------------------------------------------------------------
/hyperparams/a2c.yml:
--------------------------------------------------------------------------------
  1 | atari:
  2 |   env_wrapper:
  3 |     - stable_baselines3.common.atari_wrappers.AtariWrapper
  4 |   # Equivalent to
  5 |   # vec_env_wrapper:
  6 |   #   - stable_baselines3.common.vec_env.VecFrameStack:
  7 |   #         n_stack: 4
  8 |   frame_stack: 4
  9 |   policy: 'CnnPolicy'
 10 |   n_envs: 16
 11 |   n_timesteps: !!float 1e7
 12 |   ent_coef: 0.01
 13 |   vf_coef: 0.25
 14 |   policy_kwargs: "dict(optimizer_class=RMSpropTFLike, optimizer_kwargs=dict(eps=1e-5))"
 15 | 
 16 | CartPole-v1:
 17 |   n_envs: 8
 18 |   n_timesteps: !!float 5e5
 19 |   policy: 'MlpPolicy'
 20 |   ent_coef: 0.0
 21 | 
 22 | LunarLander-v2:
 23 |   n_envs: 8
 24 |   n_timesteps: !!float 2e5
 25 |   policy: 'MlpPolicy'
 26 |   gamma: 0.995
 27 |   n_steps: 5
 28 |   learning_rate: lin_0.00083
 29 |   ent_coef: 0.00001
 30 | 
 31 | MountainCar-v0:
 32 |   normalize: true
 33 |   n_envs: 16
 34 |   n_timesteps: !!float 1e6
 35 |   policy: 'MlpPolicy'
 36 |   ent_coef: .0
 37 | 
 38 | Acrobot-v1:
 39 |   normalize: true
 40 |   n_envs: 16
 41 |   n_timesteps: !!float 5e5
 42 |   policy: 'MlpPolicy'
 43 |   ent_coef: .0
 44 | 
 45 | # Tuned
 46 | Pendulum-v1:
 47 |   normalize: True
 48 |   n_envs: 8
 49 |   n_timesteps: !!float 1e6
 50 |   policy: 'MlpPolicy'
 51 |   ent_coef: 0.0
 52 |   max_grad_norm: 0.5
 53 |   n_steps: 8
 54 |   gae_lambda: 0.9
 55 |   vf_coef: 0.4
 56 |   gamma: 0.9
 57 |   use_rms_prop: True
 58 |   normalize_advantage: False
 59 |   learning_rate: lin_7e-4
 60 |   use_sde: True
 61 |   policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
 62 | 
 63 | # Tuned
 64 | LunarLanderContinuous-v3:
 65 |   normalize: true
 66 |   n_envs: 4
 67 |   n_timesteps: !!float 5e6
 68 |   policy: 'MlpPolicy'
 69 |   ent_coef: 0.0
 70 |   max_grad_norm: 0.5
 71 |   n_steps: 8
 72 |   gae_lambda: 0.9
 73 |   vf_coef: 0.4
 74 |   gamma: 0.99
 75 |   use_rms_prop: True
 76 |   normalize_advantage: False
 77 |   learning_rate: lin_7e-4
 78 |   use_sde: True
 79 |   policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
 80 | 
 81 | # Tuned
 82 | MountainCarContinuous-v0:
 83 |   normalize: true
 84 |   n_envs: 4
 85 |   n_steps: 100
 86 |   n_timesteps: !!float 1e5
 87 |   policy: 'MlpPolicy'
 88 |   ent_coef: 0.0
 89 |   use_sde: True
 90 |   sde_sample_freq: 16
 91 |   policy_kwargs: "dict(log_std_init=0.0, ortho_init=False)"
 92 | 
 93 | # Tuned
 94 | BipedalWalker-v3:
 95 |   normalize: true
 96 |   n_envs: 16
 97 |   n_timesteps: !!float 5e6
 98 |   policy: 'MlpPolicy'
 99 |   ent_coef: 0.0
100 |   max_grad_norm: 0.5
101 |   n_steps: 8
102 |   gae_lambda: 0.9
103 |   vf_coef: 0.4
104 |   gamma: 0.99
105 |   use_rms_prop: True
106 |   normalize_advantage: False
107 |   learning_rate: lin_0.00096
108 |   use_sde: True
109 |   policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
110 | 
111 | # Tuned
112 | BipedalWalkerHardcore-v3:
113 |   normalize: true
114 |   n_envs: 32
115 |   n_timesteps: !!float 20e7
116 |   policy: 'MlpPolicy'
117 |   ent_coef: 0.001
118 |   max_grad_norm: 0.5
119 |   n_steps: 8
120 |   gae_lambda: 0.9
121 |   vf_coef: 0.4
122 |   gamma: 0.99
123 |   use_rms_prop: True
124 |   normalize_advantage: False
125 |   learning_rate: lin_0.0008
126 |   use_sde: True
127 |   policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
128 | 
129 | # Tuned
130 | HalfCheetahBulletEnv-v0: &pybullet-defaults
131 |   normalize: true
132 |   n_envs: 4
133 |   n_timesteps: !!float 2e6
134 |   policy: 'MlpPolicy'
135 |   ent_coef: 0.0
136 |   max_grad_norm: 0.5
137 |   n_steps: 8
138 |   gae_lambda: 0.9
139 |   vf_coef: 0.4
140 |   gamma: 0.99
141 |   use_rms_prop: True
142 |   normalize_advantage: False
143 |   # Both work
144 |   learning_rate: lin_0.00096
145 |   # learning_rate: !!float 3e-4
146 |   use_sde: True
147 |   policy_kwargs: "dict(log_std_init=-2, ortho_init=False)"
148 | 
149 | Walker2DBulletEnv-v0:
150 |   <<: *pybullet-defaults
151 | 
152 | # Tuned
153 | AntBulletEnv-v0:
154 |   <<: *pybullet-defaults
155 | 
156 | # Tuned
157 | HopperBulletEnv-v0:
158 |   <<: *pybullet-defaults
159 | 
160 | # Tuned but unstable
161 | # Not working without SDE?
162 | ReacherBulletEnv-v0:
163 |   <<: *pybullet-defaults
164 |   learning_rate: lin_0.0008
165 | 
166 | # === Mujoco Envs ===
167 | 
168 | HalfCheetah-v4: &mujoco-defaults
169 |   normalize: true
170 |   n_timesteps: !!float 1e6
171 |   policy: 'MlpPolicy'
172 | 
173 | Ant-v4:
174 |   <<: *mujoco-defaults
175 | 
176 | Hopper-v4:
177 |   <<: *mujoco-defaults
178 | 
179 | Walker2d-v4:
180 |   <<: *mujoco-defaults
181 | 
182 | Humanoid-v4:
183 |   <<: *mujoco-defaults
184 |   n_timesteps: !!float 2e6
185 | 
186 | Swimmer-v4:
187 |   <<: *mujoco-defaults
188 |   gamma: 0.9999
189 | 


--------------------------------------------------------------------------------
/hyperparams/crossq.yml:
--------------------------------------------------------------------------------
 1 | MountainCarContinuous-v0:
 2 |   n_timesteps: !!float 50000
 3 |   policy: 'MlpPolicy'
 4 |   learning_rate: !!float 7e-4
 5 |   buffer_size: 50000
 6 |   train_freq: 32
 7 |   gradient_steps: 32
 8 |   gamma: 0.9999
 9 |   learning_starts: 100
10 |   use_sde: True
11 |   policy_delay: 2
12 |   policy_kwargs: "dict(use_expln=True, log_std_init=-1, net_arch=[64, 64])"
13 | 
14 | Pendulum-v1:
15 |   n_timesteps: 20000
16 |   policy: 'MlpPolicy'
17 |   policy_delay: 2
18 |   policy_kwargs: "dict(net_arch=[256, 256])"
19 | 
20 | 
21 | LunarLanderContinuous-v3:
22 |   n_timesteps: !!float 2e5
23 |   policy: 'MlpPolicy'
24 |   buffer_size: 1000000
25 |   learning_starts: 10000
26 | 
27 | 
28 | BipedalWalker-v3:
29 |   n_timesteps: !!float 2e5
30 |   policy: 'MlpPolicy'
31 |   buffer_size: 300000
32 |   gamma: 0.98
33 |   learning_starts: 10000
34 |   policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[1024, 1024]))"
35 | 
36 | # === Mujoco Envs ===
37 | 
38 | HalfCheetah-v4: &mujoco-defaults
39 |   buffer_size: 1_000_000
40 |   learning_rate: !!float 1e-3
41 |   learning_starts: 5000
42 |   n_timesteps: !!float 5e6
43 |   policy: 'MlpPolicy'
44 |   policy_delay: 3
45 |   policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[2048, 2048]))"
46 | 
47 | Ant-v4:
48 |   <<: *mujoco-defaults
49 | 
50 | Hopper-v4:
51 |   <<: *mujoco-defaults
52 | 
53 | Walker2d-v4:
54 |   <<: *mujoco-defaults
55 | 
56 | Humanoid-v4:
57 |   <<: *mujoco-defaults
58 | 
59 | HumanoidStandup-v4:
60 |   <<: *mujoco-defaults
61 | 
62 | Swimmer-v4:
63 |   <<: *mujoco-defaults
64 |   gamma: 0.999
65 | 
66 | # Tuned for SAC, need to check with CrossQ
67 | HalfCheetahBulletEnv-v0: &pybullet-defaults
68 |   n_timesteps: !!float 1e6
69 |   policy: 'MlpPolicy'
70 |   learning_rate: !!float 7.3e-4
71 |   buffer_size: 300000
72 |   batch_size: 256
73 |   ent_coef: 'auto'
74 |   gamma: 0.98
75 |   train_freq: 8
76 |   gradient_steps: 8
77 |   learning_starts: 10000
78 |   use_sde: True
79 |   policy_kwargs: "dict(use_expln=True, log_std_init=-3)"
80 | 
81 | # Tuned
82 | AntBulletEnv-v0:
83 |   <<: *pybullet-defaults
84 | 
85 | HopperBulletEnv-v0:
86 |   <<: *pybullet-defaults
87 |   learning_rate: lin_7.3e-4
88 | 
89 | Walker2DBulletEnv-v0:
90 |   <<: *pybullet-defaults
91 |   learning_rate: lin_7.3e-4
92 | 


--------------------------------------------------------------------------------
/hyperparams/ddpg.yml:
--------------------------------------------------------------------------------
  1 | # Tuned
  2 | MountainCarContinuous-v0:
  3 |   n_timesteps: 300000
  4 |   policy: 'MlpPolicy'
  5 |   noise_type: 'ornstein-uhlenbeck'
  6 |   noise_std: 0.5
  7 |   gradient_steps: 1
  8 |   train_freq: 1
  9 |   learning_rate: !!float 1e-3
 10 |   batch_size: 256
 11 |   policy_kwargs: "dict(net_arch=[400, 300])"
 12 | 
 13 | Pendulum-v1:
 14 |   n_timesteps: 20000
 15 |   policy: 'MlpPolicy'
 16 |   gamma: 0.98
 17 |   buffer_size: 200000
 18 |   learning_starts: 10000
 19 |   noise_type: 'normal'
 20 |   noise_std: 0.1
 21 |   gradient_steps: 1
 22 |   train_freq: 1
 23 |   learning_rate: !!float 1e-3
 24 |   policy_kwargs: "dict(net_arch=[400, 300])"
 25 | 
 26 | LunarLanderContinuous-v3:
 27 |   n_timesteps: !!float 3e5
 28 |   policy: 'MlpPolicy'
 29 |   gamma: 0.98
 30 |   buffer_size: 200000
 31 |   learning_starts: 10000
 32 |   noise_type: 'normal'
 33 |   noise_std: 0.1
 34 |   gradient_steps: 1
 35 |   train_freq: 1
 36 |   learning_rate: !!float 1e-3
 37 |   policy_kwargs: "dict(net_arch=[400, 300])"
 38 | 
 39 | BipedalWalker-v3:
 40 |   n_timesteps: !!float 1e6
 41 |   policy: 'MlpPolicy'
 42 |   gamma: 0.98
 43 |   buffer_size: 200000
 44 |   learning_starts: 10000
 45 |   noise_type: 'normal'
 46 |   noise_std: 0.1
 47 |   gradient_steps: 1
 48 |   train_freq: 1
 49 |   learning_rate: !!float 1e-3
 50 |   policy_kwargs: "dict(net_arch=[400, 300])"
 51 | 
 52 | # To be tuned
 53 | BipedalWalkerHardcore-v3:
 54 |   n_timesteps: !!float 1e7
 55 |   policy: 'MlpPolicy'
 56 |   gamma: 0.99
 57 |   buffer_size: 1000000
 58 |   learning_starts: 10000
 59 |   noise_type: 'normal'
 60 |   noise_std: 0.1
 61 |   batch_size: 256
 62 |   train_freq: 1
 63 |   learning_rate: lin_7e-4
 64 |   policy_kwargs: "dict(net_arch=[400, 300])"
 65 | 
 66 | # Tuned
 67 | HalfCheetahBulletEnv-v0: &pybullet-defaults
 68 |   n_timesteps: !!float 1e6
 69 |   policy: 'MlpPolicy'
 70 |   gamma: 0.98
 71 |   buffer_size: 200000
 72 |   learning_starts: 10000
 73 |   noise_type: 'normal'
 74 |   noise_std: 0.1
 75 |   gradient_steps: 1
 76 |   train_freq: 1
 77 |   batch_size: 256
 78 |   learning_rate: !!float 7e-4
 79 |   policy_kwargs: "dict(net_arch=[400, 300])"
 80 | 
 81 | # Tuned
 82 | AntBulletEnv-v0:
 83 |   <<: *pybullet-defaults
 84 | 
 85 | # Tuned
 86 | HopperBulletEnv-v0:
 87 |   <<: *pybullet-defaults
 88 | 
 89 | # Tuned
 90 | Walker2DBulletEnv-v0:
 91 |   <<: *pybullet-defaults
 92 | 
 93 | # TO BE tested
 94 | HumanoidBulletEnv-v0:
 95 |   n_timesteps: !!float 2e6
 96 |   policy: 'MlpPolicy'
 97 |   gamma: 0.98
 98 |   buffer_size: 200000
 99 |   learning_starts: 10000
100 |   noise_type: 'normal'
101 |   noise_std: 0.1
102 |   gradient_steps: -1
103 |   train_freq: [1, "episode"]
104 |   learning_rate: !!float 1e-3
105 |   policy_kwargs: "dict(net_arch=[400, 300])"
106 | 
107 | # Tuned
108 | ReacherBulletEnv-v0:
109 |   <<: *pybullet-defaults
110 |   n_timesteps: !!float 3e5
111 | 
112 | 
113 | # To be tuned
114 | InvertedDoublePendulumBulletEnv-v0:
115 |   <<: *pybullet-defaults
116 |   n_timesteps: !!float 1e6
117 | 
118 | # To be tuned
119 | InvertedPendulumSwingupBulletEnv-v0:
120 |   <<: *pybullet-defaults
121 |   n_timesteps: !!float 3e5
122 | 
123 | # === Mujoco Envs ===
124 | HalfCheetah-v4: &mujoco-defaults
125 |   n_timesteps: !!float 1e6
126 |   policy: 'MlpPolicy'
127 |   learning_starts: 10000
128 |   noise_type: 'normal'
129 |   noise_std: 0.1
130 |   train_freq: 1
131 |   gradient_steps: 1
132 |   learning_rate: !!float 1e-3
133 |   batch_size: 256
134 |   policy_kwargs: "dict(net_arch=[400, 300])"
135 | 
136 | Ant-v4:
137 |   <<: *mujoco-defaults
138 | 
139 | Hopper-v4:
140 |   <<: *mujoco-defaults
141 | 
142 | Walker2d-v4:
143 |   <<: *mujoco-defaults
144 | 
145 | Humanoid-v4:
146 |   <<: *mujoco-defaults
147 |   n_timesteps: !!float 2e6
148 | 
149 | Swimmer-v4:
150 |   <<: *mujoco-defaults
151 |   gamma: 0.9999
152 | 


--------------------------------------------------------------------------------
/hyperparams/dqn.yml:
--------------------------------------------------------------------------------
 1 | atari:
 2 |   env_wrapper:
 3 |     - stable_baselines3.common.atari_wrappers.AtariWrapper
 4 |   frame_stack: 4
 5 |   policy: 'CnnPolicy'
 6 |   n_timesteps: !!float 1e7
 7 |   buffer_size: 100000
 8 |   learning_rate: !!float 1e-4
 9 |   batch_size: 32
10 |   learning_starts: 100000
11 |   target_update_interval: 1000
12 |   train_freq: 4
13 |   gradient_steps: 1
14 |   exploration_fraction: 0.1
15 |   exploration_final_eps: 0.01
16 |   # If True, you need to deactivate handle_timeout_termination
17 |   # in the replay_buffer_kwargs
18 |   optimize_memory_usage: False
19 | 
20 | # Almost Tuned
21 | CartPole-v1:
22 |   n_timesteps: !!float 5e4
23 |   policy: 'MlpPolicy'
24 |   learning_rate: !!float 2.3e-3
25 |   batch_size: 64
26 |   buffer_size: 100000
27 |   learning_starts: 1000
28 |   gamma: 0.99
29 |   target_update_interval: 10
30 |   train_freq: 256
31 |   gradient_steps: 128
32 |   exploration_fraction: 0.16
33 |   exploration_final_eps: 0.04
34 |   policy_kwargs: "dict(net_arch=[256, 256])"
35 | 
36 | # Tuned
37 | MountainCar-v0:
38 |   n_timesteps: !!float 1.2e5
39 |   policy: 'MlpPolicy'
40 |   learning_rate: !!float 4e-3
41 |   batch_size: 128
42 |   buffer_size: 10000
43 |   learning_starts: 1000
44 |   gamma: 0.98
45 |   target_update_interval: 600
46 |   train_freq: 16
47 |   gradient_steps: 8
48 |   exploration_fraction: 0.2
49 |   exploration_final_eps: 0.07
50 |   policy_kwargs: "dict(net_arch=[256, 256])"
51 | 
52 | # Tuned
53 | LunarLander-v2:
54 |   n_timesteps: !!float 1e5
55 |   policy: 'MlpPolicy'
56 |   learning_rate: !!float 6.3e-4
57 |   batch_size: 128
58 |   buffer_size: 50000
59 |   learning_starts: 0
60 |   gamma: 0.99
61 |   target_update_interval: 250
62 |   train_freq: 4
63 |   gradient_steps: -1
64 |   exploration_fraction: 0.12
65 |   exploration_final_eps: 0.1
66 |   policy_kwargs: "dict(net_arch=[256, 256])"
67 | 
68 | # Tuned
69 | Acrobot-v1:
70 |   n_timesteps: !!float 1e5
71 |   policy: 'MlpPolicy'
72 |   learning_rate: !!float 6.3e-4
73 |   batch_size: 128
74 |   buffer_size: 50000
75 |   learning_starts: 0
76 |   gamma: 0.99
77 |   target_update_interval: 250
78 |   train_freq: 4
79 |   gradient_steps: -1
80 |   exploration_fraction: 0.12
81 |   exploration_final_eps: 0.1
82 |   policy_kwargs: "dict(net_arch=[256, 256])"
83 | 


--------------------------------------------------------------------------------
/hyperparams/her.yml:
--------------------------------------------------------------------------------
  1 | #############################################################
  2 | # NOTE: As of SB3 1.1.0, HER is implemented as a replay buffer
  3 | # (HerReplayBuffer), so this file is no longer used.
  4 | # It is kept only as a reference.
  5 | #############################################################
  6 | 
  7 | parking-v0:
  8 |   n_timesteps: !!float 1e5
  9 |   policy: 'MlpPolicy'
 10 |   model_class: 'tqc'
 11 |   n_sampled_goal: 4
 12 |   goal_selection_strategy: 'future'
 13 |   buffer_size: 1000000
 14 |   batch_size: 1024
 15 |   gamma: 0.95
 16 |   learning_rate: !!float 1e-3
 17 |   tau: 0.05
 18 |   policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
 19 |   online_sampling: True
 20 |   max_episode_length: 100
 21 |   # normalize: True
 22 | 
 23 | # MuJoCo robotic envs
 24 | 
 25 | FetchPush-v1:
 26 |   env_wrapper:
 27 |     - sb3_contrib.common.wrappers.TimeFeatureWrapper
 28 |   n_timesteps: !!float 1e6
 29 |   policy: 'MlpPolicy'
 30 |   model_class: 'tqc'
 31 |   n_sampled_goal: 4
 32 |   goal_selection_strategy: 'future'
 33 |   buffer_size: 1000000
 34 |   batch_size: 2048
 35 |   gamma: 0.95
 36 |   learning_rate: !!float 1e-3
 37 |   tau: 0.05
 38 |   policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
 39 |   online_sampling: True
 40 | 
 41 | FetchSlide-v1:
 42 |   env_wrapper:
 43 |     - sb3_contrib.common.wrappers.TimeFeatureWrapper
 44 |   n_timesteps: !!float 2.5e6
 45 |   policy: 'MlpPolicy'
 46 |   model_class: 'tqc'
 47 |   n_sampled_goal: 4
 48 |   goal_selection_strategy: 'future'
 49 |   buffer_size: 1000000
 50 |   batch_size: 2048
 51 |   gamma: 0.95
 52 |   learning_rate: !!float 1e-3
 53 |   tau: 0.05
 54 |   # ent_coef: 0.01
 55 |   policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
 56 |   online_sampling: True
 57 | 
 58 | 
 59 | FetchPickAndPlace-v1:
 60 |   env_wrapper:
 61 |     - sb3_contrib.common.wrappers.TimeFeatureWrapper
 62 |     # - rl_zoo3.wrappers.TruncatedOnSuccessWrapper:
 63 |     #     reward_offset: 0
 64 |     #     n_successes: 4
 65 |     # - stable_baselines3.common.monitor.Monitor
 66 |   n_timesteps: !!float 1e6
 67 |   policy: 'MlpPolicy'
 68 |   model_class: 'tqc'
 69 |   n_sampled_goal: 4
 70 |   goal_selection_strategy: 'future'
 71 |   buffer_size: 1000000
 72 |   batch_size: 1024
 73 |   gamma: 0.95
 74 |   learning_rate: !!float 1e-3
 75 |   tau: 0.05
 76 |   policy_kwargs: "dict(n_critics=2, net_arch=[512, 512, 512])"
 77 |   online_sampling: True
 78 | 
 79 | # SAC hyperparams
 80 | FetchReach-v1:
 81 |   n_timesteps: !!float 20000
 82 |   policy: 'MlpPolicy'
 83 |   model_class: 'sac'
 84 |   n_sampled_goal: 4
 85 |   goal_selection_strategy: 'future'
 86 |   buffer_size: 1000000
 87 |   ent_coef: 'auto'
 88 |   batch_size: 256
 89 |   gamma: 0.95
 90 |   learning_rate: 0.001
 91 |   learning_starts: 1000
 92 |   online_sampling: True
 93 |   normalize: True
 94 | 
 95 | # === Real Robot envs
 96 | NeckGoalEnvRelativeSparse-v2:
 97 |   model_class: 'sac'
 98 |   # env_wrapper:
 99 |   #   - rl_zoo3.wrappers.HistoryWrapper:
100 |   #       horizon: 2
101 |   #   - sb3_contrib.common.wrappers.TimeFeatureWrapper
102 |   n_timesteps: !!float 1e6
103 |   policy: 'MlpPolicy'
104 |   learning_rate: !!float 7.3e-4
105 |   buffer_size: 100000
106 |   batch_size: 256
107 |   ent_coef: 'auto'
108 |   gamma: 0.99
109 |   tau: 0.02
110 |   train_freq: [1, "episode"]
111 |   gradient_steps: -1
112 |   # 10 episodes of warm-up
113 |   learning_starts: 1500
114 |   use_sde_at_warmup: True
115 |   use_sde: True
116 |   sde_sample_freq: 64
117 |   policy_kwargs: "dict(log_std_init=-2, net_arch=[256, 256])"
118 |   n_sampled_goal: 4
119 |   goal_selection_strategy: 'future'
120 |   online_sampling: False
121 | 
122 | NeckGoalEnvRelativeDense-v2:
123 |   model_class: 'sac'
124 |   env_wrapper:
125 |     - rl_zoo3.wrappers.HistoryWrapperObsDict:
126 |         horizon: 2
127 |   #   - sb3_contrib.common.wrappers.TimeFeatureWrapper
128 |   n_timesteps: !!float 1e6
129 |   policy: 'MlpPolicy'
130 |   learning_rate: !!float 7.3e-4
131 |   buffer_size: 200000
132 |   batch_size: 256
133 |   ent_coef: 'auto'
134 |   gamma: 0.99
135 |   tau: 0.02
136 |   train_freq: [1, "episode"]
137 |   gradient_steps: -1
138 |   # 10 episodes of warm-up
139 |   learning_starts: 1500
140 |   use_sde_at_warmup: True
141 |   use_sde: True
142 |   sde_sample_freq: 64
143 |   policy_kwargs: "dict(log_std_init=-2, net_arch=[256, 256])"
144 |   n_sampled_goal: 4
145 |   goal_selection_strategy: 'future'
146 |   online_sampling: False
147 | 


--------------------------------------------------------------------------------
/hyperparams/python/ppo_config_example.py:
--------------------------------------------------------------------------------
 1 | """This file just serves as an example of how to configure the zoo
 2 | using Python scripts instead of YAML files."""
 3 | 
 4 | import torch
 5 | 
 6 | hyperparams = {  # maps an env id to its hyperparameters, like the per-env sections of the YAML configs
 7 |     "MountainCarContinuous-v0": dict(
 8 |         env_wrapper=[{"gymnasium.wrappers.TimeLimit": {"max_episode_steps": 100}}],  # wrapper given as {dotted path: kwargs}
 9 |         normalize=True,
10 |         n_envs=1,
11 |         n_timesteps=20000.0,
12 |         policy="MlpPolicy",
13 |         batch_size=8,
14 |         n_steps=8,
15 |         gamma=0.9999,
16 |         learning_rate=7.77e-05,
17 |         ent_coef=0.00429,
18 |         clip_range=0.1,
19 |         n_epochs=2,
20 |         gae_lambda=0.9,
21 |         max_grad_norm=5,
22 |         vf_coef=0.19,
23 |         use_sde=True,
24 |         policy_kwargs=dict(  # a real dict here; the YAML configs encode policy_kwargs as a string instead
25 |             log_std_init=-3.29,
26 |             ortho_init=False,
27 |             activation_fn=torch.nn.ReLU,  # class object passed directly; the reason this file imports torch
28 |         ),
29 |     )
30 | }
31 | 


--------------------------------------------------------------------------------
/hyperparams/qrdqn.yml:
--------------------------------------------------------------------------------
 1 | atari:
 2 |   env_wrapper:
 3 |     - stable_baselines3.common.atari_wrappers.AtariWrapper
 4 |   frame_stack: 4
 5 |   policy: 'CnnPolicy'
 6 |   n_timesteps: !!float 1e7
 7 |   learning_starts: 50000
 8 |   exploration_fraction: 0.025  # explore 250k steps = 10M * 0.025
 9 |   # If True, you must also set handle_timeout_termination=False
10 |   # in replay_buffer_kwargs (the two options cannot be combined)
11 |   optimize_memory_usage: False
12 | 
13 | # Tuned
14 | CartPole-v1:
15 |   n_timesteps: !!float 5e4
16 |   policy: 'MlpPolicy'
17 |   learning_rate: !!float 2.3e-3
18 |   batch_size: 64
19 |   buffer_size: 100000
20 |   learning_starts: 1000
21 |   gamma: 0.99
22 |   target_update_interval: 10
23 |   train_freq: 256
24 |   gradient_steps: 128
25 |   exploration_fraction: 0.16
26 |   exploration_final_eps: 0.04
27 |   policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=10)"
28 | 
29 | # Tuned
30 | MountainCar-v0:
31 |   n_timesteps: !!float 1.2e5
32 |   policy: 'MlpPolicy'
33 |   learning_rate: !!float 4e-3
34 |   batch_size: 128
35 |   buffer_size: 10000
36 |   learning_starts: 1000
37 |   gamma: 0.98
38 |   target_update_interval: 600
39 |   train_freq: 16
40 |   gradient_steps: 8
41 |   exploration_fraction: 0.2
42 |   exploration_final_eps: 0.07
43 |   policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)"
44 | 
45 | # Tuned
46 | LunarLander-v2:
47 |   n_timesteps: !!float 1e5
48 |   policy: 'MlpPolicy'
49 |   learning_rate: lin_1.5e-3
50 |   batch_size: 128
51 |   buffer_size: 100000
52 |   learning_starts: 10000
53 |   gamma: 0.995
54 |   target_update_interval: 1
55 |   train_freq: 256
56 |   gradient_steps: -1
57 |   exploration_fraction: 0.24
58 |   exploration_final_eps: 0.18
59 |   policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=170)"
60 | 
61 | # Tuned
62 | Acrobot-v1:
63 |   n_timesteps: !!float 1e5
64 |   policy: 'MlpPolicy'
65 |   learning_rate: !!float 6.3e-4
66 |   batch_size: 128
67 |   buffer_size: 50000
68 |   learning_starts: 0
69 |   gamma: 0.99
70 |   target_update_interval: 250
71 |   train_freq: 4
72 |   gradient_steps: -1
73 |   exploration_fraction: 0.12
74 |   exploration_final_eps: 0.1
75 |   policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)"
76 | 


--------------------------------------------------------------------------------
/hyperparams/td3.yml:
--------------------------------------------------------------------------------
  1 | # Tuned
  2 | MountainCarContinuous-v0:
  3 |   n_timesteps: 300000
  4 |   policy: 'MlpPolicy'
  5 |   noise_type: 'ornstein-uhlenbeck'
  6 |   noise_std: 0.5
  7 |   gradient_steps: 1
  8 |   train_freq: 1
  9 |   learning_rate: !!float 1e-3
 10 |   batch_size: 256
 11 |   policy_kwargs: "dict(net_arch=[400, 300])"
 12 | 
 13 | Pendulum-v1:
 14 |   n_timesteps: 20000
 15 |   policy: 'MlpPolicy'
 16 |   gamma: 0.98
 17 |   buffer_size: 200000
 18 |   learning_starts: 10000
 19 |   noise_type: 'normal'
 20 |   noise_std: 0.1
 21 |   gradient_steps: 1
 22 |   train_freq: 1
 23 |   learning_rate: !!float 1e-3
 24 |   policy_kwargs: "dict(net_arch=[400, 300])"
 25 | 
 26 | LunarLanderContinuous-v3:
 27 |   n_timesteps: !!float 3e5
 28 |   policy: 'MlpPolicy'
 29 |   gamma: 0.98
 30 |   buffer_size: 200000
 31 |   learning_starts: 10000
 32 |   noise_type: 'normal'
 33 |   noise_std: 0.1
 34 |   gradient_steps: 1
 35 |   train_freq: 1
 36 |   learning_rate: !!float 1e-3
 37 |   policy_kwargs: "dict(net_arch=[400, 300])"
 38 | 
 39 | BipedalWalker-v3:
 40 |   n_timesteps: !!float 1e6
 41 |   policy: 'MlpPolicy'
 42 |   gamma: 0.98
 43 |   buffer_size: 200000
 44 |   learning_starts: 10000
 45 |   noise_type: 'normal'
 46 |   noise_std: 0.1
 47 |   gradient_steps: 1
 48 |   train_freq: 1
 49 |   learning_rate: !!float 1e-3
 50 |   policy_kwargs: "dict(net_arch=[400, 300])"
 51 | 
 52 | # To be tuned
 53 | BipedalWalkerHardcore-v3:
 54 |   n_timesteps: !!float 1e7
 55 |   policy: 'MlpPolicy'
 56 |   gamma: 0.99
 57 |   buffer_size: 1000000
 58 |   learning_starts: 10000
 59 |   noise_type: 'normal'
 60 |   noise_std: 0.1
 61 |   batch_size: 256
 62 |   train_freq: 1
 63 |   learning_rate: lin_7e-4
 64 |   policy_kwargs: "dict(net_arch=[400, 300])"
 65 | 
 66 | # Tuned
 67 | HalfCheetahBulletEnv-v0: &pybullet-defaults  # anchor: reused through the << merge key by several PyBullet envs below
 68 |   n_timesteps: !!float 1e6
 69 |   policy: 'MlpPolicy'
 70 |   gamma: 0.98
 71 |   buffer_size: 200000
 72 |   learning_starts: 10000
 73 |   noise_type: 'normal'
 74 |   noise_std: 0.1
 75 |   gradient_steps: 1
 76 |   train_freq: 1
 77 |   batch_size: 256
 78 |   learning_rate: !!float 7e-4
 79 |   policy_kwargs: "dict(net_arch=[400, 300])"
 80 | 
 81 | AntBulletEnv-v0:
 82 |   <<: *pybullet-defaults
 83 | 
 84 | HopperBulletEnv-v0:
 85 |   <<: *pybullet-defaults
 86 | 
 87 | Walker2DBulletEnv-v0:
 88 |   <<: *pybullet-defaults
 89 | 
 90 | 
 91 | # To be tested
 92 | HumanoidBulletEnv-v0:
 93 |   n_timesteps: !!float 2e6
 94 |   policy: 'MlpPolicy'
 95 |   gamma: 0.98
 96 |   buffer_size: 200000
 97 |   learning_starts: 10000
 98 |   noise_type: 'normal'
 99 |   noise_std: 0.1
100 |   train_freq: 1
101 |   learning_rate: !!float 1e-3
102 |   policy_kwargs: "dict(net_arch=[400, 300])"
103 | 
104 | # Tuned
105 | ReacherBulletEnv-v0:
106 |   <<: *pybullet-defaults
107 |   n_timesteps: !!float 3e5  # overrides the 1e6 inherited from pybullet-defaults
108 | 
109 | # Tuned
110 | InvertedDoublePendulumBulletEnv-v0:
111 |   <<: *pybullet-defaults
112 | 
113 | # Tuned
114 | InvertedPendulumSwingupBulletEnv-v0:
115 |   <<: *pybullet-defaults
116 |   n_timesteps: !!float 3e5  # overrides the 1e6 inherited from pybullet-defaults
117 | 
118 | 
119 | MinitaurBulletEnv-v0:
120 |   n_timesteps: !!float 1e6
121 |   policy: 'MlpPolicy'
122 |   gamma: 0.99
123 |   buffer_size: 1000000
124 |   noise_type: 'normal'
125 |   noise_std: 0.1
126 |   learning_starts: 10000
127 |   batch_size: 256
128 |   learning_rate: !!float 1e-3
129 |   train_freq: 1
130 |   gradient_steps: 1
131 |   policy_kwargs: "dict(net_arch=[400, 300])"
132 | 
133 | # === MuJoCo Envs ===
134 | HalfCheetah-v4: &mujoco-defaults  # anchor: reused through the << merge key by the other -v4 envs below
135 |   n_timesteps: !!float 1e6
136 |   policy: 'MlpPolicy'
137 |   learning_starts: 10000
138 |   noise_type: 'normal'
139 |   noise_std: 0.1
140 |   train_freq: 1
141 |   gradient_steps: 1
142 |   learning_rate: !!float 1e-3
143 |   batch_size: 256
144 |   policy_kwargs: "dict(net_arch=[400, 300])"
145 | 
146 | Ant-v4:
147 |   <<: *mujoco-defaults
148 | 
149 | Hopper-v4:
150 |   <<: *mujoco-defaults
151 | 
152 | Walker2d-v4:
153 |   <<: *mujoco-defaults
154 | 
155 | Humanoid-v4:
156 |   <<: *mujoco-defaults
157 |   n_timesteps: !!float 2e6  # overrides the 1e6 inherited from mujoco-defaults
158 | 
159 | # Tuned
160 | Swimmer-v4:
161 |   <<: *mujoco-defaults
162 |   gamma: 0.9999  # added on top of mujoco-defaults, which does not set gamma
163 | 


--------------------------------------------------------------------------------
/images/car.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/images/car.jpg


--------------------------------------------------------------------------------
/logs/benchmark/a2c-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614768288.4174142, "env_id": "BeamRiderNoFrameskip-v4"}
 2 | r,l,t
 3 | 5310.0,19931,11.331775
 4 | 5278.0,18863,19.145522
 5 | 4412.0,18499,26.784143
 6 | 2160.0,10025,30.907018
 7 | 1380.0,7261,33.878756
 8 | 1848.0,9623,37.852282
 9 | 6372.0,22457,47.104666
10 | 2160.0,10896,51.553509
11 | 5470.0,17871,58.882525
12 | 1380.0,7366,61.998566
13 | 1380.0,7597,65.10874
14 | 1380.0,7625,68.216608
15 | 1188.0,6035,70.682402
16 | 2496.0,12475,75.791831
17 | 3000.0,13264,81.213101
18 | 3840.0,14565,87.340508
19 | 1588.0,9303,91.146296
20 | 5024.0,18179,98.595311
21 | 4734.0,15926,105.128943
22 | 2496.0,11991,110.020293
23 | 3000.0,12733,115.234923
24 | 2160.0,10681,119.611882
25 | 1284.0,9009,123.288294
26 | 2160.0,10005,127.381307
27 | 1536.0,7641,130.503966
28 | 3964.0,16999,137.472344
29 | 1796.0,9815,141.513283
30 | 2776.0,12343,146.616668
31 | 1188.0,7561,149.761088
32 | 1952.0,9335,153.61017
33 | 4028.0,17717,160.872984
34 | 2160.0,11651,165.693949
35 | 1692.0,9569,169.702119
36 | 2160.0,10413,173.962517
37 | 3420.0,13499,179.485842
38 | 1484.0,7885,182.708011
39 | 3360.0,14125,188.490419
40 | 2056.0,11479,193.165845
41 | 3000.0,14509,199.107143
42 | 2832.0,11739,203.909528
43 | 4832.0,17885,211.235447
44 | 4576.0,18013,218.621224
45 | 4348.0,17821,226.003222
46 | 4384.0,15790,232.468659
47 | 2160.0,11752,237.280512
48 | 1640.0,8824,241.053055
49 | 3000.0,12559,246.359317
50 | 


--------------------------------------------------------------------------------
/logs/benchmark/a2c-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1615193378.914093, "env_id": "BipedalWalkerHardcore-v3"}
  2 | r,l,t
  3 | 267.45342,1121,3.428861
  4 | 52.082271,706,4.11655
  5 | 96.57381,881,4.968083
  6 | -54.718438,2000,6.907327
  7 | -40.642382,356,7.254022
  8 | -74.03119,183,7.43183
  9 | 92.052367,897,8.295187
 10 | 131.774118,959,9.223105
 11 | 42.129698,788,9.97768
 12 | -13.796958,2000,11.907291
 13 | 258.424171,1285,13.191344
 14 | 270.596795,1127,14.412434
 15 | 266.009622,1171,15.68599
 16 | -37.48283,307,16.011555
 17 | 5.6025,498,16.489811
 18 | 71.054019,668,17.134797
 19 | 226.553959,2000,19.053179
 20 | -29.138281,320,19.361876
 21 | 114.494889,2000,21.290722
 22 | 14.069756,475,21.746166
 23 | -33.844138,2000,23.699775
 24 | -71.932055,140,23.835918
 25 | 43.003696,663,24.471339
 26 | 266.815498,1151,25.580017
 27 | -50.527918,2000,27.503519
 28 | -43.358147,305,27.799032
 29 | 64.183689,2000,29.724874
 30 | 47.123,664,30.363194
 31 | 267.460295,1167,31.487743
 32 | 211.121624,2000,33.415691
 33 | -5.723699,2000,35.34649
 34 | 45.352271,2000,37.356237
 35 | 31.473474,2000,39.551819
 36 | -22.38959,2000,41.54504
 37 | 262.385989,1233,42.732931
 38 | -56.927479,2000,44.666323
 39 | -35.414777,311,44.971309
 40 | 37.318868,2000,46.90783
 41 | 216.945029,2000,48.835921
 42 | 268.85666,1135,49.923489
 43 | 258.979918,1245,51.121087
 44 | -53.220669,280,51.395679
 45 | -52.96222,2000,53.395831
 46 | -30.573014,2000,55.571042
 47 | 86.642791,823,56.416
 48 | 262.444665,1200,57.577227
 49 | 264.190806,1200,58.728613
 50 | -16.522844,490,59.203664
 51 | 29.094346,2000,61.154416
 52 | -7.494107,484,61.623292
 53 | 146.257517,1037,62.624309
 54 | 87.786025,705,63.30837
 55 | 244.124279,1425,64.674047
 56 | -5.306314,392,65.052497
 57 | 206.864062,2000,66.985329
 58 | 68.390227,2000,68.913324
 59 | -78.383474,141,69.049458
 60 | 32.96207,601,69.629996
 61 | 266.996667,1162,70.747888
 62 | 146.95878,967,71.681916
 63 | -20.563405,2000,73.619046
 64 | -41.638012,2000,75.55533
 65 | -35.748264,2000,77.584659
 66 | 262.590096,1215,78.910465
 67 | 266.482528,1177,80.171633
 68 | 261.938973,1184,81.313759
 69 | -17.894561,2000,83.252325
 70 | 256.634081,1282,84.484862
 71 | 264.271622,1192,85.628885
 72 | -56.296586,2000,87.590536
 73 | 79.150195,2000,89.522783
 74 | 260.159147,1227,90.70997
 75 | 267.130668,1162,91.83126
 76 | 125.648535,2000,93.906298
 77 | -70.775179,2000,96.076391
 78 | 267.10438,1163,97.195431
 79 | 266.611174,1175,98.323065
 80 | -35.588589,2000,100.260114
 81 | 166.139694,2000,102.199227
 82 | 265.671085,1187,103.346298
 83 | 78.281103,717,104.035931
 84 | -30.56892,2000,105.959747
 85 | 104.053373,2000,107.905378
 86 | 110.668196,2000,109.837001
 87 | 27.119705,706,110.521208
 88 | 250.962943,1363,111.834788
 89 | 259.842996,1249,113.04013
 90 | 116.491483,2000,114.972856
 91 | 110.491584,2000,116.926345
 92 | 137.124841,2000,119.112893
 93 | 44.207205,2000,121.151075
 94 | 97.941463,1188,122.29184
 95 | 13.754588,438,122.717643
 96 | -35.686061,2000,124.655047
 97 | 268.467145,1161,125.767297
 98 | -85.521298,95,125.862401
 99 | -15.321094,2000,127.812872
100 | 262.569942,1170,128.940263
101 | 37.488241,2000,130.87561
102 | -60.510632,2000,132.82389
103 | -67.947069,2000,135.042222
104 | 267.845441,1157,136.268686
105 | 265.24081,1146,137.37269
106 | 71.176076,2000,139.299905
107 | 19.659466,556,139.834481
108 | 157.192219,2000,141.771152
109 | -45.485992,297,142.060811
110 | 94.526833,953,142.978194
111 | 275.8062,1055,143.992682
112 | 147.267272,2000,145.937913
113 | -6.264808,2000,147.862182
114 | 46.27028,592,148.43445
115 | 260.913623,1234,149.621845
116 | 


--------------------------------------------------------------------------------
/logs/benchmark/a2c-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614767776.374316, "env_id": "BreakoutNoFrameskip-v4"}
 2 | r,l,t
 3 | 375.0,7606,6.076401
 4 | 86.0,5504,8.302102
 5 | 421.0,9923,12.304659
 6 | 270.0,7092,15.171841
 7 | 416.0,10123,19.272449
 8 | 375.0,7936,22.472544
 9 | 66.0,4884,24.44387
10 | 155.0,3572,25.881367
11 | 353.0,8509,29.312325
12 | 206.0,5237,31.436018
13 | 152.0,6099,33.923915
14 | 261.0,4678,35.802962
15 | 420.0,10346,39.934088
16 | 414.0,9589,43.782337
17 | 76.0,4809,45.771475
18 | 369.0,7685,48.882821
19 | 378.0,9284,52.609996
20 | 422.0,9561,56.527488
21 | 360.0,9243,60.541219
22 | 285.0,7841,63.847421
23 | 388.0,9060,67.705209
24 | 424.0,11123,72.290951
25 | 398.0,11786,77.135814
26 | 381.0,8441,80.522488
27 | 306.0,6006,82.938207
28 | 97.0,5661,85.224353
29 | 217.0,7602,88.45842
30 | 424.0,10106,92.498425
31 | 408.0,9388,96.425963
32 | 388.0,9039,100.346374
33 | 365.0,7323,103.434267
34 | 254.0,4257,105.377463
35 | 377.0,6523,108.253703
36 | 172.0,4913,110.310678
37 | 54.0,4115,111.961039
38 | 427.0,9623,115.799037
39 | 290.0,8287,119.121087
40 | 321.0,6662,121.808789
41 | 341.0,8114,125.064535
42 | 299.0,7691,128.199606
43 | 101.0,5482,130.39826
44 | 25.0,3313,131.728176
45 | 149.0,7167,134.684085
46 | 396.0,6986,137.652418
47 | 20.0,3088,138.91907
48 | 367.0,6303,141.544503
49 | 60.0,5134,143.608451
50 | 272.0,7200,146.649598
51 | 241.0,8170,150.087444
52 | 323.0,7407,153.176795
53 | 60.0,5605,155.588469
54 | 357.0,7098,158.436126
55 | 382.0,11383,162.98275
56 | 346.0,7662,166.189121
57 | 433.0,11569,170.862319
58 | 322.0,8534,174.628042
59 | 364.0,9157,178.331138
60 | 393.0,11215,182.997787
61 | 33.0,3490,184.547825
62 | 184.0,6600,187.359139
63 | 116.0,6307,189.997411
64 | 389.0,7875,193.394374
65 | 162.0,6677,196.233643
66 | 227.0,5508,198.446902
67 | 349.0,8482,201.844003
68 | 232.0,5904,204.213293
69 | 322.0,7344,207.185952
70 | 98.0,5867,209.543781
71 | 409.0,7671,212.636367
72 | 256.0,5574,214.934022
73 | 78.0,3914,216.506063
74 | 232.0,5971,219.082668
75 | 277.0,7206,222.017627
76 | 345.0,8122,225.279926
77 | 258.0,8608,228.830755
78 | 134.0,6329,231.487061
79 | 224.0,5912,234.179806
80 | 377.0,7698,237.315182
81 | 412.0,9315,241.053151
82 | 304.0,6834,243.793879
83 | 363.0,9009,247.567084
84 | 360.0,10184,251.635881
85 | 


--------------------------------------------------------------------------------
/logs/benchmark/a2c-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614793054.584203, "env_id": "EnduroNoFrameskip-v4"}
 2 | r,l,t
 3 | 0.0,13312,8.52731
 4 | 0.0,13312,14.248429
 5 | 0.0,13312,19.970835
 6 | 0.0,13312,25.683224
 7 | 0.0,13312,31.395868
 8 | 0.0,13312,37.1207
 9 | 0.0,13312,42.844106
10 | 0.0,13312,48.554223
11 | 0.0,13312,54.268349
12 | 0.0,13312,59.978596
13 | 0.0,13312,65.714733
14 | 0.0,13312,71.429556
15 | 0.0,13312,77.140308
16 | 0.0,13312,82.848562
17 | 0.0,13312,88.577056
18 | 0.0,13312,94.29743
19 | 0.0,13312,100.011838
20 | 0.0,13312,105.724653
21 | 0.0,13312,111.4436
22 | 0.0,13312,117.16673
23 | 0.0,13312,122.898437
24 | 0.0,13312,128.624969
25 | 0.0,13312,134.347584
26 | 0.0,13312,140.069764
27 | 0.0,13312,145.793799
28 | 0.0,13312,151.52352
29 | 0.0,13312,157.250938
30 | 0.0,13312,162.968618
31 | 0.0,13312,168.684224
32 | 0.0,13312,174.38758
33 | 0.0,13312,180.092482
34 | 0.0,13312,185.795563
35 | 0.0,13312,191.487232
36 | 0.0,13312,197.166874
37 | 0.0,13312,202.842334
38 | 0.0,13312,208.507896
39 | 0.0,13312,214.19725
40 | 0.0,13312,219.878992
41 | 0.0,13312,225.556518
42 | 0.0,13312,231.221534
43 | 0.0,13312,236.901492
44 | 0.0,13312,242.571724
45 | 0.0,13312,248.255306
46 | 0.0,13312,253.931271
47 | 0.0,13312,259.607796
48 | 


--------------------------------------------------------------------------------
/logs/benchmark/a2c-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614937367.290064, "env_id": "PongNoFrameskip-v4"}
 2 | r,l,t
 3 | 17.0,9410,6.490385
 4 | 20.0,6460,9.053506
 5 | 21.0,6612,11.682447
 6 | 16.0,10462,15.840014
 7 | 16.0,9991,19.805953
 8 | 18.0,9430,23.54848
 9 | 14.0,12478,28.513371
10 | 21.0,8217,31.778995
11 | 13.0,10952,36.132016
12 | 15.0,9796,40.019487
13 | 20.0,8360,43.334407
14 | 18.0,8692,46.782233
15 | 19.0,8696,50.229097
16 | 15.0,9513,54.008494
17 | 18.0,8654,57.441068
18 | 18.0,9672,61.291382
19 | 16.0,9992,65.258582
20 | 18.0,8518,68.636641
21 | 21.0,7975,71.803567
22 | 14.0,11213,76.254159
23 | 13.0,11658,80.887247
24 | 19.0,8659,84.324623
25 | 19.0,8699,87.776711
26 | 17.0,8714,91.239745
27 | 14.0,10644,95.463618
28 | 20.0,6268,97.947606
29 | 16.0,8232,101.212081
30 | 17.0,8342,104.524828
31 | 13.0,10497,108.687028
32 | 21.0,7977,111.853881
33 | 9.0,16950,118.590817
34 | 19.0,8345,121.907908
35 | 17.0,8330,125.210845
36 | 19.0,8607,128.627715
37 | 21.0,7977,131.794741
38 | 16.0,10361,135.957308
39 | 19.0,7406,138.89799
40 | 14.0,9998,142.860846
41 | 19.0,8425,146.197888
42 | 9.0,13049,151.363693
43 | 18.0,7094,154.169493
44 | 13.0,10650,158.380911
45 | 18.0,9850,162.280141
46 | 20.0,8748,165.737029
47 | 11.0,14626,171.512476
48 | 20.0,7334,174.406643
49 | 19.0,7206,177.238544
50 | 17.0,7696,180.272341
51 | 20.0,8487,183.609726
52 | 18.0,9257,187.252612
53 | 20.0,6972,189.992589
54 | 20.0,6478,192.620501
55 | 19.0,7444,195.968758
56 | 19.0,9188,199.581835
57 | 15.0,10271,203.622099
58 | 19.0,8481,206.963809
59 | 18.0,9057,210.536548
60 | 21.0,7905,213.649117
61 | 20.0,7008,216.409976
62 | 6.0,14798,222.241111
63 | 21.0,7975,225.376031
64 | 15.0,10766,229.613505
65 | 20.0,6676,232.238959
66 | 17.0,8737,235.679444
67 | 21.0,7975,238.823486
68 | 


--------------------------------------------------------------------------------
/logs/benchmark/a2c-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615281451.6329484, "env_id": "SeaquestNoFrameskip-v4"}
 2 | r,l,t
 3 | 1720.0,9082,7.009356
 4 | 1720.0,9082,11.020819
 5 | 1800.0,9081,15.104989
 6 | 1720.0,9081,19.505034
 7 | 1780.0,9082,23.829598
 8 | 1680.0,9082,28.13943
 9 | 1720.0,9081,32.502165
10 | 1720.0,9082,36.898275
11 | 1780.0,9082,41.240505
12 | 1720.0,8826,45.521239
13 | 1760.0,9082,49.81492
14 | 1780.0,9081,54.232201
15 | 1760.0,9082,58.573261
16 | 1760.0,9082,62.944622
17 | 1460.0,7674,66.655156
18 | 1740.0,9082,71.012287
19 | 1760.0,9082,75.39344
20 | 1600.0,8538,79.487796
21 | 1760.0,9082,83.874407
22 | 1760.0,9081,88.244735
23 | 1800.0,9081,92.608476
24 | 1620.0,8449,96.688586
25 | 1680.0,8985,100.902476
26 | 1760.0,9081,105.253726
27 | 1760.0,9082,109.753073
28 | 1660.0,9082,114.145087
29 | 1660.0,9082,118.488832
30 | 1760.0,9081,122.930631
31 | 1760.0,9082,127.255234
32 | 1760.0,9082,131.570187
33 | 1740.0,9082,135.983795
34 | 1540.0,8262,140.112782
35 | 1560.0,8458,144.235981
36 | 1740.0,9082,148.655545
37 | 1820.0,9082,153.08726
38 | 1780.0,9081,157.483775
39 | 1760.0,9082,161.925575
40 | 1800.0,9081,166.24076
41 | 1780.0,9082,170.670106
42 | 1780.0,9082,175.08615
43 | 1620.0,8890,179.489985
44 | 1760.0,9082,183.932905
45 | 1720.0,9082,188.377881
46 | 1740.0,9082,192.827981
47 | 1660.0,9082,197.229679
48 | 1720.0,9082,201.669629
49 | 1740.0,9082,206.061735
50 | 1760.0,9081,210.484065
51 | 1800.0,9082,214.923074
52 | 1820.0,9082,219.321101
53 | 1760.0,9082,223.750549
54 | 1680.0,8746,227.91953
55 | 1760.0,9081,232.40319
56 | 1760.0,9081,236.780692
57 | 1780.0,9082,241.235071
58 | 1560.0,8409,245.320963
59 | 1740.0,9081,249.75418
60 | 1780.0,9082,254.156047
61 | 1800.0,9082,258.546376
62 | 1820.0,9081,263.008498
63 | 1660.0,8570,267.151795
64 | 1780.0,9082,271.580838
65 | 1760.0,9082,275.890209
66 | 1760.0,9082,280.313025
67 | 1080.0,6389,283.3866
68 | 1740.0,9082,287.816725
69 | 1760.0,9082,292.247788
70 | 


--------------------------------------------------------------------------------
/logs/benchmark/a2c-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1614768543.9226692, "env_id": "SpaceInvadersNoFrameskip-v4"}
  2 | r,l,t
  3 | 495.0,3553,4.361108
  4 | 550.0,3429,5.705929
  5 | 550.0,3287,7.173348
  6 | 730.0,3349,8.490565
  7 | 545.0,3337,9.800351
  8 | 525.0,3355,11.111516
  9 | 545.0,2675,12.16402
 10 | 570.0,2978,13.33383
 11 | 570.0,3676,14.939109
 12 | 545.0,3137,16.191074
 13 | 800.0,3321,17.490357
 14 | 515.0,3126,18.717305
 15 | 580.0,3390,20.041372
 16 | 575.0,3362,21.372456
 17 | 345.0,2841,22.496135
 18 | 515.0,3585,23.907067
 19 | 575.0,2803,25.019226
 20 | 520.0,3360,26.338413
 21 | 545.0,3477,27.708291
 22 | 925.0,5786,29.996137
 23 | 835.0,5663,32.236963
 24 | 525.0,3168,33.48305
 25 | 1450.0,7057,36.259171
 26 | 600.0,3823,37.755884
 27 | 520.0,3563,39.157691
 28 | 575.0,3715,40.611313
 29 | 570.0,3322,41.912127
 30 | 550.0,3481,43.280879
 31 | 570.0,3459,44.628411
 32 | 515.0,3459,46.029524
 33 | 575.0,3795,47.527183
 34 | 525.0,3062,48.729837
 35 | 545.0,3865,50.2381
 36 | 745.0,2843,51.355055
 37 | 575.0,3545,52.73239
 38 | 600.0,3691,54.164446
 39 | 485.0,4321,55.862061
 40 | 1315.0,7361,58.748237
 41 | 515.0,3484,60.111922
 42 | 560.0,2739,61.186349
 43 | 555.0,3289,62.582255
 44 | 525.0,3659,64.016016
 45 | 570.0,3440,65.357781
 46 | 1155.0,6121,67.756124
 47 | 545.0,3315,69.061701
 48 | 570.0,3365,70.485821
 49 | 570.0,3858,72.024043
 50 | 540.0,2799,73.125923
 51 | 775.0,4102,74.728046
 52 | 1130.0,7383,77.617019
 53 | 545.0,4152,79.233018
 54 | 545.0,2633,80.268746
 55 | 575.0,3038,81.466269
 56 | 570.0,3209,82.723093
 57 | 545.0,4051,84.292974
 58 | 600.0,3437,85.638784
 59 | 390.0,3047,87.014406
 60 | 975.0,6881,89.72367
 61 | 515.0,3819,91.222727
 62 | 725.0,3221,92.494269
 63 | 745.0,3249,93.762644
 64 | 540.0,3393,95.268128
 65 | 515.0,3435,96.621554
 66 | 600.0,4159,98.248236
 67 | 1115.0,7433,101.158546
 68 | 370.0,2565,102.171789
 69 | 570.0,2896,103.312471
 70 | 540.0,3486,104.682461
 71 | 525.0,3669,106.128078
 72 | 1160.0,7370,109.019551
 73 | 510.0,3912,110.559911
 74 | 520.0,3469,111.917484
 75 | 420.0,3453,113.272565
 76 | 545.0,3116,114.497209
 77 | 550.0,3450,115.850051
 78 | 745.0,3147,117.086844
 79 | 520.0,3287,118.369515
 80 | 570.0,4413,120.086121
 81 | 575.0,4237,121.742209
 82 | 520.0,3777,123.223256
 83 | 545.0,3377,124.550506
 84 | 575.0,3467,125.921681
 85 | 520.0,3360,127.257644
 86 | 545.0,2761,128.345333
 87 | 575.0,3862,129.863054
 88 | 490.0,3505,131.242345
 89 | 415.0,2915,132.388172
 90 | 575.0,3159,133.637486
 91 | 545.0,3223,134.904184
 92 | 515.0,3273,136.193307
 93 | 600.0,3703,137.632767
 94 | 770.0,3621,139.062721
 95 | 540.0,3294,140.35698
 96 | 545.0,2845,141.470494
 97 | 515.0,3457,142.824692
 98 | 600.0,4255,144.484802
 99 | 600.0,3891,145.999692
100 | 460.0,3043,147.201257
101 | 525.0,3496,148.572112
102 | 745.0,3746,150.075762
103 | 600.0,3229,151.412536
104 | 800.0,3625,152.829677
105 | 505.0,3329,154.141243
106 | 1495.0,6859,156.837327
107 | 395.0,3243,158.115792
108 | 750.0,3166,159.360218
109 | 570.0,3679,160.795473
110 | 1010.0,4911,162.731656
111 | 570.0,3457,164.089787
112 | 605.0,2957,165.261341
113 | 1050.0,4771,167.193539
114 | 575.0,3417,168.530889
115 | 775.0,3331,169.843351
116 | 545.0,3855,171.345333
117 | 550.0,3275,172.629834
118 | 500.0,3263,173.936338
119 | 750.0,4023,175.632566
120 | 520.0,3051,176.833315
121 | 740.0,3550,178.231961
122 | 525.0,3157,179.469186
123 | 980.0,5809,181.75283
124 | 545.0,3373,183.07182
125 | 680.0,3205,184.332636
126 | 600.0,3309,185.631534
127 | 490.0,3827,187.135568
128 | 550.0,3344,188.445078
129 | 725.0,4961,190.382123
130 | 950.0,3156,191.617991
131 | 570.0,3672,193.044935
132 | 520.0,3355,194.369309
133 | 540.0,3834,195.876062
134 | 545.0,3689,197.317266
135 | 545.0,2803,198.413652
136 | 515.0,3243,199.68673
137 | 575.0,3441,201.035745
138 | 555.0,3152,202.270614
139 | 715.0,3957,203.821104
140 | 520.0,3490,205.186715
141 | 550.0,3545,206.592009
142 | 580.0,3537,207.970124
143 | 1255.0,7445,210.877233
144 | 605.0,3449,212.216876
145 | 520.0,3051,213.408286
146 | 1625.0,9443,217.101115
147 | 570.0,3159,218.331664
148 | 605.0,3249,219.598299
149 | 545.0,3231,220.860407
150 | 575.0,2931,221.999535
151 | 510.0,2737,223.074782
152 | 600.0,3699,224.509123
153 | 575.0,4609,226.294702
154 | 600.0,4095,227.885526
155 | 600.0,3505,229.250235
156 | 730.0,3457,230.704194
157 | 745.0,3130,231.928799
158 | 575.0,3216,233.187673
159 | 600.0,3293,234.468616
160 | 545.0,3218,235.712941
161 | 500.0,3685,237.147581
162 | 545.0,3973,238.775266
163 | 600.0,3801,240.252894
164 | 550.0,3391,241.570879
165 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-AsteroidsNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1615283610.9370732, "env_id": "AsteroidsNoFrameskip-v4"}
  2 | r,l,t
  3 | 630.0,2948,3.806405
  4 | 880.0,4636,5.560121
  5 | 1080.0,3886,7.029938
  6 | 630.0,5936,9.272279
  7 | 780.0,4538,10.979955
  8 | 1080.0,5056,12.878166
  9 | 730.0,2726,13.92655
 10 | 530.0,1782,14.599309
 11 | 830.0,3484,15.916041
 12 | 780.0,4220,17.516311
 13 | 1320.0,21476,25.599429
 14 | 680.0,2820,26.653483
 15 | 580.0,5602,28.763651
 16 | 630.0,3204,29.963536
 17 | 830.0,3230,31.171206
 18 | 580.0,4414,32.818263
 19 | 1350.0,13594,37.866124
 20 | 530.0,2182,38.679443
 21 | 980.0,6624,41.277205
 22 | 380.0,2260,42.171792
 23 | 530.0,2246,43.034077
 24 | 680.0,2662,44.036025
 25 | 830.0,3606,45.39094
 26 | 980.0,3120,46.550376
 27 | 730.0,3518,47.880018
 28 | 380.0,2260,48.734687
 29 | 660.0,3296,49.979011
 30 | 830.0,4678,51.755557
 31 | 580.0,2652,52.757058
 32 | 630.0,2612,53.747996
 33 | 380.0,1268,54.225423
 34 | 780.0,3730,55.635921
 35 | 1180.0,8152,58.709851
 36 | 430.0,1970,59.45361
 37 | 930.0,9838,63.172857
 38 | 930.0,14486,68.64684
 39 | 680.0,4178,70.234863
 40 | 1180.0,22736,78.745257
 41 | 630.0,1780,79.413591
 42 | 730.0,3642,80.78565
 43 | 380.0,1938,81.516054
 44 | 230.0,1494,82.075747
 45 | 930.0,4740,83.871758
 46 | 530.0,4052,85.406782
 47 | 930.0,2948,86.517116
 48 | 930.0,3048,87.663416
 49 | 1340.0,16218,93.72735
 50 | 360.0,1496,94.288277
 51 | 630.0,2744,95.325491
 52 | 630.0,1780,95.993038
 53 | 780.0,3612,97.35601
 54 | 880.0,3692,98.752235
 55 | 730.0,2452,99.674829
 56 | 780.0,2676,100.681265
 57 | 780.0,2324,101.55319
 58 | 780.0,2678,102.566934
 59 | 630.0,2612,103.553079
 60 | 1080.0,4050,105.075956
 61 | 1460.0,10328,108.963586
 62 | 630.0,2438,109.88712
 63 | 1180.0,16976,116.303472
 64 | 1080.0,4614,118.073532
 65 | 1080.0,4690,119.866369
 66 | 780.0,3664,121.273588
 67 | 830.0,4382,122.946313
 68 | 830.0,4578,124.703727
 69 | 730.0,2838,125.787912
 70 | 430.0,2502,126.745888
 71 | 610.0,3276,127.999948
 72 | 780.0,2678,129.026407
 73 | 730.0,4440,130.724924
 74 | 1080.0,8960,134.155202
 75 | 1080.0,5556,136.260477
 76 | 630.0,3438,137.511145
 77 | 1080.0,6520,139.76433
 78 | 630.0,1780,140.379913
 79 | 410.0,1252,140.832459
 80 | 780.0,2678,141.805494
 81 | 380.0,2260,142.623271
 82 | 630.0,1780,143.278926
 83 | 1030.0,4518,145.009336
 84 | 980.0,4404,146.690564
 85 | 630.0,1780,147.365815
 86 | 830.0,4266,148.99672
 87 | 430.0,3342,150.270021
 88 | 680.0,3476,151.585912
 89 | 1080.0,10406,155.475321
 90 | 730.0,4966,157.342425
 91 | 930.0,4308,158.954403
 92 | 880.0,4486,160.639949
 93 | 1400.0,11686,165.001423
 94 | 630.0,2992,166.126216
 95 | 580.0,2652,167.116391
 96 | 980.0,3866,168.562401
 97 | 930.0,5498,170.620965
 98 | 630.0,1780,171.284593
 99 | 980.0,5008,173.16315
100 | 830.0,3448,174.455275
101 | 830.0,4424,176.110061
102 | 1030.0,4918,177.942581
103 | 230.0,1494,178.497264
104 | 830.0,4946,180.345697
105 | 930.0,6104,182.637209
106 | 780.0,4566,184.351494
107 | 1080.0,7800,187.232992
108 | 1320.0,8124,190.256808
109 | 430.0,1760,190.907736
110 | 880.0,6200,193.223942
111 | 880.0,2678,194.225293
112 | 680.0,3120,195.386348
113 | 780.0,2676,196.381358
114 | 530.0,3896,197.835135
115 | 1080.0,5948,200.057237
116 | 680.0,3930,201.528874
117 | 1180.0,4240,203.113463
118 | 630.0,3372,204.369068
119 | 430.0,2502,205.304568
120 | 830.0,4266,206.894893
121 | 630.0,3306,208.131841
122 | 730.0,4428,209.786296
123 | 830.0,5548,211.859851
124 | 830.0,4014,213.35282
125 | 430.0,2502,214.288179
126 | 680.0,4510,215.979968
127 | 930.0,4202,217.558226
128 | 780.0,5600,219.657292
129 | 1410.0,6062,221.910441
130 | 780.0,4486,223.582816
131 | 430.0,2502,224.521364
132 | 330.0,2164,225.330174
133 | 380.0,2010,226.078029
134 | 530.0,2628,227.055925
135 | 1280.0,7440,229.784199
136 | 1030.0,3484,231.088481
137 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614793581.2582045, "env_id": "BeamRiderNoFrameskip-v4"}
 2 | r,l,t
 3 | 6576.0,20983,10.683172
 4 | 2720.0,13592,15.828768
 5 | 7740.0,23813,24.863982
 6 | 4320.0,18685,31.91907
 7 | 2108.0,8766,35.233023
 8 | 1380.0,7675,38.103883
 9 | 5696.0,19085,45.328726
10 | 5768.0,17864,52.062259
11 | 4320.0,13583,57.184806
12 | 4606.0,18828,64.292317
13 | 6478.0,19078,71.502656
14 | 2160.0,10273,75.362682
15 | 4260.0,16581,81.626119
16 | 6742.0,22736,90.241258
17 | 1952.0,9649,93.879019
18 | 3480.0,16215,99.980142
19 | 2160.0,12859,104.814137
20 | 3360.0,15497,110.641296
21 | 5376.0,19580,118.032608
22 | 6440.0,20723,125.866222
23 | 4734.0,17357,132.415582
24 | 4350.0,17837,139.141831
25 | 1432.0,8217,142.219953
26 | 4804.0,17582,148.852138
27 | 3420.0,14283,154.229172
28 | 4448.0,16951,160.630883
29 | 7140.0,21519,168.752505
30 | 6810.0,21377,176.88184
31 | 5182.0,20176,184.515731
32 | 3780.0,15807,190.476166
33 | 4796.0,20191,198.123916
34 | 1380.0,7373,200.899911
35 | 6292.0,22057,209.207382
36 | 1744.0,9035,212.610699
37 | 2160.0,9577,216.214398
38 | 4680.0,16599,222.475892
39 | 4156.0,18829,229.597445
40 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614778785.8173542, "env_id": "BreakoutNoFrameskip-v4"}
 2 | r,l,t
 3 | 390.0,14936,8.298947
 4 | 310.0,8119,11.281157
 5 | 137.0,5823,13.430059
 6 | 308.0,8376,16.514826
 7 | 405.0,9718,20.091962
 8 | 375.0,10544,23.962725
 9 | 396.0,11052,28.012314
10 | 399.0,10183,31.745516
11 | 77.0,4572,33.434542
12 | 332.0,10465,37.285213
13 | 377.0,11866,41.624256
14 | 261.0,6623,44.058955
15 | 402.0,6847,46.571992
16 | 387.0,9037,49.892307
17 | 303.0,9094,53.333262
18 | 319.0,22476,61.599988
19 | 372.0,8259,64.668601
20 | 396.0,26450,74.652882
21 | 318.0,6843,77.411975
22 | 372.0,8167,80.415497
23 | 400.0,21998,88.477855
24 | 351.0,20941,96.400516
25 | 397.0,14151,101.604712
26 | 403.0,8723,104.806063
27 | 370.0,9713,108.366861
28 | 386.0,7965,111.298332
29 | 381.0,10702,115.23716
30 | 408.0,10803,119.190315
31 | 387.0,8583,122.349447
32 | 419.0,8411,125.439177
33 | 374.0,8879,128.696841
34 | 403.0,11577,133.006629
35 | 405.0,14521,138.325462
36 | 354.0,8603,141.489714
37 | 355.0,8642,144.662911
38 | 390.0,15271,150.377894
39 | 393.0,9215,153.764675
40 | 401.0,8586,157.064263
41 | 369.0,9011,160.368164
42 | 405.0,8090,163.333655
43 | 306.0,13302,168.210222
44 | 399.0,23832,177.095047
45 | 320.0,8164,180.124154
46 | 404.0,10433,184.107888
47 | 379.0,9634,187.624143
48 | 399.0,8575,190.755317
49 | 336.0,13848,195.785423
50 | 383.0,7562,198.536605
51 | 368.0,8137,201.509571
52 | 387.0,8579,204.624538
53 | 292.0,7483,207.352157
54 | 407.0,9572,210.8472
55 | 342.0,25215,220.13302
56 | 304.0,6047,222.341196
57 | 295.0,7243,224.988355
58 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614937878.9643338, "env_id": "EnduroNoFrameskip-v4"}
 2 | r,l,t
 3 | 773.0,39936,18.466804
 4 | 1076.0,53248,39.507428
 5 | 1022.0,53248,60.545804
 6 | 1049.0,53248,81.489567
 7 | 792.0,39936,97.197674
 8 | 1012.0,53248,118.132145
 9 | 771.0,39936,133.855354
10 | 785.0,39936,149.558964
11 | 484.0,26624,160.020037
12 | 788.0,39936,175.723603
13 | 792.0,39936,191.440947
14 | 784.0,39936,207.150612
15 | 433.0,26624,217.608288
16 | 1072.0,53248,238.547646
17 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1659728618.0716512, "env_id": "MsPacmanNoFrameskip-v4"}
  2 | r,l,t
  3 | 3240.0,4610,4.565343
  4 | 2380.0,3650,6.179767
  5 | 3240.0,4722,8.242586
  6 | 2950.0,4218,9.901634
  7 | 2440.0,4450,11.644655
  8 | 1600.0,3226,12.901034
  9 | 3140.0,3970,14.512564
 10 | 2570.0,4658,16.381635
 11 | 2720.0,4242,18.09169
 12 | 2540.0,4274,19.819639
 13 | 2450.0,4098,21.46781
 14 | 2450.0,4330,23.219339
 15 | 2980.0,4578,25.034832
 16 | 2750.0,4714,26.924132
 17 | 3050.0,4386,28.647907
 18 | 2840.0,4490,30.418036
 19 | 2300.0,4402,32.153116
 20 | 2290.0,3586,33.554561
 21 | 1750.0,3802,35.044998
 22 | 2400.0,4074,36.637052
 23 | 3240.0,4546,38.421748
 24 | 1760.0,4346,40.121146
 25 | 2650.0,4402,41.842615
 26 | 2500.0,3530,43.217673
 27 | 2450.0,4570,45.000941
 28 | 3140.0,4618,46.799562
 29 | 2880.0,3730,48.256968
 30 | 2240.0,3842,49.760671
 31 | 3050.0,5538,51.925182
 32 | 2730.0,4466,53.678259
 33 | 3630.0,4698,55.515136
 34 | 3150.0,4578,57.304975
 35 | 3290.0,4794,59.179325
 36 | 2210.0,3146,60.410003
 37 | 2850.0,4450,62.155477
 38 | 2480.0,4154,63.773322
 39 | 3240.0,4458,65.523309
 40 | 2850.0,4450,67.262537
 41 | 2640.0,4066,68.852192
 42 | 2440.0,4226,70.500446
 43 | 3240.0,4626,72.309346
 44 | 2830.0,4498,74.066068
 45 | 2040.0,3810,75.559353
 46 | 2850.0,4938,77.493622
 47 | 2180.0,3202,78.742667
 48 | 2960.0,4474,80.49608
 49 | 2640.0,4714,82.340948
 50 | 2240.0,4426,84.070252
 51 | 2020.0,4474,85.819671
 52 | 2550.0,4234,87.476478
 53 | 2300.0,3754,88.943698
 54 | 2740.0,3970,90.497502
 55 | 1760.0,3698,91.945864
 56 | 3110.0,4266,93.617598
 57 | 2060.0,3546,94.994061
 58 | 2440.0,4258,96.665241
 59 | 2850.0,4530,98.440153
 60 | 2850.0,4450,100.183625
 61 | 2430.0,5162,102.198829
 62 | 3030.0,4178,103.835441
 63 | 2580.0,3618,105.243359
 64 | 2850.0,4450,106.991979
 65 | 2720.0,4242,108.651297
 66 | 3030.0,4562,110.433888
 67 | 2450.0,4354,112.137468
 68 | 2700.0,4162,113.759301
 69 | 3640.0,4898,115.678251
 70 | 2850.0,3986,117.23745
 71 | 3030.0,3946,118.78445
 72 | 2890.0,4994,120.743981
 73 | 2580.0,4786,122.618189
 74 | 2640.0,5162,124.632866
 75 | 2290.0,4098,126.237313
 76 | 2280.0,4842,128.13284
 77 | 2640.0,4650,129.952747
 78 | 2440.0,4258,131.618814
 79 | 2430.0,4226,133.269789
 80 | 2640.0,4642,135.086737
 81 | 2230.0,3522,136.464286
 82 | 2200.0,3146,137.690324
 83 | 2530.0,4778,139.554016
 84 | 2960.0,4066,141.136904
 85 | 2850.0,4450,142.878844
 86 | 2840.0,4386,144.598397
 87 | 3240.0,4626,146.411167
 88 | 2180.0,3202,147.667167
 89 | 2440.0,4338,149.359808
 90 | 2230.0,3330,150.663123
 91 | 2450.0,4810,152.543828
 92 | 2840.0,4218,154.185746
 93 | 2280.0,4458,155.923587
 94 | 3640.0,4418,157.645611
 95 | 2350.0,6634,160.237373
 96 | 2450.0,4450,161.976687
 97 | 2050.0,4258,163.636643
 98 | 2740.0,4322,165.323966
 99 | 2570.0,4162,166.970684
100 | 2740.0,4378,168.688692
101 | 2850.0,4130,170.294853
102 | 2320.0,3474,171.649862
103 | 3040.0,4186,173.286216
104 | 2730.0,4274,174.960623
105 | 2850.0,4322,176.654881
106 | 3040.0,4394,178.378233
107 | 4230.0,4306,180.056643
108 | 1840.0,4114,181.660494
109 | 3240.0,4842,183.554174
110 | 2570.0,3658,184.979727
111 | 2220.0,4146,186.601411
112 | 1830.0,3378,187.914714
113 | 1670.0,3226,189.174523
114 | 2300.0,3850,190.687076
115 | 2710.0,3842,192.195436
116 | 2850.0,4450,193.952946
117 | 3080.0,5314,196.122141
118 | 2880.0,4922,198.095347
119 | 2240.0,4066,199.693413
120 | 2450.0,4578,201.546361
121 | 2440.0,4066,203.163933
122 | 5040.0,4266,204.833798
123 | 1830.0,4090,206.447688
124 | 2230.0,3650,207.992231
125 | 3240.0,5170,210.199356
126 | 4040.0,4970,212.319058
127 | 3230.0,5090,214.496098
128 | 2740.0,4338,216.337857
129 | 2740.0,4386,218.197287
130 | 3640.0,4378,220.113769
131 | 2270.0,3418,221.690941
132 | 3050.0,5098,223.746212
133 | 2450.0,3482,225.097706
134 | 3240.0,4578,226.889236
135 | 2450.0,3906,228.403416
136 | 3030.0,3970,230.008759
137 | 2310.0,3906,231.596919
138 | 2640.0,4850,233.561483
139 | 2730.0,4274,235.296346
140 | 2640.0,4290,237.03278
141 | 3040.0,4402,238.817874
142 | 2540.0,3778,240.350946
143 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614779237.7516618, "env_id": "PongNoFrameskip-v4"}
 2 | r,l,t
 3 | 20.0,6948,5.213674
 4 | 21.0,6683,7.638885
 5 | 20.0,6939,10.154375
 6 | 20.0,7144,12.737012
 7 | 21.0,6533,15.094103
 8 | 21.0,6527,17.458776
 9 | 21.0,6692,19.88431
10 | 20.0,7397,22.561709
11 | 20.0,7379,25.2273
12 | 21.0,6755,27.670537
13 | 21.0,6535,30.035142
14 | 19.0,7189,32.634809
15 | 21.0,6533,34.991524
16 | 21.0,6527,37.351326
17 | 21.0,6527,39.702946
18 | 21.0,6687,42.109237
19 | 21.0,6533,44.463506
20 | 21.0,6620,46.849023
21 | 21.0,6533,49.212942
22 | 21.0,6995,51.728969
23 | 21.0,6860,54.200454
24 | 21.0,6533,56.553215
25 | 20.0,7258,59.159745
26 | 20.0,6752,61.596981
27 | 19.0,7025,64.160838
28 | 21.0,6687,66.576899
29 | 21.0,6927,69.062689
30 | 21.0,6692,71.471189
31 | 19.0,7170,74.047396
32 | 20.0,7392,76.718277
33 | 21.0,6687,79.130105
34 | 20.0,6853,81.594396
35 | 20.0,6992,84.121735
36 | 21.0,6533,86.475407
37 | 19.0,7365,89.140565
38 | 21.0,6527,91.490788
39 | 21.0,6692,93.91832
40 | 20.0,7269,96.531502
41 | 21.0,6527,98.888406
42 | 20.0,7273,101.498355
43 | 20.0,6681,103.895792
44 | 20.0,7136,106.459496
45 | 21.0,6620,108.843947
46 | 21.0,6533,111.20154
47 | 21.0,6692,113.604377
48 | 21.0,6692,116.014685
49 | 21.0,7000,118.542409
50 | 21.0,6687,120.945755
51 | 21.0,7258,123.563952
52 | 19.0,7286,126.18596
53 | 21.0,6763,128.66123
54 | 21.0,6690,131.142843
55 | 21.0,6687,133.553499
56 | 21.0,6687,135.956716
57 | 21.0,6671,138.370338
58 | 21.0,6527,140.729741
59 | 21.0,6527,143.078396
60 | 21.0,6527,145.443633
61 | 21.0,6683,147.850354
62 | 21.0,6692,150.267021
63 | 20.0,6922,152.779045
64 | 21.0,6527,155.125277
65 | 20.0,7446,157.806427
66 | 20.0,6737,160.240098
67 | 21.0,6533,162.594134
68 | 21.0,6906,165.074382
69 | 21.0,6533,167.429593
70 | 20.0,6745,169.864267
71 | 21.0,6999,172.378367
72 | 21.0,6508,174.721212
73 | 21.0,6915,177.212817
74 | 20.0,7224,179.820872
75 | 20.0,7279,182.435939
76 | 21.0,6692,184.85815
77 | 20.0,6942,187.353981
78 | 21.0,6527,189.70991
79 | 21.0,6533,192.060815
80 | 21.0,6527,194.421159
81 | 21.0,6755,196.848697
82 | 21.0,6595,199.219058
83 | 20.0,7042,201.748603
84 | 21.0,6692,204.171969
85 | 21.0,6527,206.523586
86 | 20.0,6853,208.986539
87 | 19.0,7552,211.70457
88 | 21.0,6692,214.113817
89 | 21.0,6533,216.462623
90 | 21.0,6533,218.815706
91 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-QbertNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1614779461.8854597, "env_id": "QbertNoFrameskip-v4"}
  2 | r,l,t
  3 | 500.0,2147,3.450365
  4 | 675.0,2280,4.281284
  5 | 850.0,3327,5.504148
  6 | 11350.0,6037,7.714523
  7 | 3900.0,2954,8.786828
  8 | 15300.0,6627,11.213221
  9 | 4225.0,4297,12.786068
 10 | 14600.0,6943,15.321182
 11 | 425.0,1954,16.032893
 12 | 775.0,2362,16.890738
 13 | 14800.0,6422,19.253953
 14 | 4250.0,3370,20.482237
 15 | 11525.0,5167,22.36452
 16 | 11425.0,4544,24.030699
 17 | 11350.0,4444,25.65287
 18 | 8575.0,5000,27.47626
 19 | 14725.0,6593,29.891513
 20 | 4100.0,2769,30.893262
 21 | 14800.0,6422,33.239472
 22 | 650.0,2632,34.196923
 23 | 11700.0,7037,36.776742
 24 | 11625.0,6226,39.061646
 25 | 11525.0,5167,40.944191
 26 | 3925.0,2408,41.811353
 27 | 11675.0,7397,44.505452
 28 | 11950.0,4852,46.267992
 29 | 700.0,2432,47.143335
 30 | 11625.0,6157,49.401633
 31 | 11425.0,4604,51.083146
 32 | 11425.0,4604,52.764815
 33 | 875.0,2887,53.816217
 34 | 14600.0,5348,55.76363
 35 | 4000.0,2792,56.77684
 36 | 14650.0,6588,59.178909
 37 | 11425.0,4604,60.859552
 38 | 14650.0,5609,63.0423
 39 | 11625.0,6157,65.393578
 40 | 725.0,2217,66.189359
 41 | 14725.0,6612,68.602587
 42 | 14800.0,6687,71.035443
 43 | 7775.0,4892,72.811937
 44 | 14625.0,5478,74.801383
 45 | 14600.0,5088,76.658427
 46 | 11100.0,4034,78.122569
 47 | 50.0,1199,78.547241
 48 | 15150.0,6012,80.753348
 49 | 11575.0,6872,83.254765
 50 | 4425.0,3952,84.685961
 51 | 4250.0,2785,85.690472
 52 | 7800.0,3592,86.996258
 53 | 700.0,2424,87.872761
 54 | 7875.0,5553,89.891262
 55 | 650.0,2722,90.881686
 56 | 475.0,1785,91.521566
 57 | 14600.0,5678,93.58827
 58 | 14650.0,5558,95.610656
 59 | 11425.0,4604,97.288862
 60 | 11425.0,4604,98.97228
 61 | 650.0,2592,99.915128
 62 | 11400.0,5527,101.934255
 63 | 4925.0,4992,103.74385
 64 | 14750.0,7337,106.41443
 65 | 750.0,2639,107.370049
 66 | 11875.0,5802,109.488307
 67 | 14800.0,6422,112.072564
 68 | 11950.0,5387,114.030024
 69 | 15050.0,7730,116.853713
 70 | 14600.0,5798,118.965109
 71 | 875.0,3017,120.060684
 72 | 11425.0,4604,121.752643
 73 | 4325.0,3712,123.10552
 74 | 4025.0,3307,124.304083
 75 | 14600.0,6413,126.636068
 76 | 14800.0,6157,128.87671
 77 | 750.0,2373,129.735242
 78 | 14650.0,6723,132.529886
 79 | 11975.0,6208,134.864778
 80 | 11425.0,4664,136.570588
 81 | 775.0,2362,137.425285
 82 | 4000.0,2717,138.410109
 83 | 11750.0,6157,140.658257
 84 | 11625.0,6157,143.046972
 85 | 9225.0,3942,144.577907
 86 | 14600.0,5403,146.544769
 87 | 14875.0,6683,148.984606
 88 | 15075.0,6292,151.476602
 89 | 11250.0,3950,152.95514
 90 | 14700.0,7993,155.873396
 91 | 14725.0,6352,158.215972
 92 | 14700.0,7312,160.962415
 93 | 14700.0,6803,163.442979
 94 | 3675.0,2443,164.332046
 95 | 14600.0,6743,166.793701
 96 | 3700.0,3134,167.926943
 97 | 4150.0,4097,169.420132
 98 | 11550.0,5930,171.578741
 99 | 11425.0,4604,173.259727
100 | 8075.0,3280,174.520545
101 | 14800.0,6422,176.900329
102 | 8025.0,3705,178.240064
103 | 11575.0,7623,181.017266
104 | 750.0,2803,182.034169
105 | 11425.0,4604,183.710549
106 | 11625.0,6157,185.954851
107 | 14800.0,6432,188.295226
108 | 14600.0,6813,190.778485
109 | 14850.0,6704,193.219888
110 | 650.0,2722,194.206065
111 | 15100.0,6267,196.488646
112 | 15025.0,5802,198.600253
113 | 750.0,2803,199.618398
114 | 775.0,2397,200.487827
115 | 14600.0,5838,202.614424
116 | 11100.0,4049,204.087964
117 | 3625.0,2323,204.925828
118 | 14800.0,6432,207.265786
119 | 14725.0,6812,209.751004
120 | 14600.0,6068,212.2734
121 | 12250.0,7827,215.238562
122 | 14800.0,6422,217.575494
123 | 825.0,3072,218.685553
124 | 11525.0,5064,220.524356
125 | 14650.0,5578,222.557985
126 | 14975.0,7839,225.419132
127 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-RoadRunnerNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1615283352.8105607, "env_id": "RoadRunnerNoFrameskip-v4"}
  2 | r,l,t
  3 | 45600.0,4331,4.578996
  4 | 35200.0,4359,6.388526
  5 | 49100.0,4577,8.294341
  6 | 46300.0,4812,10.291539
  7 | 30400.0,2996,11.530835
  8 | 40600.0,4622,13.434747
  9 | 37400.0,3984,15.079498
 10 | 46600.0,4608,16.980999
 11 | 41300.0,4311,18.765128
 12 | 41500.0,4272,20.535463
 13 | 38300.0,4694,22.473144
 14 | 26400.0,4265,24.230634
 15 | 28700.0,4342,26.017114
 16 | 28800.0,4423,27.848736
 17 | 41000.0,4308,29.627312
 18 | 47100.0,4669,31.560422
 19 | 37300.0,4405,33.359143
 20 | 36700.0,4353,35.152945
 21 | 34900.0,4492,37.008729
 22 | 41400.0,4212,38.757955
 23 | 32400.0,4408,40.58818
 24 | 28500.0,4744,42.547467
 25 | 35700.0,4446,44.38607
 26 | 52500.0,4346,46.184409
 27 | 33100.0,4171,47.908489
 28 | 38000.0,4278,49.682371
 29 | 44100.0,4193,51.412412
 30 | 30800.0,4582,53.31371
 31 | 43600.0,4589,55.214548
 32 | 36600.0,4183,56.942748
 33 | 44600.0,4574,58.847455
 34 | 53600.0,4590,60.767837
 35 | 44600.0,4267,62.550773
 36 | 30900.0,4176,64.288576
 37 | 35600.0,4288,66.056793
 38 | 56000.0,4673,67.991279
 39 | 49000.0,4516,69.869656
 40 | 46800.0,4460,71.706389
 41 | 36600.0,4630,73.619184
 42 | 50000.0,4389,75.435945
 43 | 42600.0,4499,77.273676
 44 | 38500.0,4665,79.194321
 45 | 34300.0,4593,81.097265
 46 | 45300.0,4486,82.948642
 47 | 50100.0,4341,84.745933
 48 | 35700.0,4318,86.536117
 49 | 39600.0,4860,88.540414
 50 | 33300.0,4382,90.352031
 51 | 43400.0,4286,92.123165
 52 | 48500.0,4734,94.088329
 53 | 22900.0,3440,95.512706
 54 | 37700.0,4222,97.262531
 55 | 39200.0,4558,99.151146
 56 | 51700.0,4582,101.045657
 57 | 30400.0,4121,102.746956
 58 | 41500.0,4542,104.62781
 59 | 36400.0,4351,106.431224
 60 | 35100.0,4365,108.237686
 61 | 42900.0,4423,110.065588
 62 | 52600.0,4707,112.015754
 63 | 52000.0,4405,113.839923
 64 | 32100.0,4448,115.679077
 65 | 34000.0,4537,117.548685
 66 | 43800.0,4494,119.412248
 67 | 39900.0,4682,121.348915
 68 | 33300.0,4531,123.198588
 69 | 35700.0,4454,125.039904
 70 | 43600.0,4270,126.80856
 71 | 37400.0,4224,128.562729
 72 | 45000.0,4304,130.349417
 73 | 48000.0,4372,132.164388
 74 | 40500.0,4486,134.016303
 75 | 46300.0,4519,135.889534
 76 | 59500.0,4447,137.728427
 77 | 46100.0,4420,139.553422
 78 | 38500.0,4342,141.348007
 79 | 40200.0,4308,143.126172
 80 | 31600.0,4513,144.993962
 81 | 40900.0,4414,146.819435
 82 | 38300.0,4590,148.714608
 83 | 38400.0,4326,150.503732
 84 | 48000.0,4393,152.340948
 85 | 46600.0,4398,154.171558
 86 | 34900.0,4412,156.00668
 87 | 52500.0,4694,157.958101
 88 | 43100.0,4529,159.842753
 89 | 39800.0,4345,161.651971
 90 | 42600.0,4415,163.485021
 91 | 47100.0,4774,165.471995
 92 | 41500.0,4421,167.306136
 93 | 41400.0,4230,169.038306
 94 | 32700.0,4277,170.820947
 95 | 50000.0,4406,172.647852
 96 | 25600.0,4351,174.455871
 97 | 37400.0,4655,176.389951
 98 | 42100.0,4412,178.224521
 99 | 44500.0,4539,180.112359
100 | 42700.0,4422,181.948988
101 | 37700.0,4614,183.867699
102 | 43900.0,4190,185.605365
103 | 37700.0,4233,187.360487
104 | 45000.0,3984,189.032385
105 | 47700.0,4382,190.859018
106 | 45100.0,4139,192.587108
107 | 41800.0,4318,194.378205
108 | 45000.0,4390,196.197077
109 | 46800.0,4318,197.987917
110 | 44100.0,4251,199.751307
111 | 40300.0,4446,201.604496
112 | 33900.0,4309,203.388354
113 | 49600.0,4560,205.283296
114 | 32000.0,4601,207.192033
115 | 31000.0,4477,209.046702
116 | 44300.0,4318,210.844528
117 | 39700.0,4454,212.696337
118 | 45700.0,4409,214.494696
119 | 26800.0,4208,216.242845
120 | 26600.0,4133,217.952397
121 | 43600.0,4781,219.946394
122 | 34900.0,4468,221.796999
123 | 47100.0,4464,223.652724
124 | 48100.0,4216,225.403106
125 | 31900.0,4136,227.11711
126 | 36100.0,4235,228.873448
127 | 30300.0,3572,230.364285
128 | 27000.0,4702,232.3186
129 | 50000.0,4601,234.225036
130 | 50100.0,4604,236.139172
131 | 33900.0,4382,237.965039
132 | 33600.0,4544,239.851667
133 | 40600.0,4619,241.768723
134 | 35000.0,4284,243.543154
135 | 41800.0,4520,245.416899
136 | 48600.0,4593,247.330962
137 | 33600.0,4376,249.147818
138 | 41000.0,4128,250.868949
139 | 45500.0,4526,252.75774
140 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615283104.8211045, "env_id": "SeaquestNoFrameskip-v4"}
 2 | r,l,t
 3 | 760.0,4674,4.580214
 4 | 1080.0,6074,6.95292
 5 | 1900.0,9529,10.720694
 6 | 1840.0,9065,14.293803
 7 | 1840.0,9082,17.876858
 8 | 1840.0,9114,21.572851
 9 | 1660.0,7806,24.662641
10 | 2320.0,8890,28.260154
11 | 2000.0,7674,31.37266
12 | 1320.0,6921,34.148088
13 | 2600.0,9818,38.122417
14 | 1940.0,9530,41.912591
15 | 2880.0,10146,45.971469
16 | 2600.0,9529,49.750961
17 | 1340.0,6297,52.203349
18 | 2020.0,8334,55.703941
19 | 2200.0,10841,60.11372
20 | 1060.0,5982,62.467764
21 | 1880.0,9321,66.280149
22 | 2500.0,9450,70.017334
23 | 2460.0,9274,73.811825
24 | 2660.0,9526,77.645413
25 | 1840.0,7705,80.782437
26 | 940.0,5402,82.964944
27 | 2080.0,10182,87.057083
28 | 1840.0,7298,90.006956
29 | 1600.0,7890,93.164201
30 | 1660.0,8666,96.604625
31 | 2960.0,10586,100.910327
32 | 1420.0,7834,103.996339
33 | 2380.0,9149,107.735687
34 | 1440.0,7401,110.651457
35 | 1860.0,9114,114.266929
36 | 2920.0,10269,118.386817
37 | 2000.0,9786,122.288618
38 | 1880.0,9082,125.961307
39 | 2580.0,9690,129.830575
40 | 1480.0,7609,132.819636
41 | 3560.0,11865,137.549402
42 | 3040.0,10390,141.719084
43 | 1160.0,6614,144.408233
44 | 1240.0,6746,147.14411
45 | 2200.0,9546,150.988787
46 | 1760.0,9329,154.759344
47 | 1920.0,9273,158.51737
48 | 1960.0,8138,161.755239
49 | 3500.0,12137,166.541739
50 | 1660.0,7074,169.483862
51 | 1880.0,9466,173.323722
52 | 1400.0,6690,175.960091
53 | 1400.0,7418,178.906495
54 | 2880.0,10650,183.116402
55 | 1960.0,9450,186.79492
56 | 2200.0,8982,190.068956
57 | 1900.0,7634,193.011389
58 | 1940.0,7545,196.121619
59 | 2120.0,8689,199.655051
60 | 2820.0,10073,203.763259
61 | 1020.0,5914,206.09671
62 | 2200.0,10458,210.370167
63 | 3000.0,11034,214.772371
64 | 1940.0,9786,218.751435
65 | 760.0,4710,220.653248
66 | 1680.0,8433,224.079637
67 | 1840.0,9137,227.823316
68 | 2400.0,8550,231.249566
69 | 2180.0,10842,235.681875
70 | 2400.0,9022,239.407329
71 | 2520.0,9370,243.204571
72 | 


--------------------------------------------------------------------------------
/logs/benchmark/dqn-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1614779016.280488, "env_id": "SpaceInvadersNoFrameskip-v4"}
  2 | r,l,t
  3 | 775.0,3344,3.886279
  4 | 455.0,2929,4.941654
  5 | 550.0,3315,6.127118
  6 | 570.0,3923,7.515705
  7 | 800.0,4259,9.018671
  8 | 395.0,2559,9.939902
  9 | 530.0,3307,11.118369
 10 | 710.0,3229,12.272132
 11 | 515.0,3663,13.577838
 12 | 1120.0,3797,14.927365
 13 | 900.0,4037,16.356874
 14 | 570.0,3796,17.697156
 15 | 515.0,3819,19.042766
 16 | 630.0,3821,20.39839
 17 | 720.0,3329,21.582514
 18 | 875.0,6020,23.740323
 19 | 545.0,3898,25.119765
 20 | 395.0,2387,25.980374
 21 | 545.0,3995,27.394111
 22 | 730.0,3886,28.770375
 23 | 525.0,3565,30.034888
 24 | 600.0,3905,31.412884
 25 | 600.0,4225,32.896876
 26 | 600.0,4113,34.346998
 27 | 605.0,4073,35.774542
 28 | 555.0,3933,37.161738
 29 | 575.0,3301,38.332035
 30 | 520.0,3607,39.609969
 31 | 375.0,2529,40.513565
 32 | 525.0,2848,41.528867
 33 | 600.0,4647,43.174772
 34 | 770.0,4485,44.771373
 35 | 485.0,2689,45.728803
 36 | 605.0,3853,47.0837
 37 | 480.0,3251,48.242046
 38 | 395.0,2559,49.15564
 39 | 550.0,3486,50.391175
 40 | 395.0,2559,51.305544
 41 | 470.0,3189,52.442224
 42 | 395.0,2559,53.357437
 43 | 1205.0,6295,55.603836
 44 | 605.0,3041,56.676831
 45 | 600.0,3563,57.931883
 46 | 775.0,4513,59.52345
 47 | 485.0,3935,60.921994
 48 | 600.0,3349,62.106138
 49 | 570.0,3127,63.205454
 50 | 570.0,3536,64.455716
 51 | 465.0,3911,65.845823
 52 | 610.0,4385,67.388742
 53 | 545.0,3415,68.59359
 54 | 525.0,3900,69.970141
 55 | 480.0,3377,71.175281
 56 | 570.0,3422,72.396504
 57 | 570.0,4113,73.83675
 58 | 445.0,2557,74.757191
 59 | 530.0,2628,75.687501
 60 | 395.0,2559,76.603561
 61 | 450.0,4293,78.111617
 62 | 550.0,3052,79.200124
 63 | 800.0,4829,80.877832
 64 | 540.0,3731,82.18373
 65 | 1200.0,7364,84.811868
 66 | 525.0,3062,85.919444
 67 | 600.0,3921,87.297699
 68 | 600.0,3675,88.588161
 69 | 900.0,6521,90.903198
 70 | 775.0,2945,91.956404
 71 | 525.0,2912,92.994804
 72 | 395.0,2559,93.915735
 73 | 580.0,3116,95.02866
 74 | 1130.0,5943,97.1432
 75 | 775.0,6133,99.324602
 76 | 575.0,3140,100.432
 77 | 405.0,2711,101.396831
 78 | 545.0,3581,102.666251
 79 | 570.0,3307,103.841793
 80 | 455.0,3815,105.192782
 81 | 570.0,3029,106.258779
 82 | 600.0,3971,107.646472
 83 | 770.0,3308,108.822203
 84 | 485.0,2625,109.757926
 85 | 755.0,5625,111.753243
 86 | 395.0,2559,112.669612
 87 | 555.0,3192,113.804427
 88 | 705.0,3009,114.880053
 89 | 570.0,3796,116.211087
 90 | 575.0,3643,117.496128
 91 | 545.0,3017,118.564473
 92 | 600.0,4729,120.210074
 93 | 535.0,3305,121.376765
 94 | 570.0,4695,123.022804
 95 | 515.0,3699,124.317297
 96 | 770.0,3763,125.638394
 97 | 605.0,3841,126.9832
 98 | 515.0,2848,127.989394
 99 | 600.0,4197,129.467359
100 | 600.0,3801,130.805654
101 | 570.0,3796,132.143114
102 | 915.0,6533,134.45919
103 | 545.0,3844,135.80943
104 | 1085.0,7745,138.561323
105 | 405.0,3527,139.808429
106 | 520.0,2923,140.851297
107 | 550.0,3647,142.133963
108 | 630.0,4517,143.718867
109 | 515.0,3697,145.023435
110 | 600.0,4089,146.449435
111 | 405.0,3109,147.56174
112 | 520.0,3488,148.786294
113 | 700.0,3435,149.999451
114 | 580.0,3116,151.111986
115 | 660.0,4089,152.550673
116 | 395.0,2559,153.464829
117 | 570.0,3628,154.739216
118 | 975.0,7079,157.248444
119 | 575.0,3952,158.639437
120 | 1445.0,9755,162.092638
121 | 700.0,2947,163.148235
122 | 925.0,5439,165.057395
123 | 480.0,2897,166.090477
124 | 575.0,3414,167.296689
125 | 1120.0,7605,169.986964
126 | 515.0,3697,171.291006
127 | 350.0,2758,172.281944
128 | 515.0,2987,173.335978
129 | 550.0,3742,174.657949
130 | 1345.0,6667,177.009458
131 | 575.0,3514,178.250407
132 | 655.0,3745,179.576672
133 | 510.0,3217,180.717338
134 | 485.0,3217,181.861857
135 | 570.0,3796,183.193002
136 | 495.0,3169,184.315878
137 | 745.0,3692,185.618728
138 | 1010.0,7547,188.308126
139 | 575.0,3406,189.514064
140 | 395.0,2559,190.430743
141 | 520.0,3545,191.687078
142 | 500.0,3345,192.894131
143 | 715.0,3435,194.115859
144 | 520.0,3289,195.27842
145 | 600.0,4399,196.819674
146 | 1225.0,7181,199.368081
147 | 645.0,4529,200.957543
148 | 420.0,3121,202.073241
149 | 1180.0,6825,204.4855
150 | 545.0,3841,205.829714
151 | 1110.0,7663,208.533523
152 | 680.0,5273,210.396127
153 | 545.0,2817,211.400691
154 | 520.0,4191,212.877653
155 | 535.0,3305,214.050907
156 | 670.0,3757,215.378709
157 | 600.0,3675,216.67445
158 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-AsteroidsNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1615282346.0657709, "env_id": "AsteroidsNoFrameskip-v4"}
  2 | r,l,t
  3 | 2100.0,3140,4.222781
  4 | 2150.0,5416,6.706464
  5 | 3100.0,4628,8.867571
  6 | 1880.0,2628,10.130192
  7 | 2430.0,2560,11.29486
  8 | 2700.0,5986,14.025688
  9 | 1540.0,2844,15.321319
 10 | 1350.0,3142,16.800679
 11 | 2800.0,5302,19.29634
 12 | 1300.0,2948,20.63113
 13 | 2400.0,2892,21.965651
 14 | 2030.0,3832,23.704556
 15 | 2180.0,3728,25.185869
 16 | 1950.0,4020,26.805397
 17 | 2530.0,2228,27.717258
 18 | 1780.0,3134,29.029559
 19 | 1630.0,3020,30.50439
 20 | 1470.0,2142,31.474842
 21 | 2500.0,3398,33.044588
 22 | 3330.0,5052,35.419518
 23 | 1830.0,3190,36.876289
 24 | 2750.0,5234,39.277096
 25 | 1390.0,3112,40.718593
 26 | 1280.0,2428,41.852868
 27 | 1610.0,3018,43.288413
 28 | 1760.0,2242,44.305527
 29 | 2350.0,6784,47.503645
 30 | 2400.0,4238,49.467506
 31 | 2080.0,2898,50.785973
 32 | 1950.0,2936,52.131212
 33 | 3000.0,4662,54.34914
 34 | 2130.0,2802,55.651604
 35 | 1980.0,3042,57.03654
 36 | 1780.0,3176,58.489574
 37 | 2850.0,4772,60.665724
 38 | 1710.0,3336,62.201884
 39 | 2200.0,4294,64.166347
 40 | 4770.0,6812,67.312451
 41 | 1590.0,5984,70.017936
 42 | 930.0,1372,70.625459
 43 | 2950.0,5328,73.043853
 44 | 1800.0,3476,74.64306
 45 | 1300.0,3418,76.182155
 46 | 1080.0,1678,76.929373
 47 | 1800.0,2962,78.262535
 48 | 1650.0,3306,79.759297
 49 | 2800.0,5370,82.260684
 50 | 1910.0,2682,83.461249
 51 | 1930.0,3740,85.154804
 52 | 2150.0,4872,87.459444
 53 | 2300.0,4200,89.39806
 54 | 1300.0,3444,90.949188
 55 | 2150.0,5580,93.4792
 56 | 1490.0,2158,94.444278
 57 | 1530.0,3430,96.019706
 58 | 2500.0,5000,98.31589
 59 | 2180.0,4054,100.149905
 60 | 3000.0,6512,103.194933
 61 | 1800.0,3770,104.926376
 62 | 1280.0,2132,105.897955
 63 | 1850.0,7126,109.109429
 64 | 1930.0,4274,111.140121
 65 | 2180.0,3110,112.584334
 66 | 3480.0,7728,116.169647
 67 | 2230.0,3396,117.747863
 68 | 980.0,2250,118.789643
 69 | 1950.0,4102,120.655639
 70 | 880.0,1532,121.345101
 71 | 1320.0,2418,122.527914
 72 | 4820.0,7680,126.119789
 73 | 2350.0,3574,127.748252
 74 | 1610.0,4640,129.888136
 75 | 2350.0,3484,131.468878
 76 | 1280.0,2376,132.534037
 77 | 1300.0,3206,133.973154
 78 | 2400.0,6146,136.792883
 79 | 1390.0,4482,138.916353
 80 | 1300.0,3828,140.728047
 81 | 1280.0,4782,142.936024
 82 | 2400.0,4924,145.211154
 83 | 4470.0,6594,148.311394
 84 | 3510.0,6092,151.181304
 85 | 2900.0,3804,152.932375
 86 | 1730.0,3090,154.344851
 87 | 2600.0,4410,156.450595
 88 | 2450.0,3180,157.89357
 89 | 1280.0,3298,159.373876
 90 | 3100.0,7616,162.852006
 91 | 2450.0,3494,164.390613
 92 | 2130.0,5722,167.096026
 93 | 2600.0,4060,168.987177
 94 | 1280.0,2130,170.001644
 95 | 2000.0,3638,171.701467
 96 | 2950.0,4134,173.57986
 97 | 780.0,1532,174.299056
 98 | 2150.0,2936,175.649798
 99 | 2750.0,2776,176.935084
100 | 2900.0,5568,179.56799
101 | 630.0,1532,180.246592
102 | 3410.0,5974,183.051007
103 | 1830.0,5428,185.546254
104 | 1930.0,6056,188.33652
105 | 1930.0,3252,189.853228
106 | 3000.0,5410,192.396713
107 | 1490.0,3532,193.986223
108 | 2700.0,5472,196.570051
109 | 1030.0,2348,197.62768
110 | 3000.0,4760,199.796481
111 | 3100.0,5448,202.300959
112 | 2700.0,4180,204.198598
113 | 3220.0,7278,207.60019
114 | 1950.0,4532,209.646224
115 | 1180.0,2006,210.611366
116 | 2050.0,3532,212.217238
117 | 2010.0,4226,214.121241
118 | 1730.0,4034,215.952861
119 | 2400.0,3260,217.4612
120 | 1080.0,1516,218.132377
121 | 1750.0,2932,219.454946
122 | 1420.0,4148,221.331489
123 | 2100.0,3928,223.107199
124 | 1300.0,3846,224.866148
125 | 2500.0,3884,226.630986
126 | 2200.0,3220,228.092993
127 | 2900.0,5000,230.362537
128 | 2550.0,5290,232.791035
129 | 1650.0,3432,234.413234
130 | 1440.0,2356,235.4701
131 | 3260.0,7632,239.029046
132 | 2550.0,3822,240.787463
133 | 2500.0,4572,242.955574
134 | 4020.0,6168,245.781019
135 | 1300.0,2792,247.0935
136 | 2150.0,4436,249.139655
137 | 1950.0,4634,251.331867
138 | 2200.0,4092,253.184912
139 | 2250.0,5500,255.666379
140 | 3200.0,5960,258.392801
141 | 2330.0,4604,260.466561
142 | 2500.0,4170,262.367958
143 | 2900.0,3674,264.053509
144 | 2450.0,3360,265.599392
145 | 3100.0,6872,268.700958
146 | 1860.0,3258,270.17276
147 | 1280.0,3534,271.760526
148 | 1760.0,2262,272.81702
149 | 3000.0,3806,274.614552
150 | 2280.0,4826,276.806577
151 | 2550.0,5690,279.494636
152 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614793322.1209238, "env_id": "BeamRiderNoFrameskip-v4"}
 2 | r,l,t
 3 | 4080.0,14525,8.79847
 4 | 5666.0,18681,16.575911
 5 | 6236.0,17193,23.729032
 6 | 2160.0,9873,27.818356
 7 | 5154.0,17049,34.863509
 8 | 1380.0,7291,37.89434
 9 | 3300.0,14357,43.820789
10 | 3000.0,13109,49.238173
11 | 4798.0,17485,56.472782
12 | 3420.0,13915,62.209094
13 | 1796.0,8485,65.697914
14 | 2160.0,9735,69.714388
15 | 1332.0,7213,72.670604
16 | 1284.0,6343,75.278639
17 | 996.0,5281,77.437288
18 | 3900.0,14931,83.591233
19 | 3420.0,12495,88.744709
20 | 1380.0,7236,91.72037
21 | 2056.0,9511,95.644122
22 | 4732.0,17507,102.892356
23 | 2160.0,10658,107.295106
24 | 756.0,4491,109.137778
25 | 3308.0,12521,114.306549
26 | 4770.0,15463,120.695994
27 | 3900.0,16307,127.439384
28 | 4476.0,17315,134.610267
29 | 3240.0,15185,140.870411
30 | 4734.0,17433,148.066092
31 | 4110.0,16029,154.67535
32 | 3690.0,12429,159.81653
33 | 1284.0,6317,162.416749
34 | 2916.0,10475,166.734342
35 | 3000.0,13817,172.429865
36 | 3300.0,14313,178.328795
37 | 2328.0,11129,182.941328
38 | 1332.0,6787,185.724657
39 | 4764.0,18671,193.465057
40 | 1900.0,9041,197.198546
41 | 2160.0,10553,201.540245
42 | 2804.0,10999,206.083617
43 | 4380.0,15989,212.692215
44 | 3728.0,13888,218.42536
45 | 8358.0,21151,227.209689
46 | 5310.0,17458,234.420478
47 | 3900.0,15619,240.886745
48 | 7404.0,22673,250.270449
49 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-BipedalWalker-v3/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1654205553.3965414, "env_id": "BipedalWalker-v3"}
  2 | r,l,t
  3 | 286.498863,1232,3.148969
  4 | 290.03395,1193,3.942982
  5 | 285.871843,1236,4.766715
  6 | 291.850978,1198,5.564058
  7 | 288.768871,1210,6.368981
  8 | 288.157038,1212,7.176008
  9 | 289.361869,1202,7.97352
 10 | 287.842606,1225,8.786903
 11 | 287.280252,1190,9.578171
 12 | 284.324796,1247,10.404098
 13 | 288.956237,1213,11.208744
 14 | 288.669449,1191,11.998618
 15 | 285.979899,1247,12.825689
 16 | 289.639171,1190,13.618726
 17 | 286.947732,1244,14.446376
 18 | 287.606982,1188,15.235259
 19 | 286.782178,1223,16.048814
 20 | 288.687124,1216,16.856583
 21 | 285.721313,1232,17.67472
 22 | 287.000542,1216,18.483637
 23 | 288.605325,1213,19.286785
 24 | 291.318049,1207,20.087602
 25 | 289.566242,1193,20.877623
 26 | 292.464293,1179,21.661781
 27 | 290.672828,1192,22.457295
 28 | 290.689305,1182,23.244957
 29 | 287.682502,1206,24.045446
 30 | 291.957954,1163,24.819003
 31 | 282.784247,1263,25.659568
 32 | 287.454218,1216,26.470194
 33 | 285.451189,1228,27.283888
 34 | 287.095007,1272,28.128968
 35 | 287.733682,1228,28.945501
 36 | 290.772435,1195,29.742402
 37 | 288.365004,1236,30.56567
 38 | 285.35095,1225,31.380461
 39 | 289.413562,1208,32.184891
 40 | 288.015387,1211,32.991751
 41 | 288.731107,1205,33.791121
 42 | 290.727256,1189,34.581942
 43 | 289.43828,1207,35.3862
 44 | 287.729774,1227,36.204048
 45 | 285.253831,1260,37.041651
 46 | 289.768125,1185,37.826303
 47 | 285.958823,1233,38.646922
 48 | 287.200568,1222,39.46325
 49 | 290.043164,1185,40.250997
 50 | 289.101907,1211,41.05368
 51 | 286.462372,1243,41.87645
 52 | 285.64948,1221,42.688791
 53 | 287.519667,1187,43.476118
 54 | 284.971831,1267,44.316454
 55 | 288.742228,1214,45.120938
 56 | 285.628617,1255,45.954174
 57 | 284.757907,1250,46.783232
 58 | 284.347205,1245,47.609643
 59 | 289.301662,1211,48.409139
 60 | 284.519905,1255,49.241136
 61 | 283.713437,1242,50.06516
 62 | 289.766618,1189,50.851352
 63 | 282.590464,1273,51.692035
 64 | 289.451637,1175,52.468679
 65 | 282.330197,1285,53.320945
 66 | 290.373129,1201,54.118149
 67 | 285.502483,1222,54.927324
 68 | 290.665951,1163,55.698771
 69 | 289.852728,1213,56.503432
 70 | 287.244561,1205,57.306046
 71 | 286.817512,1223,58.116854
 72 | 291.118836,1183,58.904925
 73 | 289.975692,1205,59.704293
 74 | 291.492401,1159,60.475756
 75 | 287.211862,1228,61.291739
 76 | 284.231949,1244,62.120625
 77 | 287.456086,1196,62.914544
 78 | 286.782568,1231,63.733352
 79 | 290.014788,1182,64.517536
 80 | 285.797936,1238,65.341084
 81 | 286.128281,1239,66.164153
 82 | 291.3156,1168,66.940698
 83 | 285.707421,1227,67.7551
 84 | 287.444993,1217,68.563281
 85 | 287.287142,1235,69.380708
 86 | 287.296313,1252,70.212887
 87 | 285.059782,1244,71.037529
 88 | 285.075845,1250,71.86698
 89 | 289.072542,1172,72.643633
 90 | 286.401297,1247,73.470394
 91 | 289.753727,1171,74.244725
 92 | 288.401924,1186,75.030774
 93 | 289.74776,1210,75.834194
 94 | 290.13916,1177,76.612051
 95 | 292.795935,1181,77.396483
 96 | 285.884796,1247,78.222464
 97 | 288.603669,1212,79.027307
 98 | 289.873579,1212,79.832399
 99 | 285.787775,1230,80.647568
100 | 290.36039,1202,81.441948
101 | 290.807051,1179,82.224544
102 | 285.749472,1245,83.048675
103 | 290.984798,1179,83.832279
104 | 288.575334,1226,84.645881
105 | 286.358977,1210,85.449784
106 | 287.503688,1221,86.257498
107 | 290.378601,1209,87.059999
108 | 289.137484,1223,87.869361
109 | 288.796009,1224,88.683386
110 | 287.783809,1249,89.514697
111 | 291.227006,1151,90.278997
112 | 280.305489,1277,91.127263
113 | 283.010979,1279,91.975935
114 | 286.370462,1259,92.809959
115 | 294.429234,1149,93.57178
116 | 286.001682,1213,94.377335
117 | 287.589849,1233,95.19614
118 | 290.317775,1200,95.992744
119 | 286.754871,1236,96.815568
120 | 289.788937,1205,97.617576
121 | 286.015599,1251,98.448357
122 | 287.574138,1193,99.242593
123 | 291.136559,1193,100.036958
124 | 291.611669,1221,100.848697
125 | 286.55367,1234,101.670848
126 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1615193532.8653934, "env_id": "BipedalWalkerHardcore-v3"}
  2 | r,l,t
  3 | 193.275095,2000,4.145592
  4 | 278.431375,1216,5.233195
  5 | -6.798583,356,5.553727
  6 | 19.282857,935,6.39281
  7 | 250.856491,1620,7.846617
  8 | -65.818894,162,7.994552
  9 | 221.508256,2000,9.782774
 10 | 55.729149,629,10.340956
 11 | 27.481974,749,11.012363
 12 | -49.307592,2000,12.828135
 13 | -53.150271,224,13.030152
 14 | 274.377278,1245,14.144573
 15 | 273.219748,1289,15.291531
 16 | -1.475935,2000,17.097797
 17 | 275.173584,1267,18.226109
 18 | 268.651572,1333,19.420884
 19 | 262.781362,1457,20.719656
 20 | -55.50225,241,20.93576
 21 | 143.412661,2000,22.721537
 22 | 56.459139,2000,24.502594
 23 | 16.917754,2000,26.304311
 24 | 264.020378,1410,27.561328
 25 | -4.766662,458,27.972549
 26 | 33.298734,2000,29.746628
 27 | 43.781743,774,30.438653
 28 | 232.481688,2000,32.22467
 29 | 46.904917,2000,34.022865
 30 | 260.045822,1422,35.285332
 31 | 281.353043,1159,36.317426
 32 | 117.040305,1037,37.234354
 33 | -25.477002,361,37.557418
 34 | 174.199961,2000,39.350821
 35 | 200.445063,2000,41.149508
 36 | 12.565995,2000,42.934273
 37 | 84.100166,2000,44.760193
 38 | -74.903722,2000,46.59088
 39 | 271.058868,1269,47.739809
 40 | 103.401041,979,48.615643
 41 | 241.844216,2000,50.409846
 42 | -34.435864,334,50.708726
 43 | 271.148666,1302,51.86712
 44 | 33.819655,2000,53.672669
 45 | -9.80039,2000,55.49123
 46 | 75.016011,2000,57.289543
 47 | 146.260913,2000,59.093938
 48 | 274.414427,1274,60.231836
 49 | 128.360471,1076,61.19114
 50 | 264.920659,1386,62.431464
 51 | 9.68412,525,62.900779
 52 | -62.625699,173,63.058635
 53 | 78.21942,801,63.773142
 54 | 192.980267,2000,65.549125
 55 | 203.484802,2000,67.37258
 56 | 50.604619,629,67.934108
 57 | 220.073692,2000,69.73243
 58 | 94.093388,2000,71.510735
 59 | 105.880485,2000,73.284172
 60 | 272.858588,1286,74.432985
 61 | 8.159598,2000,76.250862
 62 | 159.239759,997,77.138704
 63 | 26.746829,2000,78.901894
 64 | -62.071558,176,79.060033
 65 | 25.428189,2000,80.884813
 66 | 155.089809,1392,82.126809
 67 | 143.705084,1270,83.257159
 68 | 275.999284,1248,84.372922
 69 | 276.390156,1222,85.456104
 70 | 3.633458,2000,87.313531
 71 | 43.690779,2000,89.103813
 72 | -39.888648,337,89.406915
 73 | 77.930317,756,90.083196
 74 | 264.742158,1428,91.350063
 75 | 268.022265,1343,92.542526
 76 | 104.829176,2000,94.329187
 77 | 271.579241,1272,95.462329
 78 | 7.061693,437,95.852223
 79 | 270.516807,1314,97.021596
 80 | 25.466871,2000,98.8095
 81 | 199.094501,2000,100.804588
 82 | -77.270537,2000,102.760402
 83 | 274.325678,1257,103.881253
 84 | -47.70453,279,104.128048
 85 | 269.782561,1325,105.312254
 86 | 252.021391,1595,106.737116
 87 | 76.03425,2000,108.53695
 88 | 268.948973,1338,109.724009
 89 | -10.528409,2000,111.492292
 90 | 58.413867,1108,112.472228
 91 | 160.320388,2000,114.25012
 92 | 91.350898,919,115.072932
 93 | 271.164018,1321,116.245728
 94 | -11.883781,2000,118.041878
 95 | 105.827348,2000,119.822273
 96 | 200.431321,2000,121.619765
 97 | 265.226968,1407,122.871708
 98 | 69.77737,733,123.528858
 99 | 270.591673,1306,124.703248
100 | 141.358797,2000,126.497834
101 | 244.774291,1689,128.007964
102 | 21.732235,2000,129.789535
103 | -52.589515,346,130.100523
104 | 268.640395,2000,131.877187
105 | 148.517683,1315,133.04672
106 | 22.670106,528,133.518668
107 | 100.072615,2000,135.297825
108 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614770695.1439214, "env_id": "BreakoutNoFrameskip-v4"}
 2 | r,l,t
 3 | 417.0,12603,7.91641
 4 | 396.0,9007,11.486543
 5 | 431.0,12730,16.501665
 6 | 420.0,9783,20.353318
 7 | 393.0,10255,24.399123
 8 | 414.0,35543,38.466367
 9 | 409.0,10094,42.434626
10 | 372.0,7074,45.223526
11 | 414.0,9194,48.837086
12 | 418.0,9150,52.421882
13 | 421.0,7848,55.500098
14 | 387.0,7913,58.610305
15 | 397.0,7294,61.475845
16 | 397.0,9214,65.099294
17 | 405.0,9027,68.644321
18 | 391.0,14176,74.201695
19 | 380.0,7386,77.108295
20 | 421.0,8567,80.466578
21 | 389.0,7161,83.282692
22 | 413.0,6880,85.992517
23 | 405.0,8422,89.307504
24 | 411.0,8550,92.671517
25 | 415.0,23746,102.003397
26 | 396.0,9498,105.736333
27 | 415.0,7582,108.703916
28 | 390.0,9083,112.268808
29 | 409.0,8339,115.536329
30 | 288.0,7289,118.406694
31 | 415.0,9489,122.139135
32 | 406.0,9650,125.932637
33 | 401.0,13799,131.324341
34 | 397.0,7327,134.206139
35 | 425.0,11805,138.796122
36 | 412.0,8117,141.983996
37 | 412.0,12291,146.784082
38 | 409.0,8779,150.232458
39 | 421.0,10680,154.413628
40 | 213.0,6196,156.854681
41 | 404.0,9779,160.689395
42 | 413.0,16971,167.34178
43 | 398.0,10057,171.290612
44 | 404.0,9900,175.174741
45 | 423.0,8658,178.559094
46 | 373.0,8765,182.007169
47 | 392.0,8508,185.355729
48 | 372.0,7457,188.290559
49 | 421.0,13768,193.67705
50 | 367.0,7096,196.471112
51 | 398.0,7307,199.343706
52 | 415.0,9821,203.192525
53 | 417.0,8940,206.70469
54 | 396.0,9338,210.384888
55 | 421.0,9029,213.928488
56 | 421.0,10031,217.866087
57 | 419.0,9851,221.736578
58 | 395.0,8815,225.197859
59 | 397.0,8982,228.739918
60 | 389.0,11865,233.428803
61 | 399.0,7052,236.196647
62 | 323.0,6887,238.924023
63 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614937615.306725, "env_id": "EnduroNoFrameskip-v4"}
 2 | r,l,t
 3 | 797.0,39936,19.760402
 4 | 774.0,39936,36.865088
 5 | 1259.0,66560,65.256766
 6 | 1032.0,53248,87.962597
 7 | 984.0,53248,110.666455
 8 | 971.0,53248,133.364271
 9 | 1093.0,53248,156.086944
10 | 961.0,53248,178.782423
11 | 975.0,53248,201.486932
12 | 772.0,39936,218.450948
13 | 1342.0,66560,246.65753
14 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614770211.8584638, "env_id": "PongNoFrameskip-v4"}
 2 | r,l,t
 3 | 21.0,6621,5.645768
 4 | 21.0,6619,8.278026
 5 | 21.0,6619,10.91174
 6 | 21.0,6695,13.5699
 7 | 21.0,6701,16.227408
 8 | 21.0,6695,18.884109
 9 | 21.0,6621,21.507497
10 | 21.0,6695,24.158821
11 | 21.0,6619,26.782454
12 | 21.0,6619,29.404617
13 | 21.0,6701,32.060708
14 | 21.0,6701,34.718148
15 | 21.0,6701,37.365183
16 | 21.0,6695,40.013821
17 | 21.0,6695,42.648849
18 | 21.0,6619,45.254462
19 | 21.0,6701,47.898726
20 | 21.0,6621,50.505156
21 | 21.0,6701,53.146831
22 | 21.0,6619,55.753825
23 | 21.0,6621,58.36678
24 | 21.0,6701,61.006375
25 | 21.0,6701,63.655262
26 | 21.0,6621,66.262585
27 | 21.0,6701,68.902122
28 | 21.0,6619,71.509826
29 | 21.0,6619,74.11505
30 | 21.0,6621,76.72574
31 | 21.0,6695,79.360951
32 | 21.0,6695,81.99979
33 | 21.0,6619,84.609866
34 | 21.0,6695,87.247657
35 | 21.0,6621,89.853352
36 | 21.0,6701,92.504749
37 | 21.0,6695,95.142408
38 | 21.0,6695,97.778445
39 | 21.0,6621,100.390172
40 | 21.0,6695,103.025162
41 | 21.0,6695,105.660685
42 | 21.0,6695,108.294654
43 | 21.0,6621,110.901377
44 | 21.0,6621,113.507766
45 | 21.0,6621,116.119072
46 | 21.0,6701,118.759252
47 | 21.0,6621,121.366972
48 | 21.0,6621,123.986761
49 | 21.0,6621,126.592144
50 | 21.0,6619,129.200434
51 | 21.0,6701,131.840272
52 | 21.0,6695,134.475858
53 | 21.0,6619,137.083183
54 | 21.0,6621,139.692369
55 | 21.0,6619,142.300647
56 | 21.0,6619,144.905658
57 | 21.0,6701,147.542496
58 | 21.0,6695,150.183867
59 | 21.0,6695,152.830488
60 | 21.0,6695,155.470538
61 | 21.0,6619,158.079579
62 | 21.0,6621,160.693677
63 | 21.0,6701,163.329427
64 | 21.0,6695,165.966489
65 | 21.0,6619,168.575866
66 | 21.0,6695,171.215574
67 | 21.0,6701,173.854388
68 | 21.0,6619,176.460269
69 | 21.0,6701,179.104645
70 | 21.0,6701,181.746303
71 | 21.0,6619,184.365571
72 | 21.0,6695,187.007439
73 | 21.0,6619,189.617918
74 | 21.0,6701,192.254169
75 | 21.0,6701,194.89255
76 | 20.0,6905,197.61044
77 | 21.0,6619,200.216051
78 | 21.0,6695,202.851995
79 | 21.0,6701,205.491178
80 | 21.0,6695,208.131151
81 | 21.0,6619,210.740392
82 | 21.0,6695,213.388927
83 | 21.0,6619,215.992427
84 | 21.0,6621,218.596376
85 | 21.0,6695,221.233237
86 | 21.0,6695,223.871111
87 | 21.0,6621,226.481205
88 | 21.0,6621,229.09101
89 | 21.0,6701,231.728724
90 | 21.0,6701,234.371095
91 | 21.0,6701,237.008596
92 | 21.0,6701,239.64919
93 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-QbertNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614770940.1790123, "env_id": "QbertNoFrameskip-v4"}
 2 | r,l,t
 3 | 12175.0,5927,5.415989
 4 | 16150.0,7347,8.336347
 5 | 15875.0,7725,11.41015
 6 | 15525.0,6352,13.925117
 7 | 15825.0,7957,17.082229
 8 | 16725.0,7377,20.008731
 9 | 15675.0,6632,22.6371
10 | 12375.0,5794,24.927961
11 | 12350.0,5852,27.250481
12 | 15550.0,6357,29.76556
13 | 15450.0,6159,32.201469
14 | 15375.0,5834,34.504873
15 | 16150.0,7342,37.405205
16 | 15875.0,7452,40.352528
17 | 15825.0,7452,43.2892
18 | 16100.0,6422,45.815806
19 | 15675.0,6844,48.517201
20 | 15750.0,7117,51.320857
21 | 19400.0,10657,55.528912
22 | 16325.0,7822,58.619442
23 | 19375.0,9303,62.316152
24 | 12000.0,4992,64.281549
25 | 19725.0,10072,68.251343
26 | 20000.0,10267,72.296274
27 | 15725.0,8032,75.460957
28 | 5050.0,3742,76.926301
29 | 15750.0,6782,79.59477
30 | 12125.0,5272,81.666575
31 | 16175.0,7664,84.682323
32 | 5025.0,3429,86.03526
33 | 8675.0,4537,87.817425
34 | 16075.0,7667,90.836693
35 | 4150.0,2947,91.988825
36 | 12300.0,5734,94.24798
37 | 15800.0,6792,96.923041
38 | 12325.0,5497,99.083754
39 | 19400.0,10132,103.085607
40 | 12225.0,5467,105.234322
41 | 16650.0,7732,108.279649
42 | 12000.0,4697,110.122053
43 | 15425.0,6732,112.775185
44 | 16375.0,7764,115.838148
45 | 19650.0,11002,120.187555
46 | 19350.0,10149,124.190546
47 | 19075.0,8989,127.733854
48 | 19450.0,8049,130.904344
49 | 15975.0,7953,134.032731
50 | 19925.0,9842,137.909475
51 | 15425.0,6194,140.345596
52 | 16850.0,8547,143.712478
53 | 16425.0,7277,146.584682
54 | 15675.0,6922,149.312712
55 | 15650.0,6542,151.884868
56 | 15550.0,6427,154.405081
57 | 11700.0,4314,156.093832
58 | 15750.0,6877,158.806255
59 | 19550.0,9387,162.504466
60 | 19925.0,10057,166.469106
61 | 11950.0,5177,168.502842
62 | 12275.0,4624,170.320909
63 | 16250.0,7293,173.19079
64 | 16275.0,6862,175.896776
65 | 15700.0,6622,178.511129
66 | 20200.0,10903,182.807991
67 | 15725.0,7232,185.657379
68 | 15975.0,7397,188.565925
69 | 12375.0,6042,190.947371
70 | 19175.0,8742,194.39362
71 | 15375.0,5537,196.569389
72 | 19325.0,8714,200.009571
73 | 19775.0,9317,203.681354
74 | 15625.0,7582,206.672691
75 | 16700.0,8847,210.160283
76 | 19400.0,9017,213.709782
77 | 15625.0,6912,216.445144
78 | 19125.0,8424,219.868027
79 | 20250.0,10263,223.909721
80 | 19775.0,9034,227.471911
81 | 12425.0,6132,229.885228
82 | 15525.0,6687,232.511261
83 | 15575.0,6562,235.091669
84 | 11900.0,4512,236.867217
85 | 16325.0,7612,239.867593
86 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-RoadRunnerNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1615282634.0171642, "env_id": "RoadRunnerNoFrameskip-v4"}
  2 | r,l,t
  3 | 39500.0,3693,4.607107
  4 | 42300.0,4249,6.678031
  5 | 38200.0,3762,8.513952
  6 | 38400.0,4290,10.56961
  7 | 38900.0,3916,12.442074
  8 | 31300.0,3952,14.360368
  9 | 41500.0,3863,16.242541
 10 | 47500.0,4178,18.267558
 11 | 37300.0,3890,20.1607
 12 | 37500.0,4419,22.307001
 13 | 43400.0,3720,24.117626
 14 | 48000.0,3867,26.001089
 15 | 35600.0,3650,27.768714
 16 | 38500.0,3747,29.5914
 17 | 47600.0,3980,31.512643
 18 | 31300.0,3889,33.384604
 19 | 39500.0,3870,35.2572
 20 | 37600.0,3865,37.131667
 21 | 33600.0,3860,38.998168
 22 | 43100.0,4141,40.998706
 23 | 35200.0,3828,42.850133
 24 | 40500.0,3728,44.656435
 25 | 43800.0,4114,46.647759
 26 | 29300.0,3889,48.519863
 27 | 44500.0,3985,50.438046
 28 | 40800.0,3793,52.266009
 29 | 40900.0,3967,54.170856
 30 | 47100.0,3569,55.890799
 31 | 52100.0,4196,57.898864
 32 | 26100.0,3795,59.748093
 33 | 39000.0,3916,61.674686
 34 | 43800.0,3882,63.658678
 35 | 36000.0,3988,65.747784
 36 | 43400.0,3696,67.774647
 37 | 42500.0,3893,69.948568
 38 | 44700.0,4360,72.333062
 39 | 38500.0,4063,74.596394
 40 | 36300.0,3477,76.517315
 41 | 53500.0,3976,78.678528
 42 | 41500.0,3792,80.803227
 43 | 53000.0,3939,82.964355
 44 | 34500.0,3861,85.102964
 45 | 48000.0,3914,87.266721
 46 | 51500.0,4083,89.521037
 47 | 49100.0,4784,91.993401
 48 | 39700.0,3874,93.879836
 49 | 40400.0,4117,95.889661
 50 | 46600.0,4149,97.911088
 51 | 38000.0,4318,100.008145
 52 | 45500.0,3557,101.736813
 53 | 37000.0,3897,103.628461
 54 | 48500.0,4347,105.697235
 55 | 41600.0,3288,107.307808
 56 | 37700.0,3728,109.106499
 57 | 31800.0,3679,110.892347
 58 | 48700.0,4237,112.95045
 59 | 37000.0,3764,114.778509
 60 | 39600.0,3549,116.505887
 61 | 35000.0,3707,118.302071
 62 | 48400.0,4000,120.253019
 63 | 46000.0,3853,122.118019
 64 | 34200.0,3754,123.945879
 65 | 49700.0,4783,126.269761
 66 | 49700.0,3980,128.216163
 67 | 42700.0,4097,130.196395
 68 | 42100.0,3925,132.096195
 69 | 43100.0,4014,134.051753
 70 | 23900.0,3929,135.957948
 71 | 37700.0,3763,137.782687
 72 | 31100.0,3873,139.667927
 73 | 34000.0,3632,141.440248
 74 | 40500.0,4260,143.502083
 75 | 38100.0,3936,145.421263
 76 | 40500.0,3955,147.332393
 77 | 37400.0,4160,149.320792
 78 | 41500.0,3893,151.196564
 79 | 34700.0,3881,153.079257
 80 | 40600.0,4371,155.196352
 81 | 43100.0,3695,156.991861
 82 | 53000.0,3920,158.900923
 83 | 40300.0,3983,160.830663
 84 | 49000.0,4309,162.918265
 85 | 44300.0,3935,164.83192
 86 | 40500.0,3844,166.70039
 87 | 45500.0,4016,168.653216
 88 | 39100.0,4385,170.787611
 89 | 49200.0,3573,172.518745
 90 | 48500.0,4030,174.477761
 91 | 38500.0,3678,176.263532
 92 | 43600.0,4257,178.334697
 93 | 40500.0,4068,180.297519
 94 | 34400.0,3653,182.074754
 95 | 25400.0,3632,183.833901
 96 | 33600.0,3697,185.627832
 97 | 39200.0,3708,187.426724
 98 | 29600.0,3637,189.190862
 99 | 37400.0,3731,190.998438
100 | 32900.0,3810,192.850824
101 | 40100.0,3997,194.788953
102 | 47300.0,3844,196.605136
103 | 50000.0,4189,198.47715
104 | 36200.0,3759,200.078743
105 | 39800.0,3849,201.763661
106 | 34700.0,3828,203.485893
107 | 41500.0,4269,205.557709
108 | 43500.0,4585,207.78251
109 | 49000.0,4020,209.736035
110 | 33500.0,3650,211.500322
111 | 51500.0,4124,213.506651
112 | 49000.0,3750,215.330003
113 | 33400.0,3792,217.170306
114 | 38100.0,3677,218.956944
115 | 44000.0,4000,220.899899
116 | 42200.0,3680,222.686688
117 | 51800.0,4168,224.711082
118 | 29100.0,3644,226.473148
119 | 27200.0,3710,228.278477
120 | 37400.0,3824,230.125029
121 | 35100.0,3704,231.929292
122 | 31700.0,3988,233.857676
123 | 46400.0,3954,235.780364
124 | 53500.0,3864,237.65512
125 | 26000.0,3665,239.42042
126 | 42700.0,4071,241.389238
127 | 58500.0,3992,243.31737
128 | 50600.0,4053,245.288382
129 | 45600.0,4159,247.281524
130 | 38400.0,3943,249.153082
131 | 31300.0,2612,250.421511
132 | 43700.0,3935,252.326953
133 | 48600.0,3988,254.250969
134 | 47500.0,4095,256.223561
135 | 39500.0,3757,258.030014
136 | 34000.0,3803,259.870713
137 | 36800.0,3904,261.759659
138 | 41400.0,3565,263.491798
139 | 41300.0,3886,265.365845
140 | 44800.0,3611,267.119321
141 | 46000.0,3735,268.930228
142 | 45500.0,3790,270.756583
143 | 36300.0,4301,272.847211
144 | 38600.0,3605,274.5775
145 | 46000.0,3698,276.370366
146 | 55100.0,4186,278.385095
147 | 35200.0,3785,280.205292
148 | 41200.0,4124,282.194699
149 | 31900.0,3867,284.070051
150 | 23900.0,3808,285.909715
151 | 38700.0,3597,287.647842
152 | 44300.0,3903,289.53886
153 | 40100.0,3967,291.45401
154 | 31200.0,3711,293.213691
155 | 41300.0,3908,295.099524
156 | 34900.0,3716,296.898536
157 | 41800.0,4057,298.8676
158 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615282041.2912836, "env_id": "SeaquestNoFrameskip-v4"}
 2 | r,l,t
 3 | 1780.0,9082,7.165651
 4 | 1780.0,9082,11.51325
 5 | 1780.0,9081,15.896254
 6 | 1780.0,9081,20.358567
 7 | 1800.0,9082,24.865566
 8 | 1800.0,9082,29.322846
 9 | 1800.0,9081,33.796309
10 | 1800.0,9082,38.233089
11 | 1780.0,9082,42.75617
12 | 1760.0,9082,47.221684
13 | 1800.0,9082,51.683366
14 | 1780.0,9082,56.105346
15 | 1780.0,9082,60.600484
16 | 1760.0,9081,65.0473
17 | 1800.0,9082,69.466815
18 | 1800.0,9082,73.904019
19 | 1760.0,9082,78.412824
20 | 1780.0,9082,82.912151
21 | 1800.0,9082,87.340143
22 | 1820.0,9081,91.749042
23 | 1760.0,9082,96.161275
24 | 1800.0,9082,100.642852
25 | 1760.0,9081,105.134592
26 | 1780.0,9081,109.578357
27 | 1820.0,9081,114.028118
28 | 1820.0,9081,118.46495
29 | 1800.0,9082,122.987751
30 | 1820.0,9081,127.326344
31 | 1800.0,9082,131.776265
32 | 1760.0,9081,136.280469
33 | 1760.0,9082,140.79302
34 | 1780.0,9081,145.268835
35 | 1700.0,8762,149.484477
36 | 1800.0,9082,153.923026
37 | 1800.0,9082,158.353423
38 | 1820.0,9082,162.787202
39 | 1820.0,9082,167.212062
40 | 1780.0,9081,171.657153
41 | 1760.0,9081,176.071074
42 | 1780.0,9082,180.550466
43 | 1760.0,9081,185.043164
44 | 1760.0,9081,189.465687
45 | 1760.0,9082,193.88748
46 | 1800.0,9082,198.281321
47 | 1800.0,9082,202.796558
48 | 1780.0,9082,207.248083
49 | 1760.0,9082,211.688853
50 | 1780.0,9082,216.104586
51 | 1780.0,9081,220.443628
52 | 1800.0,9082,224.857749
53 | 1840.0,9082,229.29361
54 | 1760.0,9082,233.74382
55 | 1800.0,9082,238.00834
56 | 1800.0,9082,242.521467
57 | 1760.0,9082,247.000363
58 | 1820.0,9082,251.488832
59 | 1800.0,9082,255.921816
60 | 1760.0,9082,260.386473
61 | 1800.0,9082,264.896535
62 | 1800.0,9082,269.336106
63 | 1780.0,9082,273.768382
64 | 1780.0,9082,278.173506
65 | 1820.0,9082,282.615731
66 | 1800.0,9082,287.072851
67 | 1780.0,9082,291.512813
68 | 1580.0,8250,295.521151
69 | 


--------------------------------------------------------------------------------
/logs/benchmark/ppo-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1614770456.7153661, "env_id": "SpaceInvadersNoFrameskip-v4"}
  2 | r,l,t
  3 | 600.0,2405,3.914749
  4 | 600.0,2539,4.87522
  5 | 570.0,2327,5.757872
  6 | 1230.0,5673,7.896288
  7 | 600.0,2315,8.769479
  8 | 600.0,2857,9.845313
  9 | 1590.0,7541,12.730985
 10 | 2090.0,8591,15.996526
 11 | 1585.0,6093,18.324354
 12 | 600.0,2449,19.250391
 13 | 540.0,3282,20.471908
 14 | 545.0,2405,21.384644
 15 | 570.0,2469,22.318601
 16 | 1630.0,6275,24.687005
 17 | 800.0,3021,25.82658
 18 | 600.0,2687,26.835624
 19 | 1770.0,8099,29.917496
 20 | 800.0,2893,31.008491
 21 | 600.0,2843,32.087468
 22 | 600.0,2573,33.062917
 23 | 1380.0,5372,35.103718
 24 | 570.0,2520,36.052045
 25 | 1235.0,5669,38.191427
 26 | 1175.0,5531,40.29176
 27 | 1230.0,6129,42.596846
 28 | 970.0,5423,44.657641
 29 | 1230.0,5965,46.896973
 30 | 600.0,2485,47.833416
 31 | 570.0,3545,49.162466
 32 | 555.0,2327,50.046039
 33 | 575.0,4107,51.585598
 34 | 1205.0,7102,54.245788
 35 | 600.0,2805,55.292747
 36 | 1125.0,5237,57.280273
 37 | 600.0,2777,58.334494
 38 | 1230.0,5557,60.431282
 39 | 575.0,2708,61.452153
 40 | 800.0,3143,62.625328
 41 | 570.0,2956,63.728158
 42 | 1370.0,6813,66.294163
 43 | 1370.0,6286,68.644167
 44 | 775.0,3116,69.814711
 45 | 1330.0,7499,72.653976
 46 | 1430.0,6605,75.134255
 47 | 575.0,4397,76.772299
 48 | 2030.0,10489,80.723449
 49 | 510.0,3408,81.992183
 50 | 600.0,2173,82.812686
 51 | 580.0,2668,83.820999
 52 | 1205.0,6060,86.097588
 53 | 830.0,3351,87.356693
 54 | 1230.0,5519,89.442007
 55 | 600.0,2433,90.360981
 56 | 570.0,2574,91.325186
 57 | 1170.0,6277,93.682385
 58 | 570.0,2241,94.533334
 59 | 515.0,2731,95.565007
 60 | 1435.0,5895,97.790861
 61 | 1175.0,5492,99.851393
 62 | 545.0,2853,100.930767
 63 | 1430.0,5589,103.028106
 64 | 740.0,3943,104.515226
 65 | 705.0,3711,105.905905
 66 | 570.0,2841,106.97071
 67 | 1175.0,5545,109.064234
 68 | 1175.0,5465,111.119168
 69 | 1175.0,6038,113.395968
 70 | 600.0,2439,114.312117
 71 | 570.0,3541,115.633128
 72 | 600.0,2327,116.509693
 73 | 570.0,2398,117.417454
 74 | 600.0,2861,118.497781
 75 | 1370.0,5490,120.557487
 76 | 575.0,3375,121.821491
 77 | 1210.0,4932,123.689292
 78 | 570.0,2206,124.524383
 79 | 570.0,2277,125.382965
 80 | 1175.0,6184,127.712865
 81 | 1945.0,8509,130.918364
 82 | 1280.0,5293,132.918554
 83 | 950.0,5249,134.904678
 84 | 1185.0,5208,136.870933
 85 | 1200.0,6206,139.196474
 86 | 545.0,3272,140.421652
 87 | 600.0,2695,141.433939
 88 | 570.0,2334,142.317909
 89 | 1435.0,6601,144.789252
 90 | 570.0,2305,145.66184
 91 | 970.0,4711,147.444192
 92 | 575.0,3055,148.5987
 93 | 2065.0,9483,152.164299
 94 | 1320.0,5597,154.271958
 95 | 1230.0,5921,156.488366
 96 | 540.0,2938,157.592431
 97 | 1120.0,5389,159.620881
 98 | 1145.0,5839,161.825858
 99 | 600.0,2533,162.775769
100 | 2200.0,8533,166.009311
101 | 515.0,3115,167.179821
102 | 745.0,3351,168.435554
103 | 570.0,2192,169.262837
104 | 1435.0,5819,171.446572
105 | 600.0,2885,172.526852
106 | 1780.0,6768,175.07971
107 | 570.0,2648,176.069265
108 | 600.0,2621,177.049405
109 | 570.0,2092,177.843376
110 | 1380.0,7781,180.783677
111 | 600.0,2345,181.672377
112 | 1360.0,5950,183.912733
113 | 575.0,2639,184.90189
114 | 1230.0,6295,187.263022
115 | 540.0,2561,188.228392
116 | 1705.0,9517,191.814498
117 | 580.0,1978,192.566855
118 | 600.0,2501,193.508208
119 | 1230.0,6693,196.031862
120 | 1205.0,6623,198.51251
121 | 570.0,3149,199.688567
122 | 1400.0,5209,201.64975
123 | 950.0,4733,203.450315
124 | 570.0,2149,204.261615
125 | 1205.0,5603,206.371998
126 | 1385.0,5767,208.560025
127 | 570.0,2170,209.385584
128 | 570.0,2495,210.324781
129 | 1260.0,5779,212.495315
130 | 1230.0,5477,214.555492
131 | 1800.0,8253,217.678085
132 | 1160.0,5875,219.905236
133 | 1200.0,5733,222.082316
134 | 1465.0,7059,224.754595
135 | 600.0,2269,225.608018
136 | 1230.0,5747,227.770797
137 | 605.0,2377,228.674616
138 | 1290.0,5145,230.616475
139 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-AsteroidsNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615284368.635039, "env_id": "AsteroidsNoFrameskip-v4"}
 2 | r,l,t
 3 | 2030.0,5280,4.917788
 4 | 1280.0,7246,7.733405
 5 | 1280.0,7744,10.735203
 6 | 2110.0,5418,12.840264
 7 | 1580.0,9792,16.635882
 8 | 2700.0,14442,22.269402
 9 | 580.0,2602,23.28045
10 | 1410.0,8022,26.392479
11 | 1470.0,9992,30.243479
12 | 1300.0,4820,32.112589
13 | 880.0,3728,33.556423
14 | 1850.0,6712,36.16707
15 | 2300.0,18852,43.480112
16 | 2000.0,7066,46.231887
17 | 1880.0,6666,48.825108
18 | 3100.0,14344,54.417897
19 | 1300.0,5528,56.570307
20 | 1340.0,9790,60.490703
21 | 2080.0,8110,63.656714
22 | 1660.0,6686,66.265376
23 | 4270.0,15394,72.397557
24 | 1370.0,6100,74.749058
25 | 1470.0,5336,76.823082
26 | 1760.0,13178,81.991044
27 | 6890.0,27672,92.853137
28 | 1760.0,14066,98.439594
29 | 2900.0,8700,101.827892
30 | 1410.0,5754,104.055313
31 | 1320.0,4634,105.837138
32 | 3970.0,12582,110.797729
33 | 2080.0,7168,113.588531
34 | 1080.0,5372,115.669907
35 | 1430.0,10506,119.765551
36 | 1480.0,7942,122.972187
37 | 1440.0,4668,124.785599
38 | 2000.0,6472,127.308509
39 | 3920.0,10786,131.527477
40 | 2750.0,9510,135.345486
41 | 2150.0,6788,137.990966
42 | 2000.0,7614,140.951383
43 | 3220.0,13132,146.187673
44 | 3730.0,13448,151.436091
45 | 1580.0,7324,154.288391
46 | 1370.0,6314,156.804605
47 | 3220.0,11014,161.160282
48 | 2350.0,7398,164.036948
49 | 3000.0,9886,167.901782
50 | 2950.0,9504,171.705262
51 | 830.0,4964,173.63178
52 | 1880.0,6698,176.241872
53 | 3270.0,11406,180.728887
54 | 3220.0,15342,186.772055
55 | 2280.0,9198,190.361094
56 | 2150.0,6422,192.914549
57 | 3310.0,12340,197.784235
58 | 1880.0,8594,201.132567
59 | 1180.0,5834,203.409548
60 | 1130.0,5844,205.782184
61 | 1910.0,6416,208.304251
62 | 5540.0,15882,214.497507
63 | 1590.0,5052,216.464622
64 | 2260.0,12434,221.384965
65 | 3530.0,14708,227.130663
66 | 2050.0,8244,230.450806
67 | 1390.0,8756,233.848895
68 | 1830.0,10548,237.956959
69 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-BeamRiderNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614852096.4742026, "env_id": "BeamRiderNoFrameskip-v4"}
 2 | r,l,t
 3 | 51584.0,71433,30.11881
 4 | 22170.0,44639,48.369793
 5 | 30514.0,48741,68.151076
 6 | 16020.0,32883,80.800972
 7 | 6232.0,17303,87.346454
 8 | 4412.0,17059,93.873198
 9 | 20790.0,38463,112.188127
10 | 16820.0,36761,128.455022
11 | 16110.0,36277,144.132732
12 | 19380.0,38563,160.682819
13 | 5884.0,18413,168.312466
14 | 6988.0,19351,176.849476
15 | 14294.0,34023,191.423923
16 | 12660.0,31657,205.309299
17 | 12960.0,32987,219.264733
18 | 15690.0,38991,236.003012
19 | 18582.0,38939,252.629304
20 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-BreakoutNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614779697.375159, "env_id": "BreakoutNoFrameskip-v4"}
 2 | r,l,t
 3 | 421.0,13759,7.717858
 4 | 382.0,8885,11.008912
 5 | 368.0,7080,13.641344
 6 | 360.0,6621,16.082456
 7 | 450.0,46707,33.102802
 8 | 287.0,8744,36.332456
 9 | 393.0,8224,39.367732
10 | 395.0,8256,42.396542
11 | 432.0,9497,45.875642
12 | 373.0,8425,48.983618
13 | 408.0,13715,54.01905
14 | 400.0,12775,58.683662
15 | 408.0,17495,65.088949
16 | 773.0,21958,73.152582
17 | 423.0,22762,81.513918
18 | 349.0,8182,84.536757
19 | 373.0,21159,92.287416
20 | 320.0,8244,95.330242
21 | 210.0,6587,97.765068
22 | 327.0,9053,101.105664
23 | 438.0,11200,105.217838
24 | 425.0,28962,115.746121
25 | 290.0,5949,117.950181
26 | 407.0,21378,125.79268
27 | 375.0,13241,130.658171
28 | 451.0,16014,136.544134
29 | 423.0,12583,141.160334
30 | 403.0,7056,143.759736
31 | 421.0,20190,151.097538
32 | 252.0,7425,153.834893
33 | 407.0,19564,160.988797
34 | 364.0,28839,171.57312
35 | 383.0,8248,174.622104
36 | 411.0,13522,179.58879
37 | 385.0,10836,183.585816
38 | 410.0,9478,187.070337
39 | 373.0,7982,190.026134
40 | 431.0,28878,200.540685
41 | 428.0,29933,211.338071
42 | 415.0,10305,215.104128
43 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-EnduroNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
1 | #{"t_start": 1615798755.7603855, "env_id": "EnduroNoFrameskip-v4"}
2 | r,l,t
3 | 4693.0,133120,54.108688
4 | 2582.0,119808,99.796636
5 | 4931.0,146432,155.525393
6 | 1989.0,93184,190.736086
7 | 1961.0,93184,226.100894
8 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-PongNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614779925.3269277, "env_id": "PongNoFrameskip-v4"}
 2 | r,l,t
 3 | 21.0,7947,5.600472
 4 | 21.0,8442,8.701368
 5 | 20.0,9634,12.226275
 6 | 20.0,9762,15.790657
 7 | 20.0,9609,19.300387
 8 | 21.0,10585,23.16411
 9 | 21.0,7947,26.067608
10 | 21.0,10205,29.791635
11 | 21.0,7941,32.686685
12 | 21.0,7941,35.583624
13 | 21.0,9895,39.196352
14 | 21.0,9949,42.81496
15 | 21.0,9895,46.414241
16 | 19.0,12324,50.903394
17 | 19.0,11473,55.076255
18 | 21.0,7878,57.940445
19 | 21.0,9895,61.542388
20 | 21.0,8279,64.549082
21 | 20.0,9625,68.059175
22 | 21.0,8678,71.209772
23 | 21.0,7947,74.102408
24 | 21.0,8817,77.327215
25 | 20.0,10751,81.23354
26 | 21.0,7947,84.124312
27 | 21.0,10029,87.772565
28 | 21.0,7941,90.660588
29 | 21.0,8096,93.601612
30 | 21.0,7947,96.499756
31 | 20.0,8959,99.759803
32 | 20.0,12466,104.28981
33 | 20.0,8357,107.329129
34 | 21.0,11809,111.624942
35 | 20.0,8408,114.678773
36 | 19.0,10698,118.573543
37 | 21.0,10572,122.417779
38 | 20.0,12662,127.027967
39 | 20.0,8111,129.980286
40 | 21.0,12685,134.595775
41 | 21.0,10857,138.545573
42 | 19.0,10857,142.492337
43 | 21.0,7947,145.377651
44 | 20.0,8323,148.40299
45 | 21.0,7947,151.294838
46 | 20.0,9177,154.633734
47 | 21.0,7947,157.525611
48 | 19.0,8704,160.690106
49 | 21.0,8279,163.702176
50 | 20.0,8077,166.637348
51 | 21.0,9895,170.237087
52 | 21.0,12037,174.608734
53 | 20.0,8077,177.542576
54 | 21.0,7947,180.432275
55 | 20.0,8486,183.514008
56 | 21.0,7941,186.413429
57 | 21.0,9895,190.02161
58 | 21.0,12037,194.4052
59 | 19.0,12802,199.05999
60 | 19.0,12353,203.547561
61 | 20.0,8618,206.684987
62 | 21.0,7947,209.578497
63 | 20.0,9505,213.026868
64 | 21.0,11741,217.296149
65 | 21.0,7941,220.184218
66 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-QbertNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614780377.3798313, "env_id": "QbertNoFrameskip-v4"}
 2 | r,l,t
 3 | 15600.0,6377,5.057885
 4 | 15550.0,7312,7.795092
 5 | 15875.0,8987,11.144577
 6 | 15600.0,6372,13.514814
 7 | 15400.0,6394,15.885376
 8 | 15600.0,6372,18.256008
 9 | 16250.0,7487,21.048109
10 | 15600.0,6372,23.41627
11 | 12175.0,5163,25.33254
12 | 7750.0,3302,26.55398
13 | 15400.0,6334,28.906239
14 | 16050.0,6422,31.29228
15 | 4300.0,3159,32.456307
16 | 15825.0,5479,34.494781
17 | 15600.0,6372,36.869116
18 | 15400.0,6339,39.219169
19 | 8400.0,4895,41.03438
20 | 15450.0,6025,43.265694
21 | 15900.0,7189,45.924935
22 | 15400.0,6339,48.269106
23 | 16250.0,7487,51.050018
24 | 14925.0,6852,53.588706
25 | 16325.0,8247,56.64451
26 | 16300.0,8242,59.700643
27 | 15400.0,6334,62.046261
28 | 15400.0,6339,64.393149
29 | 15400.0,6399,66.765665
30 | 16250.0,7059,69.384406
31 | 16000.0,7414,72.127738
32 | 16000.0,7317,74.832424
33 | 16275.0,8557,78.012088
34 | 15650.0,7473,80.778527
35 | 15600.0,6377,83.135277
36 | 16700.0,8209,86.173364
37 | 15600.0,6372,88.537448
38 | 4425.0,3129,89.688693
39 | 16425.0,7340,92.412909
40 | 3975.0,2033,93.155878
41 | 15600.0,6372,95.515862
42 | 16050.0,6442,97.906537
43 | 16250.0,7487,100.682827
44 | 16025.0,6742,103.176655
45 | 16300.0,8042,106.157331
46 | 15475.0,6850,108.689824
47 | 16175.0,7827,111.588518
48 | 8550.0,6072,113.839271
49 | 16475.0,6543,116.257065
50 | 15400.0,6339,118.600999
51 | 16050.0,7100,121.234627
52 | 16150.0,7077,123.855452
53 | 15500.0,6623,126.313986
54 | 16475.0,6357,128.661455
55 | 15400.0,6339,131.005288
56 | 12100.0,5664,133.09717
57 | 15400.0,6334,135.439019
58 | 7750.0,3307,136.661919
59 | 16075.0,7510,139.44304
60 | 15450.0,6194,141.736086
61 | 15400.0,6339,144.08686
62 | 17275.0,9532,147.619866
63 | 15550.0,6025,149.842855
64 | 15600.0,6372,152.203281
65 | 15475.0,6731,154.696078
66 | 15500.0,6002,156.923521
67 | 16475.0,7882,159.837396
68 | 15550.0,6807,162.35231
69 | 16975.0,6733,164.847731
70 | 15400.0,6334,167.194746
71 | 15400.0,6399,169.56363
72 | 15500.0,6814,172.090505
73 | 15400.0,6339,174.44393
74 | 16250.0,7492,177.215289
75 | 15075.0,7007,179.807077
76 | 15650.0,6239,182.109103
77 | 7550.0,3708,183.476477
78 | 15600.0,6372,185.836382
79 | 15725.0,6467,188.226367
80 | 15600.0,6372,190.592282
81 | 15400.0,6339,192.935986
82 | 15650.0,6927,195.50159
83 | 16900.0,6274,197.820424
84 | 16250.0,7487,200.593243
85 | 16075.0,7007,203.18213
86 | 15525.0,6727,205.669727
87 | 16275.0,7587,208.47724
88 | 15600.0,6377,210.842808
89 | 15400.0,6334,213.186167
90 | 15400.0,6334,215.530586
91 | 15400.0,6334,217.876995
92 | 15400.0,6334,220.218556
93 | 4600.0,6289,222.542634
94 | 16075.0,7365,225.269416
95 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-RoadRunnerNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615284101.0187874, "env_id": "RoadRunnerNoFrameskip-v4"}
 2 | r,l,t
 3 | 45700.0,4195,4.675652
 4 | 36400.0,10379,9.093378
 5 | 42500.0,4318,10.943592
 6 | 40000.0,4409,12.819046
 7 | 40100.0,18894,20.841767
 8 | 48000.0,16250,27.781744
 9 | 31400.0,4342,29.634131
10 | 29100.0,3676,31.203189
11 | 43100.0,7511,34.405609
12 | 29500.0,3953,36.088055
13 | 39700.0,6818,38.997077
14 | 38500.0,5285,41.252269
15 | 49500.0,4064,43.000126
16 | 46600.0,10519,47.530509
17 | 29200.0,4526,49.451559
18 | 39100.0,5894,51.960575
19 | 53500.0,11215,56.743758
20 | 42100.0,4457,58.650866
21 | 30500.0,3748,60.247491
22 | 56500.0,19033,68.358962
23 | 48500.0,25777,79.503964
24 | 33400.0,6918,82.571893
25 | 47600.0,29355,95.159505
26 | 56500.0,14117,101.289444
27 | 54000.0,3714,102.91013
28 | 39900.0,27591,114.183431
29 | 45100.0,9823,118.347341
30 | 31900.0,8528,122.001608
31 | 51000.0,5418,124.284162
32 | 39400.0,3984,125.976956
33 | 49600.0,5012,128.104257
34 | 42500.0,4851,130.154958
35 | 40900.0,4812,132.180582
36 | 49000.0,10090,136.482913
37 | 23900.0,4297,138.380678
38 | 41600.0,26677,149.766945
39 | 37900.0,55625,173.340621
40 | 54500.0,8922,177.154951
41 | 36000.0,4763,179.190263
42 | 40100.0,4745,181.219405
43 | 32200.0,6252,183.88833
44 | 47700.0,5602,186.280746
45 | 34800.0,4502,188.199655
46 | 53100.0,5188,190.433815
47 | 48600.0,7978,193.853717
48 | 44500.0,20618,202.602421
49 | 45100.0,10767,207.15412
50 | 30700.0,3959,208.843217
51 | 42600.0,9351,212.814273
52 | 28100.0,4686,214.808829
53 | 48900.0,5631,217.231784
54 | 56000.0,5127,219.426613
55 | 48000.0,11301,224.260313
56 | 30300.0,7920,227.654343
57 | 42500.0,8656,231.329389
58 | 34600.0,4871,233.404373
59 | 58500.0,34210,247.963593
60 | 49000.0,8831,251.734886
61 | 47700.0,7061,254.753482
62 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-SeaquestNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615283846.1270072, "env_id": "SeaquestNoFrameskip-v4"}
 2 | r,l,t
 3 | 2600.0,9082,6.484057
 4 | 2580.0,9082,10.181498
 5 | 2560.0,9081,13.884617
 6 | 2580.0,9081,17.589159
 7 | 2560.0,9082,21.302521
 8 | 2580.0,9070,25.006688
 9 | 2560.0,9081,28.729359
10 | 2560.0,9081,32.440299
11 | 2580.0,9082,36.154031
12 | 2580.0,9082,39.827491
13 | 2600.0,9081,43.539016
14 | 2560.0,9082,47.245576
15 | 2600.0,9082,50.952624
16 | 2580.0,9082,54.671462
17 | 2560.0,9082,58.392135
18 | 2560.0,9082,62.102942
19 | 2560.0,9050,65.804434
20 | 2600.0,9082,69.522391
21 | 2540.0,9114,73.254171
22 | 2460.0,8694,76.806014
23 | 2540.0,9082,80.52209
24 | 2460.0,8665,84.067281
25 | 2540.0,8954,87.693721
26 | 2500.0,8794,91.284982
27 | 2580.0,9082,94.997058
28 | 2600.0,9081,98.713351
29 | 2560.0,9082,102.426905
30 | 2600.0,9081,106.259488
31 | 2580.0,9082,110.101429
32 | 2580.0,9082,113.837194
33 | 2580.0,9082,117.564113
34 | 2580.0,9082,121.291342
35 | 2580.0,9082,125.017416
36 | 2580.0,9082,128.74763
37 | 2560.0,9082,132.486106
38 | 2560.0,9081,136.265387
39 | 2500.0,9081,140.045696
40 | 2560.0,9082,143.820506
41 | 2560.0,9082,147.595112
42 | 2560.0,9113,151.388052
43 | 2560.0,9082,155.165497
44 | 1980.0,7418,158.271334
45 | 2580.0,9081,162.070482
46 | 2560.0,9082,165.861288
47 | 2600.0,9082,169.653452
48 | 2560.0,9081,173.442111
49 | 2560.0,9082,177.22763
50 | 2560.0,9082,180.942908
51 | 2600.0,9082,184.682519
52 | 2540.0,9081,188.42108
53 | 2580.0,9082,192.16452
54 | 2560.0,9081,195.902184
55 | 2580.0,9082,199.642456
56 | 2580.0,9082,203.379417
57 | 2560.0,9082,207.112618
58 | 2600.0,9081,210.860535
59 | 2560.0,9082,214.599277
60 | 2600.0,9082,218.338832
61 | 2580.0,9082,222.068007
62 | 2540.0,8794,225.65806
63 | 2580.0,9082,229.372201
64 | 2580.0,9082,233.098182
65 | 2560.0,9081,236.829339
66 | 2600.0,9082,240.560709
67 | 2560.0,9082,244.280681
68 | 2560.0,9114,248.011547
69 | 


--------------------------------------------------------------------------------
/logs/benchmark/qrdqn-SpaceInvadersNoFrameskip-v4/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1614780157.951553, "env_id": "SpaceInvadersNoFrameskip-v4"}
 2 | r,l,t
 3 | 1860.0,9433,6.017921
 4 | 1175.0,5897,8.11461
 5 | 2030.0,8071,10.982974
 6 | 1205.0,5963,13.093197
 7 | 545.0,2807,14.085371
 8 | 2715.0,11574,18.185369
 9 | 2360.0,10915,22.055546
10 | 2695.0,12400,26.42824
11 | 2150.0,10363,30.087275
12 | 1200.0,6444,32.357866
13 | 1230.0,6287,34.576591
14 | 1235.0,5855,36.641079
15 | 1775.0,8365,39.598783
16 | 1235.0,5743,41.63625
17 | 1200.0,5490,43.575229
18 | 2615.0,14253,48.580082
19 | 545.0,3365,49.769976
20 | 1805.0,8265,52.685741
21 | 2030.0,9036,55.880196
22 | 1230.0,6311,58.104826
23 | 2200.0,9673,61.514868
24 | 1200.0,6015,63.640325
25 | 1860.0,8851,66.758578
26 | 1435.0,5937,68.859172
27 | 600.0,3857,70.211838
28 | 2145.0,10022,73.769417
29 | 1375.0,6239,75.980469
30 | 1745.0,8183,78.877634
31 | 1230.0,5879,80.963021
32 | 1400.0,5782,83.007614
33 | 1860.0,8973,86.166276
34 | 2925.0,12519,90.581527
35 | 1355.0,6386,92.840083
36 | 2465.0,12334,97.176396
37 | 1235.0,6153,99.345549
38 | 2910.0,12098,103.606352
39 | 1835.0,8619,106.660892
40 | 575.0,3337,107.830249
41 | 2060.0,9007,111.002099
42 | 2695.0,12860,115.521394
43 | 2490.0,12121,119.778012
44 | 1200.0,6887,122.210271
45 | 1860.0,8317,125.142263
46 | 545.0,3244,126.286159
47 | 1175.0,5886,128.380142
48 | 2810.0,11428,132.413715
49 | 2860.0,11611,136.527394
50 | 3235.0,12789,141.018401
51 | 1145.0,5983,143.135476
52 | 1860.0,8419,146.100608
53 | 3920.0,17237,152.142236
54 | 1860.0,8593,155.161895
55 | 3725.0,14361,160.22655
56 | 2690.0,11517,164.291665
57 | 1435.0,5895,166.378403
58 | 3565.0,13548,171.137103
59 | 2540.0,10619,174.901788
60 | 2435.0,11811,179.077891
61 | 3380.0,14197,184.075026
62 | 570.0,3009,185.147563
63 | 1205.0,6201,187.337598
64 | 2720.0,12506,191.72243
65 | 1170.0,6345,193.966073
66 | 2915.0,11154,197.895742
67 | 1175.0,6241,200.099791
68 | 1805.0,9273,203.379786
69 | 2320.0,10262,206.994567
70 | 1430.0,6411,209.251219
71 | 3115.0,11792,213.408668
72 | 


--------------------------------------------------------------------------------
/logs/benchmark/sac-BipedalWalker-v3/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1614772839.3377283, "env_id": "BipedalWalker-v3"}
  2 | r,l,t
  3 | 302.360601,1114,3.36505
  4 | 299.982386,1097,4.165927
  5 | 301.305306,1118,4.981788
  6 | 300.755528,1109,5.789752
  7 | 300.41319,1103,6.591837
  8 | 300.136167,1112,7.402884
  9 | 301.942391,1111,8.212199
 10 | 300.705809,1109,9.019499
 11 | 302.139217,1109,9.826885
 12 | 300.911729,1104,10.629605
 13 | 302.272265,1109,11.437745
 14 | 299.604268,1104,12.241354
 15 | 299.278248,1100,13.040194
 16 | 300.414958,1101,13.841523
 17 | 300.574915,1113,14.650683
 18 | 299.422525,1108,15.457727
 19 | 301.058969,1113,16.265585
 20 | 300.929906,1111,17.070907
 21 | 301.217599,1112,17.876746
 22 | 299.808359,1099,18.676308
 23 | 301.97646,1103,19.475271
 24 | 299.383641,1105,20.276059
 25 | 301.048902,1119,21.089404
 26 | 300.719316,1104,21.890113
 27 | 300.35663,1104,22.689351
 28 | 299.585902,1102,23.49109
 29 | 300.387684,1101,24.289257
 30 | 301.02301,1105,25.091187
 31 | 299.283324,1103,25.888853
 32 | 298.797811,1101,26.6852
 33 | 300.889551,1105,27.484889
 34 | 301.312607,1117,28.293692
 35 | 298.839066,1104,29.090828
 36 | 301.465511,1105,29.88746
 37 | 299.999362,1107,30.688471
 38 | 300.343653,1119,31.501015
 39 | 300.794821,1105,32.298504
 40 | 301.566184,1122,33.106802
 41 | 300.154061,1104,33.904654
 42 | 300.601982,1109,34.707381
 43 | 299.783959,1116,35.512661
 44 | 299.249841,1103,36.308428
 45 | 300.459917,1103,37.104169
 46 | 301.997507,1113,37.908109
 47 | 299.710068,1101,38.703565
 48 | 301.021391,1097,39.493932
 49 | 302.55917,1113,40.297005
 50 | 300.073168,1107,41.096599
 51 | 303.078654,1119,41.897959
 52 | 298.265251,1104,42.68975
 53 | 300.982828,1104,43.482014
 54 | 300.353754,1110,44.280113
 55 | 300.748211,1112,45.076373
 56 | 300.661172,1118,45.876885
 57 | 300.173002,1106,46.671299
 58 | 302.059827,1106,47.467817
 59 | 302.151632,1115,48.265848
 60 | 298.925516,1097,49.052199
 61 | 300.456355,1118,49.853958
 62 | 300.882569,1105,50.647162
 63 | 301.373377,1115,51.445441
 64 | 301.108553,1104,52.237467
 65 | 298.812307,1104,53.032318
 66 | 298.587068,1092,53.822424
 67 | 296.778464,1099,54.612579
 68 | 299.83619,1094,55.396447
 69 | 301.041658,1104,56.188702
 70 | 298.44367,1095,56.975703
 71 | 301.994168,1104,57.765858
 72 | 299.021788,1102,58.555475
 73 | 301.347008,1109,59.351158
 74 | 300.822325,1101,60.142673
 75 | -86.211804,114,60.227066
 76 | 302.564616,1114,61.024435
 77 | 299.492492,1107,61.81947
 78 | 301.447796,1108,62.615735
 79 | 302.675531,1117,63.414827
 80 | 301.229215,1103,64.204417
 81 | 300.875007,1114,65.002628
 82 | 300.933166,1094,65.787878
 83 | 301.602342,1113,66.584519
 84 | 298.375637,1112,67.380561
 85 | 299.737012,1103,68.171237
 86 | 299.803846,1105,68.962979
 87 | 300.758485,1108,69.756849
 88 | 301.953926,1109,70.549286
 89 | 299.530853,1110,71.343521
 90 | 300.314862,1108,72.137488
 91 | 299.606724,1106,72.927233
 92 | 298.734268,1100,73.712741
 93 | 298.709114,1092,74.494166
 94 | 298.423564,1095,75.279155
 95 | 302.788395,1110,76.072666
 96 | 298.820547,1098,76.859971
 97 | 298.282516,1108,77.653742
 98 | 298.552512,1109,78.449211
 99 | 300.992863,1104,79.239589
100 | 299.893245,1100,80.026569
101 | 299.645259,1095,80.809745
102 | 300.726465,1106,81.601357
103 | 301.481949,1110,82.394461
104 | 300.000249,1109,83.191714
105 | 300.958623,1128,84.001832
106 | 300.926692,1123,84.806707
107 | 301.136796,1103,85.595889
108 | 300.421307,1104,86.386729
109 | 301.304972,1102,87.177112
110 | 299.196795,1107,87.971708
111 | 300.282078,1105,88.7635
112 | 298.923465,1110,89.557497
113 | 301.749845,1121,90.359199
114 | 300.179531,1102,91.150525
115 | 301.675692,1115,91.949993
116 | 298.664999,1098,92.738292
117 | 302.259756,1104,93.530223
118 | 300.335136,1109,94.327485
119 | 302.262681,1108,95.121866
120 | 301.742746,1102,95.910671
121 | 301.690168,1118,96.714285
122 | 299.771082,1106,97.508253
123 | 300.023118,1103,98.298843
124 | 297.838527,1092,99.080485
125 | 300.998053,1116,99.881017
126 | 300.246662,1101,100.672354
127 | 300.242191,1105,101.463338
128 | 300.060263,1099,102.250881
129 | 301.302396,1107,103.04414
130 | 301.973059,1114,103.8426
131 | 301.272366,1108,104.634917
132 | 302.632714,1108,105.426627
133 | 300.551611,1100,106.214734
134 | 302.824584,1116,107.015751
135 | 300.550722,1109,107.810142
136 | 300.636237,1107,108.603951
137 | 300.656654,1114,109.403951
138 | 299.35787,1095,110.189639
139 | 


--------------------------------------------------------------------------------
/logs/benchmark/sac-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615282937.308309, "env_id": "BipedalWalkerHardcore-v3"}
 2 | r,l,t
 3 | 41.778326,953,3.662442
 4 | 180.467412,2000,5.824946
 5 | 97.295092,1363,7.285249
 6 | -81.182201,346,7.643032
 7 | -40.802484,2000,9.825659
 8 | -99.934007,2000,11.965935
 9 | -42.94785,2000,14.116859
10 | 282.487894,1556,15.766352
11 | -2.732902,779,16.615764
12 | -17.791582,2000,18.759749
13 | -118.597024,2000,20.948429
14 | 279.944892,1562,22.609763
15 | -37.321344,2000,24.767509
16 | -55.572132,441,25.237352
17 | 65.808814,2000,27.385929
18 | 54.506992,2000,29.527135
19 | -30.193249,2000,31.700151
20 | 262.237829,1844,33.642516
21 | -135.306954,2000,35.582859
22 | 32.545857,2000,37.762973
23 | -96.304213,2000,39.912227
24 | -62.419513,2000,42.052971
25 | -10.491988,2000,44.205731
26 | 3.009945,2000,46.351385
27 | -41.232128,2000,48.469136
28 | -25.112829,2000,50.622787
29 | 1.402057,604,51.264819
30 | 113.538035,2000,53.445968
31 | 279.57376,1561,55.120767
32 | -11.387091,512,55.665265
33 | 2.594991,2000,57.829058
34 | -46.001328,2000,59.981994
35 | -111.158098,209,60.211232
36 | 5.653206,2000,62.355254
37 | -115.540607,2000,64.528579
38 | -29.031126,2000,66.686181
39 | -20.090611,2000,68.844233
40 | 25.867034,2000,71.026041
41 | 68.224921,2000,73.172868
42 | -41.639959,2000,75.30538
43 | -63.43205,314,75.639336
44 | -55.897433,2000,77.824094
45 | -79.076376,348,78.183614
46 | -75.219312,336,78.550389
47 | 166.120098,2000,80.70091
48 | -6.885657,2000,82.591767
49 | 67.274203,2000,84.728624
50 | -100.736684,2000,86.88388
51 | 9.361807,2000,89.031049
52 | -104.43978,2000,91.185034
53 | 175.253622,2000,93.32884
54 | 72.826926,1502,94.950952
55 | -107.557122,426,95.420961
56 | 34.350763,2000,97.562223
57 | 118.684665,2000,99.711555
58 | 18.436068,918,100.70732
59 | -72.023005,512,101.245563
60 | -94.057267,2000,103.430716
61 | -14.339757,2000,105.585078
62 | 276.646473,1682,107.405251
63 | -93.379379,2000,109.552183
64 | -32.890574,2000,111.734328
65 | -19.358966,2000,113.875139
66 | -97.591226,2000,116.016107
67 | -104.305948,2000,118.203043
68 | 268.461755,1756,120.0877
69 | -83.482728,2000,122.255662
70 | 59.102619,2000,124.415739
71 | 31.694314,2000,126.600275
72 | -24.754214,2000,128.495678
73 | -61.835281,2000,130.675213
74 | -35.282332,2000,132.834678
75 | -33.850913,2000,134.96645
76 | -111.449906,2000,137.133202
77 | -113.290239,263,137.41089
78 | 7.253,2000,139.565842
79 | 100.123289,1549,141.236544
80 | -35.315291,2000,143.384549
81 | -62.103672,2000,145.527463
82 | -55.579403,458,146.024764
83 | 176.048998,2000,148.188452
84 | -15.598691,2000,150.343705
85 | 100.101637,2000,152.481565
86 | 138.802199,2000,154.663462
87 | -108.792934,2000,156.829956
88 | -6.308218,2000,158.973216
89 | -91.763453,2000,161.134987
90 | 5.102684,2000,163.266236
91 | 


--------------------------------------------------------------------------------
/logs/benchmark/td3-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
 1 | #{"t_start": 1615035716.3739204, "env_id": "BipedalWalkerHardcore-v3"}
 2 | r,l,t
 3 | -75.120605,2000,4.864824
 4 | -132.737428,2000,6.84062
 5 | -75.439565,2000,8.801577
 6 | -76.590578,2000,10.690364
 7 | -77.46551,2000,12.538607
 8 | -118.239931,2000,14.351909
 9 | -104.317353,2000,16.067962
10 | -69.326927,2000,17.797764
11 | -117.754794,2000,19.485572
12 | -72.893132,2000,21.180301
13 | -100.400577,2000,22.862713
14 | -115.384812,2000,24.573101
15 | -111.380875,2000,26.278326
16 | -113.021511,2000,27.959434
17 | -72.503946,2000,29.715913
18 | -118.90079,2000,31.564067
19 | -104.994169,2000,33.428279
20 | -112.627715,2000,35.262558
21 | -76.1824,2000,36.904961
22 | -76.263312,2000,38.549288
23 | -115.804972,2000,40.212135
24 | -112.246159,2000,41.872432
25 | -91.741549,2000,43.504618
26 | -82.195084,2000,45.10595
27 | -79.176994,2000,46.691015
28 | -99.303968,2000,48.336355
29 | -87.31807,2000,49.946664
30 | -95.103121,2000,51.592625
31 | -106.846239,2000,53.230053
32 | -113.287192,2000,54.865682
33 | -102.919775,2000,56.480147
34 | -90.229798,2000,58.131267
35 | -115.558105,2000,59.771812
36 | -108.654203,2000,61.383754
37 | -115.18619,2000,63.12032
38 | -76.182868,2000,64.965528
39 | -81.919436,2000,66.800929
40 | -94.051451,2000,68.635713
41 | -100.343992,2000,70.541423
42 | -107.29148,2000,72.423547
43 | -89.732649,2000,74.275149
44 | -101.208089,2000,76.139937
45 | -73.837888,2000,78.020883
46 | -108.810423,2000,79.869502
47 | -90.368393,2000,81.699364
48 | -118.587039,2000,83.560536
49 | -90.45015,2000,85.259661
50 | -111.38551,2000,86.979527
51 | -90.475702,2000,88.684398
52 | -115.950619,2000,90.399352
53 | -82.306352,2000,92.135506
54 | -73.167066,2000,93.870667
55 | -116.350767,2000,95.585823
56 | -104.899976,2000,97.168645
57 | -109.348082,2000,98.77277
58 | -89.763986,2000,100.360732
59 | -75.626801,2000,101.942616
60 | -114.163178,2000,103.571591
61 | -108.562208,2000,105.200625
62 | -81.079522,2000,106.821184
63 | -94.88129,2000,108.5505
64 | -95.688965,2000,110.409605
65 | -96.025015,2000,112.25252
66 | -115.966983,2000,114.110633
67 | -104.212579,2000,115.954515
68 | -108.756676,2000,117.801634
69 | -77.533709,2000,119.665398
70 | -108.630552,2000,121.521044
71 | -90.906734,2000,123.2634
72 | -132.124042,2000,124.980735
73 | -82.543561,2000,126.690589
74 | -110.444789,2000,128.422873
75 | -81.163665,2000,130.135998
76 | -116.381794,2000,131.861476
77 | -100.473055,2000,133.715868
78 | 


--------------------------------------------------------------------------------
/logs/benchmark/tqc-BipedalWalkerHardcore-v3/0.monitor.csv:
--------------------------------------------------------------------------------
  1 | #{"t_start": 1614972944.323092, "env_id": "BipedalWalkerHardcore-v3"}
  2 | r,l,t
  3 | 158.985985,844,4.102602
  4 | 300.32726,1050,5.244834
  5 | 300.535558,1038,6.388168
  6 | 296.875619,1085,7.575825
  7 | 302.358559,1013,8.64315
  8 | 25.923265,577,9.246357
  9 | 228.283257,2000,11.337982
 10 | 311.316454,975,12.353261
 11 | 52.706594,568,12.944439
 12 | -23.643158,330,13.288158
 13 | 298.875393,1063,14.3944
 14 | 307.395613,1004,15.440466
 15 | 291.257043,1082,16.563598
 16 | 302.321319,1016,17.622731
 17 | 292.55956,1117,18.783862
 18 | 300.52161,1030,19.861118
 19 | 305.402259,1032,20.934168
 20 | 305.712174,960,21.97389
 21 | 15.765116,441,22.457574
 22 | 286.893706,1176,23.768661
 23 | 292.588987,1106,25.062059
 24 | 303.161458,1006,26.238271
 25 | 124.72623,2000,28.58553
 26 | 299.188807,1067,29.837964
 27 | 297.265926,1055,31.059305
 28 | 299.246128,1058,32.225142
 29 | 138.067304,2000,34.423318
 30 | 158.101962,2000,36.611668
 31 | 307.149411,982,37.682646
 32 | 301.376617,1033,38.810155
 33 | 52.525784,2000,41.012082
 34 | -5.957255,2000,43.22972
 35 | 303.222906,1042,44.368109
 36 | -30.104262,296,44.690454
 37 | 302.856284,1033,45.814348
 38 | 299.417344,1052,46.969577
 39 | 299.568715,1052,48.122539
 40 | 302.167447,1038,49.255569
 41 | 300.055837,1071,50.428728
 42 | 304.075581,974,51.535719
 43 | 300.606989,1064,52.697526
 44 | 21.185638,2000,54.917194
 45 | 293.851598,1069,56.089131
 46 | 35.995669,2000,58.279191
 47 | 218.588658,2000,60.470027
 48 | 44.859679,2000,62.676093
 49 | 298.081841,1028,63.770969
 50 | 291.965119,1100,64.95215
 51 | 288.8617,1160,66.225818
 52 | 65.046742,883,67.195506
 53 | 303.598842,1029,68.311282
 54 | 298.681165,1122,69.500515
 55 | 134.920099,816,70.363624
 56 | 303.944964,963,71.381186
 57 | 261.650718,2000,73.633541
 58 | 298.924762,1050,74.818844
 59 | 178.425483,2000,77.085008
 60 | 301.952066,1029,78.24571
 61 | 297.590377,1096,79.475244
 62 | 308.256382,999,80.60499
 63 | 303.66264,1048,81.782448
 64 | 233.439754,2000,83.977091
 65 | -43.841983,314,84.32413
 66 | 291.240187,1248,85.687433
 67 | 303.171049,967,86.73503
 68 | 307.011786,993,87.805815
 69 | 307.201248,1034,88.926074
 70 | 92.128525,2000,91.102643
 71 | 298.651352,1050,92.212756
 72 | 296.614816,1089,93.362487
 73 | 195.222046,2000,95.562203
 74 | 305.238567,1023,96.697325
 75 | 303.564229,1010,97.819911
 76 | 291.643838,1115,99.053748
 77 | 295.809282,1120,100.295977
 78 | 303.4209,967,101.361445
 79 | 307.599352,973,102.432886
 80 | 296.079821,1059,103.588525
 81 | 296.641486,1042,104.726761
 82 | 304.632718,974,105.792635
 83 | 300.684866,1019,106.913619
 84 | 300.337727,1037,108.04645
 85 | 294.32521,1111,109.264762
 86 | 299.984938,1028,110.397997
 87 | 288.785893,1167,111.689689
 88 | 303.119877,1011,112.865363
 89 | 298.647242,1058,114.10566
 90 | 48.931804,2000,116.453665
 91 | 187.262679,2000,118.781733
 92 | 291.822427,1128,120.056754
 93 | 303.411501,1061,121.223087
 94 | 297.410528,1064,122.394384
 95 | 45.52435,545,122.994992
 96 | 298.206199,1059,124.1576
 97 | 302.624397,1032,125.287297
 98 | 237.922429,2000,127.487648
 99 | 12.073966,514,128.050607
100 | 307.005817,971,129.119031
101 | 217.421274,2000,131.331841
102 | -54.266479,274,131.637203
103 | -45.189495,272,131.948989
104 | 301.654401,983,133.04844
105 | 305.29741,1005,134.153365
106 | 110.310658,2000,136.342585
107 | 304.438778,963,137.356552
108 | 299.639912,1068,138.473265
109 | -61.984761,317,138.806689
110 | 296.903281,1087,139.96187
111 | 311.947863,989,141.033978
112 | 305.173788,968,142.080554
113 | -24.936318,449,142.561262
114 | 293.318206,1155,143.815958
115 | 302.575292,1039,144.945063
116 | 301.656432,1026,146.112909
117 | 298.843551,1037,147.290129
118 | 295.248605,1068,148.497394
119 | 305.653108,1032,149.650847
120 | 169.665353,2000,151.820901
121 | 295.730421,1071,152.976426
122 | 41.452605,2000,155.148677
123 | 304.44089,1024,156.245353
124 | 305.12927,1002,157.330209
125 | 300.891507,1032,158.455373
126 | 121.746389,867,159.394768
127 | 300.548733,1040,160.525973
128 | 97.863702,758,161.330169
129 | 300.743761,1009,162.391677
130 | 301.44053,1067,163.515258
131 | 57.961807,713,164.271593
132 | 290.951927,1117,165.47083
133 | 299.077015,1095,166.676866
134 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.ruff]
 2 | # Same as Black.
 3 | line-length = 127
 4 | # Assume Python 3.9
 5 | target-version = "py39"
 6 | 
 7 | [tool.ruff.lint]
 8 | # See https://beta.ruff.rs/docs/rules/
 9 | select = ["E", "F", "B", "UP", "C90", "RUF"]
10 | # Ignore B028 (no explicit `stacklevel` in `warnings.warn`)
11 | ignore = ["B028"]
12 | 
13 | [tool.ruff.lint.per-file-ignores]
14 | "./rl_zoo3/import_envs.py"= ["F401"]
15 | # "./rl_zoo3/plots/plot_train.py"= ["E501"]
16 | 
17 | 
18 | [tool.ruff.lint.mccabe]
19 | # Ruff's default complexity limit is 10 (Flake8 has none by default); allow up to 15 here.
20 | max-complexity = 15
21 | 
22 | [tool.black]
23 | line-length = 127
24 | 
25 | [tool.mypy]
26 | ignore_missing_imports = true
27 | follow_imports = "silent"
28 | show_error_codes = true
29 | exclude = """(?x)(
30 |     tests/dummy_env/*$
31 |   )"""
32 | 
33 | [tool.pytest.ini_options]
34 | # Deterministic ordering for tests; useful for pytest-xdist.
35 | env = [
36 | 	"PYTHONHASHSEED=0"
37 | ]
38 | 
39 | filterwarnings = [
40 |     # Tensorboard warnings
41 |     "ignore::DeprecationWarning:tensorboard",
42 |     # Gym warnings
43 |     "ignore::UserWarning:gym",
44 | ]
45 | markers = [
46 |     "slow: marks tests as slow (deselect with '-m \"not slow\"')"
47 | ]
48 | 
49 | [tool.coverage.run]
50 | disable_warnings = ["couldnt-parse"]
51 | branch = false
52 | omit = [
53 | 	"tests/*",
54 | 	"setup.py",
55 | 	"rl_zoo3/plots/*",
56 | 	"rl_zoo3/push_to_hub.py",
57 | 	"scripts/*",
58 | ]
59 | 
60 | [tool.coverage.report]
61 | exclude_lines = [ "pragma: no cover", "raise NotImplementedError()", "if typing.TYPE_CHECKING:"]
62 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | gym==0.26.2
 2 | stable-baselines3[extra,tests,docs]>=2.6.1a1,<3.0
 3 | box2d-py==2.3.8
 4 | pybullet_envs_gymnasium>=0.6.0
 5 | # minigrid
 6 | cloudpickle>=2.2.1
 7 | # Optuna auto
 8 | optunahub>=0.2.0
 9 | # optuna plots:
10 | plotly
11 | # need to upgrade to gymnasium:
12 | # panda-gym~=3.0.1
13 | wandb
14 | moviepy>=1.0.0
15 | 


--------------------------------------------------------------------------------
/rl_zoo3/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | # isort: off
 4 | 
 5 | import rl_zoo3.gym_patches  # noqa: F401
 6 | 
 7 | # isort: on
 8 | 
 9 | from rl_zoo3.utils import (
10 |     ALGOS,
11 |     create_test_env,
12 |     get_latest_run_id,
13 |     get_saved_hyperparams,
14 |     get_trained_models,
15 |     get_wrapper_class,
16 |     linear_schedule,
17 | )
18 | 
19 | # Read version from file
20 | version_file = os.path.join(os.path.dirname(__file__), "version.txt")
21 | with open(version_file) as file_handler:
22 |     __version__ = file_handler.read().strip()
23 | 
24 | __all__ = [
25 |     "ALGOS",
26 |     "create_test_env",
27 |     "get_latest_run_id",
28 |     "get_saved_hyperparams",
29 |     "get_trained_models",
30 |     "get_wrapper_class",
31 |     "linear_schedule",
32 | ]
33 | 


--------------------------------------------------------------------------------
/rl_zoo3/cli.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from rl_zoo3.enjoy import enjoy
 4 | from rl_zoo3.plots import all_plots, plot_from_file, plot_train
 5 | from rl_zoo3.train import train
 6 | 
 7 | 
 8 | def main():
 9 |     script_name = sys.argv[1]
10 |     # Remove script name
11 |     del sys.argv[1]
12 |     # Execute known script
13 |     known_scripts = {
14 |         "train": train,
15 |         "enjoy": enjoy,
16 |         "plot_train": plot_train,
17 |         "plot_from_file": plot_from_file,
18 |         "all_plots": all_plots,
19 |     }
20 |     if script_name not in known_scripts.keys():
21 |         raise ValueError(f"The script {script_name} is unknown, please use one of {known_scripts.keys()}")
22 |     known_scripts[script_name]()
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/rl_zoo3/gym_patches.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Patches for gym 0.26+ so RL Zoo3 keeps working as before
 3 | (notably TimeLimit wrapper and Pybullet envs)
 4 | """
 5 | 
 6 | import numpy as np
 7 | 
 8 | # Deprecation warning with gym 0.26 and numpy 1.24
 9 | np.bool8 = np.bool_  # type: ignore[attr-defined]
10 | 
11 | import gymnasium  # noqa: E402
12 | 
13 | 
class PatchedTimeLimit(gymnasium.wrappers.TimeLimit):
    """
    TimeLimit wrapper that keeps the pre-0.26 behavior.

    See https://github.com/openai/gym/issues/3102
    and https://github.com/Farama-Foundation/Gymnasium/pull/101:
    the additional ``TimeLimit.truncated`` info is provided
    once the step limit is reached, and it is only ``True``
    when the episode is over because of that limit
    (or when the environment already set it).
    """

    def step(self, action):
        observation, reward, terminated, truncated, info = self.env.step(action)
        self._elapsed_steps += 1

        # Step limit not reached yet: forward the transition untouched
        if self._elapsed_steps < self._max_episode_steps:
            return observation, reward, terminated, truncated, info

        already_over = truncated or terminated
        # The environment may have set the TimeLimit.truncated key itself:
        # keep its value, otherwise flag a timeout only when the episode
        # did not end for another reason
        timed_out = not already_over or info.get("TimeLimit.truncated", False)
        info["TimeLimit.truncated"] = timed_out
        # Keep a truncation that was already reported by the environment
        truncated = truncated or timed_out

        return observation, reward, terminated, truncated, info


# Install the patched wrapper everywhere Gymnasium exposes TimeLimit
gymnasium.wrappers.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
try:
    gymnasium.wrappers.time_limit.TimeLimit = PatchedTimeLimit  # type: ignore[misc]
except AttributeError:
    # No `time_limit` submodule available, patch `common` instead
    gymnasium.wrappers.common.TimeLimit = PatchedTimeLimit  # type: ignore
gymnasium.envs.registration.TimeLimit = PatchedTimeLimit  # type: ignore[misc,attr-defined]
47 | 


--------------------------------------------------------------------------------
/rl_zoo3/import_envs.py:
--------------------------------------------------------------------------------
 1 | from typing import Callable, Optional
 2 | 
 3 | import gymnasium as gym
 4 | from gymnasium.envs.registration import register, register_envs
 5 | 
 6 | from rl_zoo3.wrappers import MaskVelocityWrapper
 7 | 
 8 | try:
 9 |     import pybullet_envs_gymnasium
10 | except ImportError:
11 |     pass
12 | 
13 | try:
14 |     import ale_py
15 | 
16 |     # no-op
17 |     gym.register_envs(ale_py)
18 | except ImportError:
19 |     pass
20 | 
21 | try:
22 |     import highway_env
23 | except ImportError:
24 |     pass
25 | else:
26 |     # hotfix for highway_env
27 |     import numpy as np
28 | 
29 |     np.float = np.float32  # type: ignore[attr-defined]
30 | 
31 | try:
32 |     import custom_envs
33 | except ImportError:
34 |     pass
35 | 
36 | try:
37 |     import gym_donkeycar
38 | except ImportError:
39 |     pass
40 | 
41 | try:
42 |     import panda_gym
43 | except ImportError:
44 |     pass
45 | 
46 | try:
47 |     import rocket_lander_gym
48 | except ImportError:
49 |     pass
50 | 
51 | try:
52 |     import minigrid
53 | except ImportError:
54 |     pass
55 | 
56 | 
57 | # Register no vel envs
def create_no_vel_env(env_id: str) -> Callable[[Optional[str]], gym.Env]:
    """
    Create a factory that builds ``env_id`` wrapped in a ``MaskVelocityWrapper``.

    :param env_id: id of the environment to create with ``gym.make``
    :return: a function taking an optional ``render_mode`` and returning the wrapped env
    """

    def make_env(render_mode: Optional[str] = None) -> gym.Env:
        return MaskVelocityWrapper(gym.make(env_id, render_mode=render_mode))

    return make_env
65 | 
66 | 
# Register a "<Name>NoVel-v<version>" variant for every env known to MaskVelocityWrapper
for env_id in MaskVelocityWrapper.velocity_indices.keys():
    # Split on the last "-v" only, so that an id containing "-v" inside its name still unpacks
    name, version = env_id.rsplit("-v", 1)
    register(
        id=f"{name}NoVel-v{version}",
        entry_point=create_no_vel_env(env_id),  # type: ignore[arg-type]
    )
73 | 


--------------------------------------------------------------------------------
/rl_zoo3/plots/__init__.py:
--------------------------------------------------------------------------------
 1 | from rl_zoo3.plots.all_plots import all_plots
 2 | from rl_zoo3.plots.plot_from_file import plot_from_file
 3 | from rl_zoo3.plots.plot_train import plot_train
 4 | 
 5 | __all__ = [
 6 |     "all_plots",
 7 |     "plot_from_file",
 8 |     "plot_train",
 9 | ]
10 | 


--------------------------------------------------------------------------------
/rl_zoo3/plots/plot_train.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Plot training reward/success rate
 3 | """
 4 | 
 5 | import argparse
 6 | import os
 7 | 
 8 | import numpy as np
 9 | import seaborn
10 | from matplotlib import pyplot as plt
11 | from stable_baselines3.common.monitor import LoadMonitorResultsError, load_results
12 | from stable_baselines3.common.results_plotter import X_EPISODES, X_TIMESTEPS, X_WALLTIME, ts2xy, window_func
13 | 
14 | # Activate seaborn
15 | seaborn.set()
16 | 
17 | 
def plot_train():
    """
    Plot the rolling mean of a training metric from monitor files.

    Command line arguments select the algorithm, the environment(s), the
    experiment folder, the x-axis (steps, episodes or walltime) and the
    y-axis (success rate, episodic reward or episode length).
    Every sub-folder of ``<exp-folder>/<algo>`` whose name contains one of the
    requested environment names is loaded with ``load_results``; folders
    without usable monitor data are skipped, as are runs shorter than
    the rolling window.
    """
    parser = argparse.ArgumentParser("Gather results, plot training reward/success")
    parser.add_argument("-a", "--algo", help="Algorithm to include", type=str, required=True)
    parser.add_argument("-e", "--env", help="Environment(s) to include", nargs="+", type=str, required=True)
    parser.add_argument("-f", "--exp-folder", help="Folders to include", type=str, required=True)
    # Sizes are in inches and the default is fractional: parse them as float, not int
    parser.add_argument("--figsize", help="Figure size, width, height in inches.", nargs=2, type=float, default=[6.4, 4.8])
    parser.add_argument("--fontsize", help="Font size", type=int, default=14)
    parser.add_argument("-max", "--max-timesteps", help="Max number of timesteps to display", type=int)
    parser.add_argument("-x", "--x-axis", help="X-axis", choices=["steps", "episodes", "time"], type=str, default="steps")
    parser.add_argument("-y", "--y-axis", help="Y-axis", choices=["success", "reward", "length"], type=str, default="reward")
    parser.add_argument("-w", "--episode-window", help="Rolling window size", type=int, default=100)

    args = parser.parse_args()

    algo = args.algo
    envs = args.env
    log_path = os.path.join(args.exp_folder, algo)

    x_axis = {
        "steps": X_TIMESTEPS,
        "episodes": X_EPISODES,
        "time": X_WALLTIME,
    }[args.x_axis]
    x_label = {
        "steps": "Timesteps",
        "episodes": "Episodes",
        "time": "Walltime (in hours)",
    }[args.x_axis]

    # Column of the monitor data frame to plot
    y_axis = {
        "success": "is_success",
        "reward": "r",
        "length": "l",
    }[args.y_axis]
    y_label = {
        "success": "Training Success Rate",
        "reward": "Training Episodic Reward",
        "length": "Training Episode Length",
    }[args.y_axis]

    # Scan the log folder only once (and close the iterator), sorted by last modification
    with os.scandir(log_path) as scan_iterator:
        entries = sorted(scan_iterator, key=lambda entry: entry.stat().st_mtime)

    dirs = []
    for env in envs:
        dirs.extend(entry.path for entry in entries if env in entry.name and entry.is_dir())

    plt.figure(y_label, figsize=args.figsize)
    plt.title(y_label, fontsize=args.fontsize)
    plt.xlabel(f"{x_label}", fontsize=args.fontsize)
    plt.ylabel(y_label, fontsize=args.fontsize)
    for folder in dirs:
        try:
            data_frame = load_results(folder)
        except LoadMonitorResultsError:
            # No monitor file in this folder
            continue
        if args.max_timesteps is not None:
            # Only keep the episodes that fit in the timesteps budget
            data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps]
        try:
            y = np.array(data_frame[y_axis])
        except KeyError:
            print(f"No data available for {folder}")
            continue
        x, _ = ts2xy(data_frame, x_axis)

        # Do not plot the smoothed curve at all if the timeseries is shorter than window size.
        if x.shape[0] >= args.episode_window:
            # Compute and plot rolling mean with window of size args.episode_window
            x, y_mean = window_func(x, y, args.episode_window, np.mean)
            # Use the folder name as label, independently of the path separator
            plt.plot(x, y_mean, linewidth=2, label=os.path.basename(os.path.normpath(folder)))

    plt.legend()
    plt.tight_layout()
    plt.show()
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     plot_train()
96 | 


--------------------------------------------------------------------------------
/rl_zoo3/plots/score_normalization.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Min and Max score for each env for normalization when plotting.
  3 | Min score corresponds to random agent.
  4 | Max score corresponds to acceptable performance, for instance
  5 | human level performance in the case of Atari games.
  6 | """
  7 | 
  8 | from typing import NamedTuple
  9 | 
 10 | import numpy as np
 11 | 
 12 | 
 13 | class ReferenceScore(NamedTuple):
 14 |     env_id: str
 15 |     min: float
 16 |     max: float
 17 | 
 18 | 
 19 | reference_scores = [
 20 |     # PyBullet Envs
 21 |     ReferenceScore("HalfCheetahBulletEnv-v0", -1400, 3000),
 22 |     ReferenceScore("AntBulletEnv-v0", 300, 3500),
 23 |     ReferenceScore("HopperBulletEnv-v0", 20, 2500),
 24 |     ReferenceScore("Walker2DBulletEnv-v0", 200, 2500),
 25 | ]
 26 | 
 27 | # Alternative scaling
 28 | # Min is a poorly optimized algorithm
 29 | # reference_scores = [
 30 | #     ReferenceScore("HalfCheetahBulletEnv-v0", 1000, 3000),
 31 | #     ReferenceScore("AntBulletEnv-v0", 1000, 3500),
 32 | #     ReferenceScore("HopperBulletEnv-v0", 1000, 2500),
 33 | #     ReferenceScore("Walker2DBulletEnv-v0", 500, 2500),
 34 | # ]
 35 | 
 36 | min_max_score_per_env = {reference_score.env_id: reference_score for reference_score in reference_scores}
 37 | 
 38 | 
 39 | def normalize_score(score: np.ndarray, env_id: str) -> np.ndarray:
 40 |     """
 41 |     Normalize score to be in [0, 1] where 1 is maximal performance.
 42 | 
 43 |     :param score: unnormalized score
 44 |     :param env_id: environment id
 45 |     :return: normalized score
 46 |     """
 47 |     if env_id not in min_max_score_per_env:
 48 |         raise KeyError(f"No reference score for {env_id}")
 49 |     reference_score = min_max_score_per_env[env_id]
 50 |     return (score - reference_score.min) / (reference_score.max - reference_score.min)
 51 | 
 52 | 
 53 | # From rliable, for atari games:
 54 | #
 55 | # RANDOM_SCORES = {
 56 | #  'Alien': 227.8,
 57 | #  'Amidar': 5.8,
 58 | #  'Assault': 222.4,
 59 | #  'Asterix': 210.0,
 60 | #  'BankHeist': 14.2,
 61 | #  'BattleZone': 2360.0,
 62 | #  'Boxing': 0.1,
 63 | #  'Breakout': 1.7,
 64 | #  'ChopperCommand': 811.0,
 65 | #  'CrazyClimber': 10780.5,
 66 | #  'DemonAttack': 152.1,
 67 | #  'Freeway': 0.0,
 68 | #  'Frostbite': 65.2,
 69 | #  'Gopher': 257.6,
 70 | #  'Hero': 1027.0,
 71 | #  'Jamesbond': 29.0,
 72 | #  'Kangaroo': 52.0,
 73 | #  'Krull': 1598.0,
 74 | #  'KungFuMaster': 258.5,
 75 | #  'MsPacman': 307.3,
 76 | #  'Pong': -20.7,
 77 | #  'PrivateEye': 24.9,
 78 | #  'Qbert': 163.9,
 79 | #  'RoadRunner': 11.5,
 80 | #  'Seaquest': 68.4,
 81 | #  'UpNDown': 533.4
 82 | # }
 83 | #
 84 | # HUMAN_SCORES = {
 85 | #  'Alien': 7127.7,
 86 | #  'Amidar': 1719.5,
 87 | #  'Assault': 742.0,
 88 | #  'Asterix': 8503.3,
 89 | #  'BankHeist': 753.1,
 90 | #  'BattleZone': 37187.5,
 91 | #  'Boxing': 12.1,
 92 | #  'Breakout': 30.5,
 93 | #  'ChopperCommand': 7387.8,
 94 | #  'CrazyClimber': 35829.4,
 95 | #  'DemonAttack': 1971.0,
 96 | #  'Freeway': 29.6,
 97 | #  'Frostbite': 4334.7,
 98 | #  'Gopher': 2412.5,
 99 | #  'Hero': 30826.4,
100 | #  'Jamesbond': 302.8,
101 | #  'Kangaroo': 3035.0,
102 | #  'Krull': 2665.5,
103 | #  'KungFuMaster': 22736.3,
104 | #  'MsPacman': 6951.6,
105 | #  'Pong': 14.6,
106 | #  'PrivateEye': 69571.3,
107 | #  'Qbert': 13455.0,
108 | #  'RoadRunner': 7845.0,
109 | #  'Seaquest': 42054.7,
110 | #  'UpNDown': 11693.2
111 | # }
112 | 


--------------------------------------------------------------------------------
/rl_zoo3/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/rl_zoo3/py.typed


--------------------------------------------------------------------------------
/rl_zoo3/version.txt:
--------------------------------------------------------------------------------
1 | 2.6.1a1
2 | 


--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DLR-RM/rl-baselines3-zoo/577616cb9f13341579953cb0f6111e007acc0a1d/scripts/__init__.py


--------------------------------------------------------------------------------
/scripts/all_plots.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.all_plots import all_plots
2 | 
3 | if __name__ == "__main__":
4 |     all_plots()
5 | 


--------------------------------------------------------------------------------
/scripts/build_docker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | PARENT=stablebaselines/stable-baselines3
 4 | 
 5 | TAG=stablebaselines/rl-baselines3-zoo
 6 | VERSION=$(cat ./rl_zoo3/version.txt)
 7 | 
 8 | if [[ ${USE_GPU} == "True" ]]; then
 9 |   PARENT="${PARENT}:${VERSION}"
10 | else
11 |   PARENT="${PARENT}-cpu:${VERSION}"
12 |   TAG="${TAG}-cpu"
13 | fi
14 | 
15 | docker build --build-arg PARENT_IMAGE=${PARENT} -t ${TAG}:${VERSION} . -f docker/Dockerfile
16 | docker tag ${TAG}:${VERSION} ${TAG}:latest
17 | 
18 | if [[ ${RELEASE} == "True" ]]; then
19 |   docker push ${TAG}:${VERSION}
20 |   docker push ${TAG}:latest
21 | fi
22 | 


--------------------------------------------------------------------------------
/scripts/create_cluster_jobs.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Send multiple jobs to the cluster.
 3 | """
 4 | 
 5 | import os
 6 | import subprocess
 7 | import time
 8 | 
 9 | import numpy as np
10 | 
# Experiment grid: one job per (algo, env, log_std_init) and per seed
ALGOS = ["sac"]
ENVS = ["HalfCheetahBulletEnv-v0"]
N_SEEDS = 5
N_EVAL_EPISODES = 10
LOG_STD_INIT = [-6, -5, -4, -3, -2, -1, 0, 1]

os.makedirs(os.path.join("logs", "slurm"), exist_ok=True)

for algo in ALGOS:
    for env_id in ENVS:
        for log_std_init in LOG_STD_INIT:
            # One log folder per initial std value
            log_folder = f"logs_std_{np.exp(log_std_init):.4f}"
            for _ in range(N_SEEDS):
                args = [
                    "--algo",
                    algo,
                    "--env",
                    env_id,
                    "--hyperparams",
                    f'policy_kwargs:"dict(log_std_init={log_std_init}, net_arch=[400, 300])"',
                    "--eval-episodes",
                    N_EVAL_EPISODES,
                    "-f",
                    log_folder,
                    "-uuid",
                ]
                arg_str_list: list[str] = list(map(str, args))

                # The training command is passed as a single string to the cluster script
                command = " ".join(["python", "-u", "train.py", *arg_str_list])

                return_code = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command])
                if return_code != 0:
                    # Report the failed submission but keep submitting the remaining jobs
                    print(f"sbatch exited with code {return_code} for {algo} on {env_id} ({log_folder})")
                time.sleep(0.05)
43 | 


--------------------------------------------------------------------------------
/scripts/create_mujoco_jobs.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | import time
 4 | 
 5 | import numpy as np
 6 | 
 7 | ALGOS = ["sac", "td3", "tqc"]
 8 | # "Humanoid-v3",
 9 | ENVS = ["HalfCheetah-v3", "Ant-v3", "Hopper-v3", "Walker2d-v3", "Swimmer-v3"]
10 | N_SEEDS = 1
11 | EVAL_FREQ = 25000
12 | N_EVAL_EPISODES = 20
13 | N_EVAL_ENVS = 5
14 | np.random.seed(8)
15 | SEEDS = np.random.randint(2**20, size=(N_SEEDS,))
16 | # N_TIMESTEPS = int(1e6)
17 | 
18 | os.makedirs(os.path.join("logs", "slurm"), exist_ok=True)
19 | log_folder = "logs/"
20 | 
21 | 
22 | for algo in ALGOS:
23 |     for env_id in ENVS:
24 |         for seed in SEEDS:
25 |             args = [
26 |                 "--algo",
27 |                 algo,
28 |                 "--env",
29 |                 env_id,
30 |                 # "--hyperparams",
31 |                 # "use_sde:False",
32 |                 "--eval-episodes",
33 |                 N_EVAL_EPISODES,
34 |                 "--eval-freq",
35 |                 EVAL_FREQ,
36 |                 "--n-eval-envs",
37 |                 N_EVAL_ENVS,
38 |                 "-f",
39 |                 log_folder,
40 |                 "--seed",
41 |                 seed,
42 |                 "--log-interval",
43 |                 10,
44 |                 "--num-threads",
45 |                 2,
46 |                 # "-n",
47 |                 # N_TIMESTEPS,
48 |                 "-uuid",
49 |             ]
50 |             args = list(map(str, args))
51 | 
52 |             command = " ".join(["python", "-u", "train.py", *args])
53 | 
54 |             ok = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command])
55 |             time.sleep(0.05)
56 | 


--------------------------------------------------------------------------------
/scripts/migrate_to_hub.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | 
 3 | from rl_zoo3.utils import get_hf_trained_models, get_trained_models
 4 | 
 5 | folder = "rl-trained-agents"
 6 | orga = "sb3"
 7 | trained_models_local = get_trained_models(folder)
 8 | trained_models_hub = get_hf_trained_models(orga)
 9 | remaining_models = set(trained_models_local.keys()) - set(trained_models_hub.keys())
10 | 
11 | for trained_model in list(remaining_models):
12 |     algo, env_id = trained_models_local[trained_model]
13 |     args = ["-orga", orga, "-f", folder, "--algo", algo, "--env", env_id]
14 | 
15 |     # Since SB3 >= 1.1.0, HER is no more an algorithm but a replay buffer class
16 |     if algo == "her":
17 |         continue
18 | 
19 |     return_code = subprocess.call(["python", "-m", "rl_zoo3.push_to_hub", *args])
20 | 


--------------------------------------------------------------------------------
/scripts/parse_study.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import json
 3 | import os
 4 | import pickle
 5 | from pprint import pprint
 6 | 
 7 | import optuna
 8 | from optuna.trial import FrozenTrial
 9 | 
10 | 
def value_key(trial: FrozenTrial) -> float:
    """
    Sorting key for trials: the trial value, or -inf when the trial has no value.

    :param trial: trial to rank
    :return: value used to order the trial
    """
    return float("-inf") if trial.value is None else trial.value
17 | 
18 | 
print(
    "DEPRECATED: `parse_study.py` is deprecated, please use optuna-dashboard "
    "together with the `--trial-id` argument in the train script."
)


parser = argparse.ArgumentParser()
parser.add_argument("-i", "--study-file", help="Path to a pickle file contained a saved study", type=str)
parser.add_argument(
    "-f",
    "--folder",
    help="Folder where the best hyperparameter json files will be written",
    type=str,
    default="logs/hyperparameter_jsons",
)
parser.add_argument("--study-name", help="Study name used during hyperparameter optimization", type=str)
parser.add_argument("--storage", help="Database storage path used during hyperparameter optimization", type=str)
parser.add_argument("--print-n-best-trials", help="Show final return values for n best trials", type=int, default=0)
parser.add_argument(
    "--save-n-best-hyperparameters",
    help="Save the hyperparameters for the n best trials that resulted in the best returns",
    type=int,
    default=0,
)
args = parser.parse_args()

# The study is either loaded from a pickle file or from a database storage
if args.study_name is None:
    assert args.study_file is not None, "No --study-file, nor --study-name were provided."
    # SECURITY: unpickling can execute arbitrary code, only load study files that you trust
    with open(args.study_file, "rb") as f:
        study = pickle.load(f)

else:
    assert args.storage is not None, "No storage was specified."

    study = optuna.create_study(
        study_name=args.study_name,
        storage=args.storage,
        load_if_exists=True,
        direction="maximize",
    )

# Best trials first, trials without value last (see value_key)
trials = study.trials
trials.sort(key=value_key, reverse=True)

# Only go through the trials that are printed (nothing for n <= 0)
for idx, trial in enumerate(trials[: max(args.print_n_best_trials, 0)]):
    # A trial may have no value: do not crash on the float formatting
    value_str = "None" if trial.value is None else f"{trial.value:.2f}"
    print(f"# Top {idx + 1} - value: {value_str}")
    print()
    pprint(trial.params)
    print()

if args.save_n_best_hyperparameters > 0:
    os.makedirs(args.folder, exist_ok=True)
    for i in range(min(args.save_n_best_hyperparameters, len(trials))):
        # One json file per trial, numbered from the best one
        with open(os.path.join(args.folder, f"hyperparameters_{i + 1}.json"), "w") as json_file:
            json.dump(trials[i].params, json_file, indent=4)
    print(f"Saved best hyperparameters to {args.folder}")
77 | 


--------------------------------------------------------------------------------
/scripts/plot_from_file.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.plot_from_file import plot_from_file
2 | 
3 | if __name__ == "__main__":
4 |     plot_from_file()
5 | 


--------------------------------------------------------------------------------
/scripts/plot_train.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.plots.plot_train import plot_train
2 | 
3 | if __name__ == "__main__":
4 |     plot_train()
5 | 


--------------------------------------------------------------------------------
/scripts/run_docker_cpu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Launch an experiment using the docker cpu image
 3 | 
 4 | cmd_line="$@"
 5 | 
 6 | echo "Executing in the docker (cpu image):"
 7 | echo $cmd_line
 8 | 
 9 | # Note: --user=root is needed, as the current user id/group id will be used
10 | # to mount the log directory (and $MAMBAUSER is not root)
11 | docker run -it --user=root --rm --network host --ipc=host \
12 |  --mount src=$(pwd),target=/home/mambauser/code/rl_zoo3,type=bind stablebaselines/rl-baselines3-zoo-cpu:latest\
13 |   bash -c "cd /home/mambauser/code/rl_zoo3/ && $cmd_line"
14 | 


--------------------------------------------------------------------------------
/scripts/run_docker_gpu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Launch an experiment using the docker gpu image
 3 | 
 4 | cmd_line="$@"
 5 | 
 6 | echo "Executing in the docker (gpu image):"
 7 | echo $cmd_line
 8 | 
 9 | # Note: --user=root is needed, as the current user id/group id will be used
10 | # to mount the log directory (and $MAMBAUSER is not root)
11 | docker run -it --user=root --gpus all --rm --network host --ipc=host \
12 |   --mount src=$(pwd),target=/home/mambauser/code/rl_zoo3,type=bind stablebaselines/rl-baselines3-zoo:latest\
13 |   bash -c "cd /home/mambauser/code/rl_zoo3/ && $cmd_line"
14 | 


--------------------------------------------------------------------------------
/scripts/run_jobs.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Run multiple experiments on a single machine.
 3 | """
 4 | 
 5 | import subprocess
 6 | 
 7 | import numpy as np
 8 | 
 9 | ALGOS = ["sac"]
10 | ENVS = ["MountainCarContinuous-v0"]
11 | N_SEEDS = 10
12 | EVAL_FREQ = 5000
13 | N_EVAL_EPISODES = 10
14 | LOG_STD_INIT = [-6, -5, -4, -3, -2, -1, 0, 1]
15 | 
16 | for algo in ALGOS:
17 |     for env_id in ENVS:
18 |         for log_std_init in LOG_STD_INIT:
19 |             log_folder = f"logs_std_{np.exp(log_std_init):.4f}"
20 |             for _ in range(N_SEEDS):
21 |                 args = [
22 |                     "--algo",
23 |                     algo,
24 |                     "--env",
25 |                     env_id,
26 |                     "--hyperparams",
27 |                     f"policy_kwargs:dict(log_std_init={log_std_init}, net_arch=[64, 64])",
28 |                     "--eval-episodes",
29 |                     N_EVAL_EPISODES,
30 |                     "--eval-freq",
31 |                     EVAL_FREQ,
32 |                     "-f",
33 |                     log_folder,
34 |                 ]
35 |                 arg_str_list: list[str] = list(map(str, args))
36 | 
37 |                 ok = subprocess.call(["python", "train.py", *arg_str_list])
38 | 


--------------------------------------------------------------------------------
/scripts/run_tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python -m pytest --cov-config .coveragerc --cov-report html --cov-report term --cov=. -v -m "not slow" --color=yes
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | from setuptools import setup
 5 | 
 6 | with open(os.path.join("rl_zoo3", "version.txt")) as file_handler:
 7 |     __version__ = file_handler.read().strip()
 8 | 
 9 | # Copy hyperparams files for packaging
10 | shutil.copytree("hyperparams", os.path.join("rl_zoo3", "hyperparams"))
11 | 
12 | long_description = """
13 | # RL Baselines3 Zoo: A Training Framework for Stable Baselines3 Reinforcement Learning Agents
14 | 
15 | See https://github.com/DLR-RM/rl-baselines3-zoo
16 | """
17 | install_requires = [
18 |     "sb3_contrib>=2.6.1a1,<3.0",
19 |     "gymnasium>=0.29.1,<1.2.0",
20 |     "huggingface_sb3>=3.0,<4.0",
21 |     "tqdm",
22 |     "rich",
23 |     "optuna>=3.0",
24 |     "pyyaml>=5.1",
25 |     "pytablewriter~=1.2",
26 |     "shimmy~=2.0",
27 | ]
28 | plots_requires = ["seaborn", "rliable~=1.2.0", "scipy~=1.10"]
29 | test_requires = [
30 |     # for MuJoCo envs v4:
31 |     "mujoco>=2.3,<4",
32 |     # install parking-env to test HER
33 |     "highway-env>=1.10.1,<1.11.0",
34 | ]
35 | 
36 | setup(
37 |     name="rl_zoo3",
38 |     packages=["rl_zoo3", "rl_zoo3.plots"],
39 |     package_data={
40 |         "rl_zoo3": [
41 |             "py.typed",
42 |             "version.txt",
43 |             "hyperparams/*.yml",
44 |         ]
45 |     },
46 |     entry_points={"console_scripts": ["rl_zoo3=rl_zoo3.cli:main"]},
47 |     install_requires=install_requires,
48 |     extras_require={"plots": plots_requires, "tests": test_requires},
49 |     description="A Training Framework for Stable Baselines3 Reinforcement Learning Agents",
50 |     author="Antonin Raffin",
51 |     url="https://github.com/DLR-RM/rl-baselines3-zoo",
52 |     author_email="antonin.raffin@dlr.de",
53 |     keywords="reinforcement-learning-algorithms reinforcement-learning machine-learning "
54 |     "gymnasium openai stable baselines sb3 toolbox python data-science",
55 |     license="MIT",
56 |     long_description=long_description,
57 |     long_description_content_type="text/markdown",
58 |     version=__version__,
59 |     python_requires=">=3.9",
60 |     # PyPI package information.
61 |     project_urls={
62 |         "Code": "https://github.com/DLR-RM/rl-baselines3-zoo",
63 |         "Documentation": "https://rl-baselines3-zoo.readthedocs.io/en/master/",
64 |         "Changelog": "https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md",
65 |         "Stable-Baselines3": "https://github.com/DLR-RM/stable-baselines3",
66 |         "RL-Zoo": "https://github.com/DLR-RM/rl-baselines3-zoo",
67 |         "SBX": "https://github.com/araffin/sbx",
68 |     },
69 |     classifiers=[
70 |         "Programming Language :: Python :: 3",
71 |         "Programming Language :: Python :: 3.9",
72 |         "Programming Language :: Python :: 3.10",
73 |         "Programming Language :: Python :: 3.11",
74 |         "Programming Language :: Python :: 3.12",
75 |     ],
76 | )
77 | 
78 | # Remove copied files after packaging
79 | shutil.rmtree(os.path.join("rl_zoo3", "hyperparams"))
80 | 


--------------------------------------------------------------------------------
/tests/dummy_env/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup

# Minimal packaging script for the dummy test environment: only the
# distribution name is declared here.
setup(name="test_env")
4 | 


--------------------------------------------------------------------------------
/tests/dummy_env/test_env/__init__.py:
--------------------------------------------------------------------------------
1 | from gymnasium.envs.registration import register
2 | 
# Register the dummy environment with Gymnasium under the id "TestEnv-v0";
# the entry point refers to the ``TestEnv`` class in ``test_env/test_env.py``.
register(id="TestEnv-v0", entry_point="test_env.test_env:TestEnv")
7 | 


--------------------------------------------------------------------------------
/tests/dummy_env/test_env/config.py:
--------------------------------------------------------------------------------
# Hyperparameters for the dummy environment, keyed by environment id.
hyperparams = {
    "TestEnv-v0": dict(
        policy="MlpPolicy",
        n_timesteps=20_000,
    ),
}
7 | 


--------------------------------------------------------------------------------
/tests/dummy_env/test_env/test_env.py:
--------------------------------------------------------------------------------
 1 | from typing import ClassVar
 2 | 
 3 | import gymnasium as gym
 4 | import numpy as np
 5 | from gymnasium import spaces
 6 | 
 7 | 
 8 | class TestEnv(gym.Env):
 9 |     metadata: ClassVar[dict] = {"render_modes": ["human"], "render_fps": 4}
10 |     __test__ = False
11 | 
12 |     def __init__(self, render_mode=None):
13 |         super().__init__()
14 | 
15 |         self.action_space = spaces.Discrete(2)
16 |         self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)
17 | 
18 |     def step(self, action):
19 |         return self.observation_space.sample(), 0.0, self.np_random.random() > 0.5, False, {}
20 | 
21 |     def reset(self, *, seed=None, options=None):
22 |         super().reset(seed=seed)
23 |         if seed is not None:
24 |             self.observation_space.seed(seed)
25 |         return self.observation_space.sample(), {}
26 | 
27 |     def render(self, mode="human"):
28 |         pass
29 | 
30 | 
if __name__ == "__main__":
    from gymnasium.utils.env_checker import check_env as gym_check
    from stable_baselines3.common.env_checker import check_env

    # Run the Stable-Baselines3 checker first, then the Gymnasium one,
    # each on its own freshly created environment instance.
    for run_checker in (check_env, gym_check):
        run_checker(TestEnv())
37 | 


--------------------------------------------------------------------------------
/tests/test_callbacks.py:
--------------------------------------------------------------------------------
 1 | import shlex
 2 | import subprocess
 3 | 
 4 | import pytest
 5 | import stable_baselines3 as sb3
 6 | 
 7 | from rl_zoo3.utils import get_callback_list
 8 | 
 9 | 
10 | def _assert_eq(left, right):
11 |     assert left == right, f"{left} != {right}"
12 | 
13 | 
def test_raw_stat_callback(tmp_path):
    """
    Run a short PPO training on CartPole-v1 with ``RawStatisticsCallback``
    passed through ``-params`` and check that the process exits with code 0.

    :param tmp_path: Temporary directory (pytest fixture) used both as the
        log folder and as the tensorboard log folder.
    """
    log_dir = str(tmp_path)
    # Build the argument list directly instead of splitting a command string:
    # ``shlex.split`` would break a temporary path that contains spaces and
    # would strip the backslashes of a Windows path.
    args = [
        "python",
        "train.py",
        "-n",
        "200",
        "--algo",
        "ppo",
        "--env",
        "CartPole-v1",
        "--log-folder",
        log_dir,
        "--tensorboard-log",
        log_dir,
        "-params",
        # The inner single quotes are part of the value received by train.py
        "callback:'rl_zoo3.callbacks.RawStatisticsCallback'",
    ]
    return_code = subprocess.call(args)
    _assert_eq(return_code, 0)
21 | 
22 | 
@pytest.mark.parametrize(
    "callback",
    [
        None,
        "rl_zoo3.callbacks.RawStatisticsCallback",
        [
            {"stable_baselines3.common.callbacks.StopTrainingOnMaxEpisodes": {"max_episodes": 3}},
            "rl_zoo3.callbacks.RawStatisticsCallback",
        ],
        [sb3.common.callbacks.StopTrainingOnMaxEpisodes(3)],
    ],
)
def test_get_callback(callback):
    """
    Check the number of callbacks returned by ``get_callback_list``:
    none for ``None``, one for a single string, and one per entry for a list.
    """
    callback_list = get_callback_list({"callback": callback})

    if callback is None:
        expected_count = 0
    elif isinstance(callback, str):
        expected_count = 1
    else:
        expected_count = len(callback)

    assert len(callback_list) == expected_count
44 | 


--------------------------------------------------------------------------------
/tests/test_wrappers.py:
--------------------------------------------------------------------------------
 1 | import gymnasium as gym
 2 | import numpy as np
 3 | import pytest
 4 | import stable_baselines3 as sb3
 5 | from sb3_contrib.common.wrappers import TimeFeatureWrapper
 6 | from stable_baselines3 import A2C
 7 | from stable_baselines3.common.env_checker import check_env
 8 | from stable_baselines3.common.env_util import DummyVecEnv
 9 | 
10 | import rl_zoo3.import_envs
11 | import rl_zoo3.wrappers
12 | from rl_zoo3.utils import SimpleLinearSchedule, get_wrapper_class, linear_schedule
13 | from rl_zoo3.wrappers import ActionNoiseWrapper, DelayedRewardWrapper, HistoryWrapper
14 | 
15 | 
def test_wrappers():
    """Stack the wrappers on top of Ant-v4 and validate the result with ``check_env``."""
    env = gym.make("Ant-v4")
    # Applied in order: the last wrapper ends up outermost.
    wrapper_stack = (
        DelayedRewardWrapper,
        ActionNoiseWrapper,
        HistoryWrapper,
        TimeFeatureWrapper,
    )
    for wrapper_cls in wrapper_stack:
        env = wrapper_cls(env)
    check_env(env)
23 | 
24 | 
@pytest.mark.parametrize(
    "env_wrapper",
    [
        None,
        {"rl_zoo3.wrappers.HistoryWrapper": {"horizon": 2}},
        [{"rl_zoo3.wrappers.HistoryWrapper": {"horizon": 3}}, "rl_zoo3.wrappers.TimeFeatureWrapper"],
        [{rl_zoo3.wrappers.HistoryWrapper: {"horizon": 3}}, "rl_zoo3.wrappers.TimeFeatureWrapper"],
    ],
)
def test_get_wrapper(env_wrapper):
    """
    Build a wrapper from the ``env_wrapper`` hyperparameter (given as a dict
    or a list, with string or class keys), apply it to Ant-v4 when one was
    specified, and validate the environment with ``check_env``.
    """
    checked_env = gym.make("Ant-v4")
    wrapper_class = get_wrapper_class({"env_wrapper": env_wrapper})
    # Without a wrapper specification the bare environment is checked
    if env_wrapper is not None:
        checked_env = wrapper_class(checked_env)
    check_env(checked_env)
41 | 
42 | 
@pytest.mark.parametrize(
    "vec_env_wrapper",
    [
        None,
        {"stable_baselines3.common.vec_env.VecFrameStack": {"n_stack": 2}},
        {sb3.common.vec_env.VecFrameStack: {"n_stack": 2}},
        [{"stable_baselines3.common.vec_env.VecFrameStack": {"n_stack": 3}}, "stable_baselines3.common.vec_env.VecMonitor"],
    ],
)
def test_get_vec_env_wrapper(vec_env_wrapper):
    """
    Build a VecEnv wrapper from the ``vec_env_wrapper`` hyperparameter, apply
    it (if any) to a single-env ``DummyVecEnv`` around Ant-v4, and run a few
    A2C training steps on the result.
    """
    base_env = gym.make("Ant-v4")
    vec_env = DummyVecEnv([lambda: base_env])
    wrapper_class = get_wrapper_class({"vec_env_wrapper": vec_env_wrapper}, "vec_env_wrapper")
    if wrapper_class is not None:
        vec_env = wrapper_class(vec_env)
    model = A2C("MlpPolicy", vec_env)
    model.learn(16)
60 | 
61 | 
def test_linear_schedule():
    """
    ``linear_schedule(100)`` must return a ``SimpleLinearSchedule`` whose
    value is the input multiplied by 100 at the sampled points.
    """
    schedule = linear_schedule(100)
    assert isinstance(schedule, SimpleLinearSchedule)
    # (schedule input, expected value) pairs
    expected_points = ((1.0, 100.0), (0.5, 50.0), (0.0, 0.0))
    for schedule_input, expected_value in expected_points:
        assert np.allclose(schedule(schedule_input), expected_value)
68 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from rl_zoo3.train import train
2 | 
# Script entry point: call ``train()`` (imported from ``rl_zoo3.train``) only
# when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    train()
5 | 


--------------------------------------------------------------------------------