├── .gitignore ├── CleanRL ├── algorithms │ ├── a3c │ │ ├── a3c.py │ │ └── run_a3c.py │ ├── ac_a2c │ │ ├── actor_critic.py │ │ └── run_actor_critic.py │ ├── dpg_ddpg │ │ ├── dpg_ddpg.py │ │ └── run_dpg_ddpg.py │ ├── dqn │ │ ├── dqn.py │ │ └── run_dqn.py │ ├── ppo │ │ ├── ppo.py │ │ └── run_ppo.py │ ├── q_learning │ │ ├── q_learning.py │ │ └── run_q_learning.py │ ├── reinforce │ │ ├── reinforce.py │ │ └── run_reinforce.py │ ├── sarsa │ │ ├── run_sarsa.py │ │ └── sarsa.py │ └── trpo │ │ ├── run_trpo.py │ │ └── trpo.py └── common │ ├── layers.py │ ├── sum_tree.py │ └── video_writer.py ├── Examples └── super_mario │ ├── arguments.py │ ├── dqn.py │ ├── environment.py │ ├── human_play.py │ ├── run_train.py │ └── scripts │ └── train_s1-1_default.sh ├── Experiments └── run_mc_cartpole.py ├── LICENSE ├── README.md └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/.gitignore -------------------------------------------------------------------------------- /CleanRL/algorithms/a3c/a3c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/a3c/a3c.py -------------------------------------------------------------------------------- /CleanRL/algorithms/a3c/run_a3c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/a3c/run_a3c.py -------------------------------------------------------------------------------- /CleanRL/algorithms/ac_a2c/actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/ac_a2c/actor_critic.py -------------------------------------------------------------------------------- /CleanRL/algorithms/ac_a2c/run_actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/ac_a2c/run_actor_critic.py -------------------------------------------------------------------------------- /CleanRL/algorithms/dpg_ddpg/dpg_ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/dpg_ddpg/dpg_ddpg.py -------------------------------------------------------------------------------- /CleanRL/algorithms/dpg_ddpg/run_dpg_ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/dpg_ddpg/run_dpg_ddpg.py -------------------------------------------------------------------------------- /CleanRL/algorithms/dqn/dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/dqn/dqn.py -------------------------------------------------------------------------------- /CleanRL/algorithms/dqn/run_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/dqn/run_dqn.py -------------------------------------------------------------------------------- /CleanRL/algorithms/ppo/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/ppo/ppo.py -------------------------------------------------------------------------------- /CleanRL/algorithms/ppo/run_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/ppo/run_ppo.py -------------------------------------------------------------------------------- /CleanRL/algorithms/q_learning/q_learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/q_learning/q_learning.py -------------------------------------------------------------------------------- /CleanRL/algorithms/q_learning/run_q_learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/q_learning/run_q_learning.py -------------------------------------------------------------------------------- /CleanRL/algorithms/reinforce/reinforce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/reinforce/reinforce.py -------------------------------------------------------------------------------- /CleanRL/algorithms/reinforce/run_reinforce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/reinforce/run_reinforce.py -------------------------------------------------------------------------------- /CleanRL/algorithms/sarsa/run_sarsa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/sarsa/run_sarsa.py -------------------------------------------------------------------------------- /CleanRL/algorithms/sarsa/sarsa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/sarsa/sarsa.py -------------------------------------------------------------------------------- /CleanRL/algorithms/trpo/run_trpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/trpo/run_trpo.py -------------------------------------------------------------------------------- /CleanRL/algorithms/trpo/trpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/algorithms/trpo/trpo.py -------------------------------------------------------------------------------- /CleanRL/common/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/common/layers.py -------------------------------------------------------------------------------- /CleanRL/common/sum_tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/common/sum_tree.py -------------------------------------------------------------------------------- /CleanRL/common/video_writer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/CleanRL/common/video_writer.py -------------------------------------------------------------------------------- /Examples/super_mario/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Examples/super_mario/arguments.py -------------------------------------------------------------------------------- /Examples/super_mario/dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Examples/super_mario/dqn.py -------------------------------------------------------------------------------- /Examples/super_mario/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Examples/super_mario/environment.py -------------------------------------------------------------------------------- /Examples/super_mario/human_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Examples/super_mario/human_play.py -------------------------------------------------------------------------------- /Examples/super_mario/run_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Examples/super_mario/run_train.py -------------------------------------------------------------------------------- /Examples/super_mario/scripts/train_s1-1_default.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Examples/super_mario/scripts/train_s1-1_default.sh -------------------------------------------------------------------------------- /Experiments/run_mc_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/Experiments/run_mc_cartpole.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/firechecking/CleanRL/HEAD/README.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | nes_py==8.2.1 4 | gym-super-mario-bros==7.4.0 5 | gym==0.26.2 6 | --------------------------------------------------------------------------------