├── .gitignore ├── README.md ├── better_snake ├── models │ └── snake-basterd.zip ├── old_env │ ├── apple.py │ ├── environment.py │ └── snake.py ├── old_env_2 │ ├── cube.py │ ├── environment.py │ ├── self_play.py │ └── snake.py └── ppo.py ├── flappyb ├── dqn_rainbow.py ├── dqn_v2.py ├── dqn_v3.py ├── environment │ ├── assets │ │ ├── Pong-653x400.png │ │ ├── all_fonts_script.py │ │ ├── bg.png │ │ ├── bird.png │ │ ├── pipe.png │ │ ├── pipe_long.png │ │ └── sapcraft.jpg │ ├── bird.py │ ├── environment.py │ └── pipe.py ├── lib │ ├── common.py │ ├── dqn_model.py │ ├── dqn_rainbow.py │ └── ppo_model.py ├── models │ ├── cross_entropy │ │ └── batchsize=100-hiddensize=256-lr=0.01-gamma=.9-PART=240.pt │ ├── dqn │ │ ├── dqn-expdecay=0.99995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-LOADED=HARDCORE-6300-lrMax=0.4-nextPipe-HELL-PART=1000.h5 │ │ ├── dqn-expdecay=0.99995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-PART=6650.h5 │ │ └── dqn-expdecay=0.999995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-HARDCORE-PART=6300.h5 │ ├── flappyb-test-the-rainbow254 │ ├── flappyb-test-the-rainbow350 │ └── flappyb-test-the-rainbow87 ├── play_dqn_rainbow.py ├── play_ppo.py ├── play_self.py ├── ppo.py └── saves │ └── ppo-test-flappyb │ ├── best_+10.400_555000.dat │ ├── best_+11.270_556000.dat │ ├── best_+131.310_576000.dat │ ├── best_+20.470_558000.dat │ ├── best_+4.650_165000.dat │ ├── best_+4.860_370000.dat │ ├── best_+44.070_560000.dat │ ├── best_+44.560_561000.dat │ ├── best_+5.290_475000.dat │ ├── best_+5.530_495000.dat │ ├── best_+5.740_516000.dat │ ├── best_+5.820_538000.dat │ ├── best_+56.790_570000.dat │ ├── best_+6.250_539000.dat │ ├── best_+6.820_542000.dat │ ├── best_+7.200_547000.dat │ └── best_+8.690_550000.dat ├── old_agents ├── cross_entropy.py ├── cross_entropy_advanced.py ├── dqn_snake_v2.py ├── q_iteration.py ├── q_learning.py └── value_iteration.py ├── requirements.txt ├── runTensorBoard └── snake ├── base_ppo.py ├── env_new ├── cube.py ├── environment.py ├── self_play.py └── snake.py ├── environment ├── apple.py ├── environment.py └── snake.py ├── lib ├── common.py ├── dqn_model.py ├── dqn_rainbow.py └── ppo_model.py ├── play_ppo.py ├── ppo.py └── self_play.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/README.md -------------------------------------------------------------------------------- /better_snake/models/snake-basterd.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/models/snake-basterd.zip -------------------------------------------------------------------------------- /better_snake/old_env/apple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env/apple.py -------------------------------------------------------------------------------- /better_snake/old_env/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env/environment.py -------------------------------------------------------------------------------- /better_snake/old_env/snake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env/snake.py -------------------------------------------------------------------------------- /better_snake/old_env_2/cube.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env_2/cube.py -------------------------------------------------------------------------------- /better_snake/old_env_2/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env_2/environment.py -------------------------------------------------------------------------------- /better_snake/old_env_2/self_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env_2/self_play.py -------------------------------------------------------------------------------- /better_snake/old_env_2/snake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/old_env_2/snake.py -------------------------------------------------------------------------------- /better_snake/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/better_snake/ppo.py -------------------------------------------------------------------------------- /flappyb/dqn_rainbow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/dqn_rainbow.py -------------------------------------------------------------------------------- /flappyb/dqn_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/dqn_v2.py -------------------------------------------------------------------------------- /flappyb/dqn_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/dqn_v3.py -------------------------------------------------------------------------------- /flappyb/environment/assets/Pong-653x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/Pong-653x400.png -------------------------------------------------------------------------------- /flappyb/environment/assets/all_fonts_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/all_fonts_script.py -------------------------------------------------------------------------------- /flappyb/environment/assets/bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/bg.png -------------------------------------------------------------------------------- /flappyb/environment/assets/bird.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/bird.png -------------------------------------------------------------------------------- /flappyb/environment/assets/pipe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/pipe.png -------------------------------------------------------------------------------- /flappyb/environment/assets/pipe_long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/pipe_long.png -------------------------------------------------------------------------------- /flappyb/environment/assets/sapcraft.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/assets/sapcraft.jpg -------------------------------------------------------------------------------- /flappyb/environment/bird.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/bird.py -------------------------------------------------------------------------------- /flappyb/environment/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/environment.py -------------------------------------------------------------------------------- /flappyb/environment/pipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/environment/pipe.py -------------------------------------------------------------------------------- /flappyb/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/lib/common.py -------------------------------------------------------------------------------- /flappyb/lib/dqn_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/lib/dqn_model.py -------------------------------------------------------------------------------- /flappyb/lib/dqn_rainbow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/lib/dqn_rainbow.py -------------------------------------------------------------------------------- /flappyb/lib/ppo_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/lib/ppo_model.py -------------------------------------------------------------------------------- /flappyb/models/cross_entropy/batchsize=100-hiddensize=256-lr=0.01-gamma=.9-PART=240.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/cross_entropy/batchsize=100-hiddensize=256-lr=0.01-gamma=.9-PART=240.pt -------------------------------------------------------------------------------- /flappyb/models/dqn/dqn-expdecay=0.99995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-LOADED=HARDCORE-6300-lrMax=0.4-nextPipe-HELL-PART=1000.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/dqn/dqn-expdecay=0.99995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-LOADED=HARDCORE-6300-lrMax=0.4-nextPipe-HELL-PART=1000.h5 -------------------------------------------------------------------------------- /flappyb/models/dqn/dqn-expdecay=0.99995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-PART=6650.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/dqn/dqn-expdecay=0.99995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-PART=6650.h5 -------------------------------------------------------------------------------- /flappyb/models/dqn/dqn-expdecay=0.999995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-HARDCORE-PART=6300.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/dqn/dqn-expdecay=0.999995-gamma=.9-batchsize=20-nn=512-lr=0.001-normalization-HARDCORE-PART=6300.h5 -------------------------------------------------------------------------------- /flappyb/models/flappyb-test-the-rainbow254: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/flappyb-test-the-rainbow254 -------------------------------------------------------------------------------- /flappyb/models/flappyb-test-the-rainbow350: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/flappyb-test-the-rainbow350 -------------------------------------------------------------------------------- /flappyb/models/flappyb-test-the-rainbow87: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/models/flappyb-test-the-rainbow87 -------------------------------------------------------------------------------- /flappyb/play_dqn_rainbow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/play_dqn_rainbow.py -------------------------------------------------------------------------------- /flappyb/play_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/play_ppo.py -------------------------------------------------------------------------------- /flappyb/play_self.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/play_self.py -------------------------------------------------------------------------------- /flappyb/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/ppo.py -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+10.400_555000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+10.400_555000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+11.270_556000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+11.270_556000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+131.310_576000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+131.310_576000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+20.470_558000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+20.470_558000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+4.650_165000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+4.650_165000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+4.860_370000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+4.860_370000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+44.070_560000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+44.070_560000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+44.560_561000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+44.560_561000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+5.290_475000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+5.290_475000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+5.530_495000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+5.530_495000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+5.740_516000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+5.740_516000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+5.820_538000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+5.820_538000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+56.790_570000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+56.790_570000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+6.250_539000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+6.250_539000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+6.820_542000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+6.820_542000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+7.200_547000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+7.200_547000.dat -------------------------------------------------------------------------------- /flappyb/saves/ppo-test-flappyb/best_+8.690_550000.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/flappyb/saves/ppo-test-flappyb/best_+8.690_550000.dat -------------------------------------------------------------------------------- /old_agents/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/old_agents/cross_entropy.py -------------------------------------------------------------------------------- /old_agents/cross_entropy_advanced.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/old_agents/cross_entropy_advanced.py -------------------------------------------------------------------------------- /old_agents/dqn_snake_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/old_agents/dqn_snake_v2.py -------------------------------------------------------------------------------- /old_agents/q_iteration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/old_agents/q_iteration.py -------------------------------------------------------------------------------- /old_agents/q_learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/old_agents/q_learning.py -------------------------------------------------------------------------------- /old_agents/value_iteration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/old_agents/value_iteration.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/requirements.txt -------------------------------------------------------------------------------- /runTensorBoard: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/runTensorBoard -------------------------------------------------------------------------------- /snake/base_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/base_ppo.py -------------------------------------------------------------------------------- /snake/env_new/cube.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/env_new/cube.py -------------------------------------------------------------------------------- /snake/env_new/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/env_new/environment.py -------------------------------------------------------------------------------- /snake/env_new/self_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/env_new/self_play.py -------------------------------------------------------------------------------- /snake/env_new/snake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/env_new/snake.py -------------------------------------------------------------------------------- /snake/environment/apple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/environment/apple.py -------------------------------------------------------------------------------- /snake/environment/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/environment/environment.py -------------------------------------------------------------------------------- /snake/environment/snake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/environment/snake.py -------------------------------------------------------------------------------- /snake/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/lib/common.py -------------------------------------------------------------------------------- /snake/lib/dqn_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/lib/dqn_model.py -------------------------------------------------------------------------------- /snake/lib/dqn_rainbow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/lib/dqn_rainbow.py -------------------------------------------------------------------------------- /snake/lib/ppo_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/lib/ppo_model.py -------------------------------------------------------------------------------- /snake/play_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/play_ppo.py -------------------------------------------------------------------------------- /snake/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/ppo.py -------------------------------------------------------------------------------- /snake/self_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VieVaWaldi/ReinforcementLearning/HEAD/snake/self_play.py --------------------------------------------------------------------------------