├── .gitignore ├── AccessControl ├── ServerAccess.ipynb ├── ServerAccess.py ├── TileCoding.py └── differential_sarsa.png ├── BairdExample ├── Baird.png ├── BairdCounterExample.ipynb ├── BairdCounterExample.py ├── TDC.png ├── TDC_v.png ├── rho.png └── weights_update.png ├── BlackJack ├── blackjack_mc.ipynb ├── blackjack_mc.py ├── blackjack_solution.ipynb ├── blackjack_solution.py ├── blackjack_test.ipynb └── policy ├── CliffWalking ├── Q-learning.png ├── cliff.png ├── cliffWalking.ipynb ├── cliffWalking.py └── sarsa.png ├── DynaMaze ├── DynaMaze.ipynb ├── DynaMaze.py ├── DynaQ+.py ├── Maze.png ├── PrioritySweeping.ipynb ├── PrioritySweeping.py ├── Tabular_Dyna-Q.png └── ps.png ├── GridWorld ├── GridBoard-Q.ipynb ├── GridWorld.ipynb ├── Q-steps.png ├── board.png ├── gridWorld.py └── gridWorld_Q.py ├── LICENSE ├── MountainCar(Lambda) ├── MountainCar(Lambda).ipynb ├── MountainCar.py ├── Sarsa(lambda).png └── TileCoding.py ├── MountainCar ├── MountainCar.ipynb ├── MountainCar.png ├── MountainCar.py ├── TileCoding.py ├── semi-sarsa.png └── update_rule.png ├── Multi-ArmBandit ├── Bandit.ipynb ├── UCB1.png └── bandit.py ├── README.md ├── RandomWalk(General) ├── RandomWalk.ipynb ├── RandomWalk.py ├── semi-TD.png └── update.png ├── RandomWalk(Lambda) ├── Gt.png ├── Gtn.png ├── TD(Lambda).ipynb ├── TD_Lambda.py ├── TD_lambda.png ├── illustration.png └── offline_lambda.png ├── RandomWalk ├── RandomWalk(n-step).ipynb ├── RandomWalk(n-step).py ├── n-step.png └── rw-game.png ├── ShortCorridor ├── ShortCorridor.ipynb ├── ShortCorridor.py ├── corridor.png ├── h.png ├── mc_policy_gradient.png └── policy.png ├── TicTacToe ├── board.png ├── policy_p1 ├── policy_p2 ├── tic-tac-toe.ipynb └── ticTacToe.py ├── TileCoding ├── TileCoding.ipynb ├── Tiling.png └── tile_coding.py └── WindyGridWorld ├── Windy_GW.ipynb ├── board.png └── windyGridWorld.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/.gitignore -------------------------------------------------------------------------------- /AccessControl/ServerAccess.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/AccessControl/ServerAccess.ipynb -------------------------------------------------------------------------------- /AccessControl/ServerAccess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/AccessControl/ServerAccess.py -------------------------------------------------------------------------------- /AccessControl/TileCoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/AccessControl/TileCoding.py -------------------------------------------------------------------------------- /AccessControl/differential_sarsa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/AccessControl/differential_sarsa.png -------------------------------------------------------------------------------- /BairdExample/Baird.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/Baird.png -------------------------------------------------------------------------------- /BairdExample/BairdCounterExample.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/BairdCounterExample.ipynb -------------------------------------------------------------------------------- /BairdExample/BairdCounterExample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/BairdCounterExample.py -------------------------------------------------------------------------------- /BairdExample/TDC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/TDC.png -------------------------------------------------------------------------------- /BairdExample/TDC_v.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/TDC_v.png -------------------------------------------------------------------------------- /BairdExample/rho.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/rho.png -------------------------------------------------------------------------------- /BairdExample/weights_update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BairdExample/weights_update.png -------------------------------------------------------------------------------- /BlackJack/blackjack_mc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BlackJack/blackjack_mc.ipynb -------------------------------------------------------------------------------- /BlackJack/blackjack_mc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BlackJack/blackjack_mc.py -------------------------------------------------------------------------------- /BlackJack/blackjack_solution.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BlackJack/blackjack_solution.ipynb -------------------------------------------------------------------------------- /BlackJack/blackjack_solution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BlackJack/blackjack_solution.py -------------------------------------------------------------------------------- /BlackJack/blackjack_test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BlackJack/blackjack_test.ipynb -------------------------------------------------------------------------------- /BlackJack/policy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/BlackJack/policy -------------------------------------------------------------------------------- /CliffWalking/Q-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/CliffWalking/Q-learning.png -------------------------------------------------------------------------------- /CliffWalking/cliff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/CliffWalking/cliff.png -------------------------------------------------------------------------------- /CliffWalking/cliffWalking.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/CliffWalking/cliffWalking.ipynb -------------------------------------------------------------------------------- /CliffWalking/cliffWalking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/CliffWalking/cliffWalking.py -------------------------------------------------------------------------------- /CliffWalking/sarsa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/CliffWalking/sarsa.png -------------------------------------------------------------------------------- /DynaMaze/DynaMaze.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/DynaMaze.ipynb -------------------------------------------------------------------------------- /DynaMaze/DynaMaze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/DynaMaze.py -------------------------------------------------------------------------------- /DynaMaze/DynaQ+.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/DynaQ+.py -------------------------------------------------------------------------------- /DynaMaze/Maze.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/Maze.png -------------------------------------------------------------------------------- /DynaMaze/PrioritySweeping.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/PrioritySweeping.ipynb -------------------------------------------------------------------------------- /DynaMaze/PrioritySweeping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/PrioritySweeping.py -------------------------------------------------------------------------------- /DynaMaze/Tabular_Dyna-Q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/Tabular_Dyna-Q.png -------------------------------------------------------------------------------- /DynaMaze/ps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/DynaMaze/ps.png -------------------------------------------------------------------------------- /GridWorld/GridBoard-Q.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/GridWorld/GridBoard-Q.ipynb -------------------------------------------------------------------------------- /GridWorld/GridWorld.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/GridWorld/GridWorld.ipynb -------------------------------------------------------------------------------- /GridWorld/Q-steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/GridWorld/Q-steps.png -------------------------------------------------------------------------------- /GridWorld/board.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/GridWorld/board.png -------------------------------------------------------------------------------- /GridWorld/gridWorld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/GridWorld/gridWorld.py -------------------------------------------------------------------------------- /GridWorld/gridWorld_Q.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/GridWorld/gridWorld_Q.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/LICENSE -------------------------------------------------------------------------------- /MountainCar(Lambda)/MountainCar(Lambda).ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar(Lambda)/MountainCar(Lambda).ipynb -------------------------------------------------------------------------------- /MountainCar(Lambda)/MountainCar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar(Lambda)/MountainCar.py -------------------------------------------------------------------------------- /MountainCar(Lambda)/Sarsa(lambda).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar(Lambda)/Sarsa(lambda).png -------------------------------------------------------------------------------- /MountainCar(Lambda)/TileCoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar(Lambda)/TileCoding.py -------------------------------------------------------------------------------- /MountainCar/MountainCar.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar/MountainCar.ipynb -------------------------------------------------------------------------------- /MountainCar/MountainCar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar/MountainCar.png -------------------------------------------------------------------------------- /MountainCar/MountainCar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar/MountainCar.py -------------------------------------------------------------------------------- /MountainCar/TileCoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar/TileCoding.py -------------------------------------------------------------------------------- /MountainCar/semi-sarsa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar/semi-sarsa.png -------------------------------------------------------------------------------- /MountainCar/update_rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/MountainCar/update_rule.png -------------------------------------------------------------------------------- /Multi-ArmBandit/Bandit.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/Multi-ArmBandit/Bandit.ipynb -------------------------------------------------------------------------------- /Multi-ArmBandit/UCB1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/Multi-ArmBandit/UCB1.png -------------------------------------------------------------------------------- /Multi-ArmBandit/bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/Multi-ArmBandit/bandit.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/README.md -------------------------------------------------------------------------------- /RandomWalk(General)/RandomWalk.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(General)/RandomWalk.ipynb -------------------------------------------------------------------------------- /RandomWalk(General)/RandomWalk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(General)/RandomWalk.py -------------------------------------------------------------------------------- /RandomWalk(General)/semi-TD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(General)/semi-TD.png -------------------------------------------------------------------------------- /RandomWalk(General)/update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(General)/update.png -------------------------------------------------------------------------------- /RandomWalk(Lambda)/Gt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/Gt.png -------------------------------------------------------------------------------- /RandomWalk(Lambda)/Gtn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/Gtn.png -------------------------------------------------------------------------------- /RandomWalk(Lambda)/TD(Lambda).ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/TD(Lambda).ipynb -------------------------------------------------------------------------------- /RandomWalk(Lambda)/TD_Lambda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/TD_Lambda.py -------------------------------------------------------------------------------- /RandomWalk(Lambda)/TD_lambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/TD_lambda.png -------------------------------------------------------------------------------- /RandomWalk(Lambda)/illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/illustration.png -------------------------------------------------------------------------------- /RandomWalk(Lambda)/offline_lambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk(Lambda)/offline_lambda.png -------------------------------------------------------------------------------- /RandomWalk/RandomWalk(n-step).ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk/RandomWalk(n-step).ipynb -------------------------------------------------------------------------------- /RandomWalk/RandomWalk(n-step).py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk/RandomWalk(n-step).py -------------------------------------------------------------------------------- /RandomWalk/n-step.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk/n-step.png -------------------------------------------------------------------------------- /RandomWalk/rw-game.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/RandomWalk/rw-game.png -------------------------------------------------------------------------------- /ShortCorridor/ShortCorridor.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/ShortCorridor/ShortCorridor.ipynb -------------------------------------------------------------------------------- /ShortCorridor/ShortCorridor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/ShortCorridor/ShortCorridor.py -------------------------------------------------------------------------------- /ShortCorridor/corridor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/ShortCorridor/corridor.png -------------------------------------------------------------------------------- /ShortCorridor/h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/ShortCorridor/h.png -------------------------------------------------------------------------------- /ShortCorridor/mc_policy_gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/ShortCorridor/mc_policy_gradient.png -------------------------------------------------------------------------------- /ShortCorridor/policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/ShortCorridor/policy.png -------------------------------------------------------------------------------- /TicTacToe/board.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TicTacToe/board.png -------------------------------------------------------------------------------- /TicTacToe/policy_p1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TicTacToe/policy_p1 -------------------------------------------------------------------------------- /TicTacToe/policy_p2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TicTacToe/policy_p2 -------------------------------------------------------------------------------- /TicTacToe/tic-tac-toe.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TicTacToe/tic-tac-toe.ipynb -------------------------------------------------------------------------------- /TicTacToe/ticTacToe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TicTacToe/ticTacToe.py -------------------------------------------------------------------------------- /TileCoding/TileCoding.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TileCoding/TileCoding.ipynb -------------------------------------------------------------------------------- /TileCoding/Tiling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TileCoding/Tiling.png -------------------------------------------------------------------------------- /TileCoding/tile_coding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/TileCoding/tile_coding.py -------------------------------------------------------------------------------- /WindyGridWorld/Windy_GW.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/WindyGridWorld/Windy_GW.ipynb -------------------------------------------------------------------------------- /WindyGridWorld/board.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/WindyGridWorld/board.png -------------------------------------------------------------------------------- /WindyGridWorld/windyGridWorld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MJeremy2017/reinforcement-learning-implementation/HEAD/WindyGridWorld/windyGridWorld.py --------------------------------------------------------------------------------