├── README.md ├── chapter01 └── tic_tac_toe.py ├── chapter02 ├── exercises2.pdf ├── exercises2.tex ├── figure_e_2_11.png ├── figure_e_2_5.png └── random_walk.py ├── chapter03 ├── exercises3.pdf └── exercises3.tex ├── chapter04 ├── exercises4.pdf ├── exercises4.tex ├── gamblers_problem.py └── jacks_car_rental.py ├── chapter05 ├── env0.txt ├── env1.txt ├── env2.txt ├── env2_policy.obj ├── env3.txt ├── env3_policy.obj ├── exercises5.pdf ├── exercises5.tex ├── race_track.py └── race_track_test.py ├── chapter06 ├── exercises6.pdf ├── exercises6.tex └── windy_grid_world.py ├── chapter07 ├── chapter_7_notes.png ├── chapter_7_notes.xcf ├── exercises7.pdf ├── exercises7.tex ├── off_policy_blackjack.py ├── random_walk.py └── random_walk_5.py ├── chapter08 ├── exercises8.pdf ├── exercises8.tex ├── maze_experiments.py ├── maze_world.py └── trajectory_sampling.py ├── chapter09 ├── exercises9.pdf └── exercises9.tex ├── chapter10 ├── exercises10.pdf └── exercises10.tex ├── chapter11 ├── bairds_env.py ├── exercise_11_3.py ├── exercises11.pdf └── exercises11.tex ├── chapter12 ├── chapter_12_2_notes.png ├── chapter_12_2_notes.xcf ├── chapter_12_notes.png ├── chapter_12_notes.xcf ├── exercises12.pdf └── exercises12.tex ├── chapter13 ├── exercises13.pdf └── exercises13.tex ├── images ├── env2_5_3_1_avg.png ├── env2_5_3_1_max.png ├── env2_demo_e_5_12_3.png ├── env2_demo_e_5_12_4.png ├── env2_demo_e_5_12_5.png ├── env2_demo_e_5_12_6.png ├── env2_demo_e_5_12_7.png ├── env2_demo_e_5_12_8.png ├── env3_5_3_1_avg.png ├── env3_5_3_1_max.png ├── env3_demo_e_5_12_0.png ├── env3_demo_e_5_12_1.png ├── env3_demo_e_5_12_10.png ├── env3_demo_e_5_12_12.png ├── env3_demo_e_5_12_13.png ├── env3_demo_e_5_12_15.png ├── env3_demo_e_5_12_16.png ├── env3_demo_e_5_12_17.png ├── env3_demo_e_5_12_18.png ├── env3_demo_e_5_12_19.png ├── env3_demo_e_5_12_2.png ├── env3_demo_e_5_12_20.png ├── env3_demo_e_5_12_21.png ├── env3_demo_e_5_12_22.png ├── env3_demo_e_5_12_3.png ├── env3_demo_e_5_12_4.png ├── env3_demo_e_5_12_5.png ├── env3_demo_e_5_12_6.png ├── env3_demo_e_5_12_7.png ├── env3_demo_e_5_12_8.png ├── env3_demo_e_5_12_9.png ├── example_8_4.png ├── example_8_4_2.png ├── exercise_10_2.png ├── exercise_10_4.png ├── exercise_10_7q.png ├── exercise_10_8q.png ├── exercise_10_9.png ├── exercise_10_9q.png ├── exercise_11_3.png ├── exercise_12_2q.png ├── exercise_12_6.png ├── exercise_13_3.png ├── exercise_13_5q.png ├── exercise_8_8.png ├── figure_2_1.png ├── figure_2_2.png ├── figure_2_3.png ├── figure_2_4.png ├── figure_2_5.png ├── figure_2_6.png ├── figure_4_2.png ├── figure_4_2_e_4_7.png ├── figure_4_3.png ├── figure_4_3_e_4_9_p25.png ├── figure_4_3_e_4_9_p50.png ├── figure_4_3_e_4_9_p55.png ├── figure_6_3.png ├── figure_6_3_ex_6_10.png ├── figure_6_3_ex_6_10_grid.png ├── figure_6_3_ex_6_9_a.png ├── figure_6_3_ex_6_9_a_grid.png ├── figure_6_3_ex_6_9_b.png ├── figure_6_3_ex_6_9_b_grid.png ├── figure_6_3_grid.png ├── figure_7_10.png ├── figure_7_2.png ├── figure_7_3_5states_ret-1.png ├── figure_7_3_5states_ret0.png ├── figure_7_5.png ├── figure_7_7.png ├── figure_8_4.png ├── figure_8_5.png ├── figure_e_2_11.png └── figure_e_2_5.png └── requirements.txt /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/README.md -------------------------------------------------------------------------------- /chapter01/tic_tac_toe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter01/tic_tac_toe.py -------------------------------------------------------------------------------- /chapter02/exercises2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter02/exercises2.pdf -------------------------------------------------------------------------------- /chapter02/exercises2.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter02/exercises2.tex -------------------------------------------------------------------------------- /chapter02/figure_e_2_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter02/figure_e_2_11.png -------------------------------------------------------------------------------- /chapter02/figure_e_2_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter02/figure_e_2_5.png -------------------------------------------------------------------------------- /chapter02/random_walk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter02/random_walk.py -------------------------------------------------------------------------------- /chapter03/exercises3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter03/exercises3.pdf -------------------------------------------------------------------------------- /chapter03/exercises3.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter03/exercises3.tex -------------------------------------------------------------------------------- /chapter04/exercises4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter04/exercises4.pdf -------------------------------------------------------------------------------- /chapter04/exercises4.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter04/exercises4.tex -------------------------------------------------------------------------------- /chapter04/gamblers_problem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter04/gamblers_problem.py -------------------------------------------------------------------------------- /chapter04/jacks_car_rental.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter04/jacks_car_rental.py -------------------------------------------------------------------------------- /chapter05/env0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/env0.txt -------------------------------------------------------------------------------- /chapter05/env1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/env1.txt -------------------------------------------------------------------------------- /chapter05/env2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/env2.txt -------------------------------------------------------------------------------- /chapter05/env2_policy.obj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/env2_policy.obj -------------------------------------------------------------------------------- /chapter05/env3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/env3.txt -------------------------------------------------------------------------------- /chapter05/env3_policy.obj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/env3_policy.obj -------------------------------------------------------------------------------- /chapter05/exercises5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/exercises5.pdf -------------------------------------------------------------------------------- /chapter05/exercises5.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/exercises5.tex -------------------------------------------------------------------------------- /chapter05/race_track.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/race_track.py -------------------------------------------------------------------------------- /chapter05/race_track_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter05/race_track_test.py -------------------------------------------------------------------------------- /chapter06/exercises6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter06/exercises6.pdf -------------------------------------------------------------------------------- /chapter06/exercises6.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter06/exercises6.tex -------------------------------------------------------------------------------- /chapter06/windy_grid_world.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter06/windy_grid_world.py -------------------------------------------------------------------------------- /chapter07/chapter_7_notes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/chapter_7_notes.png -------------------------------------------------------------------------------- /chapter07/chapter_7_notes.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/chapter_7_notes.xcf -------------------------------------------------------------------------------- /chapter07/exercises7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/exercises7.pdf -------------------------------------------------------------------------------- /chapter07/exercises7.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/exercises7.tex -------------------------------------------------------------------------------- /chapter07/off_policy_blackjack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/off_policy_blackjack.py -------------------------------------------------------------------------------- /chapter07/random_walk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/random_walk.py -------------------------------------------------------------------------------- /chapter07/random_walk_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter07/random_walk_5.py -------------------------------------------------------------------------------- /chapter08/exercises8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter08/exercises8.pdf -------------------------------------------------------------------------------- /chapter08/exercises8.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter08/exercises8.tex -------------------------------------------------------------------------------- /chapter08/maze_experiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter08/maze_experiments.py -------------------------------------------------------------------------------- /chapter08/maze_world.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter08/maze_world.py -------------------------------------------------------------------------------- /chapter08/trajectory_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter08/trajectory_sampling.py -------------------------------------------------------------------------------- /chapter09/exercises9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter09/exercises9.pdf -------------------------------------------------------------------------------- /chapter09/exercises9.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter09/exercises9.tex -------------------------------------------------------------------------------- /chapter10/exercises10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter10/exercises10.pdf -------------------------------------------------------------------------------- /chapter10/exercises10.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter10/exercises10.tex -------------------------------------------------------------------------------- /chapter11/bairds_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter11/bairds_env.py -------------------------------------------------------------------------------- /chapter11/exercise_11_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter11/exercise_11_3.py -------------------------------------------------------------------------------- /chapter11/exercises11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter11/exercises11.pdf -------------------------------------------------------------------------------- /chapter11/exercises11.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter11/exercises11.tex -------------------------------------------------------------------------------- /chapter12/chapter_12_2_notes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter12/chapter_12_2_notes.png -------------------------------------------------------------------------------- /chapter12/chapter_12_2_notes.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter12/chapter_12_2_notes.xcf -------------------------------------------------------------------------------- /chapter12/chapter_12_notes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter12/chapter_12_notes.png -------------------------------------------------------------------------------- /chapter12/chapter_12_notes.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter12/chapter_12_notes.xcf -------------------------------------------------------------------------------- /chapter12/exercises12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter12/exercises12.pdf -------------------------------------------------------------------------------- /chapter12/exercises12.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter12/exercises12.tex -------------------------------------------------------------------------------- /chapter13/exercises13.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter13/exercises13.pdf -------------------------------------------------------------------------------- /chapter13/exercises13.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/chapter13/exercises13.tex -------------------------------------------------------------------------------- /images/env2_5_3_1_avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_5_3_1_avg.png -------------------------------------------------------------------------------- /images/env2_5_3_1_max.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_5_3_1_max.png -------------------------------------------------------------------------------- /images/env2_demo_e_5_12_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_demo_e_5_12_3.png -------------------------------------------------------------------------------- /images/env2_demo_e_5_12_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_demo_e_5_12_4.png -------------------------------------------------------------------------------- /images/env2_demo_e_5_12_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_demo_e_5_12_5.png -------------------------------------------------------------------------------- /images/env2_demo_e_5_12_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_demo_e_5_12_6.png -------------------------------------------------------------------------------- /images/env2_demo_e_5_12_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_demo_e_5_12_7.png -------------------------------------------------------------------------------- /images/env2_demo_e_5_12_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env2_demo_e_5_12_8.png -------------------------------------------------------------------------------- /images/env3_5_3_1_avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_5_3_1_avg.png -------------------------------------------------------------------------------- /images/env3_5_3_1_max.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_5_3_1_max.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_0.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_1.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_10.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_12.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_13.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_15.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_16.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_17.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_18.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_19.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_2.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_20.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_21.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_22.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_3.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_4.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_5.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_6.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_7.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_8.png -------------------------------------------------------------------------------- /images/env3_demo_e_5_12_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/env3_demo_e_5_12_9.png -------------------------------------------------------------------------------- /images/example_8_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/example_8_4.png -------------------------------------------------------------------------------- /images/example_8_4_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/example_8_4_2.png -------------------------------------------------------------------------------- /images/exercise_10_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_10_2.png -------------------------------------------------------------------------------- /images/exercise_10_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_10_4.png -------------------------------------------------------------------------------- /images/exercise_10_7q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_10_7q.png -------------------------------------------------------------------------------- /images/exercise_10_8q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_10_8q.png -------------------------------------------------------------------------------- /images/exercise_10_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_10_9.png -------------------------------------------------------------------------------- /images/exercise_10_9q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_10_9q.png -------------------------------------------------------------------------------- /images/exercise_11_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_11_3.png -------------------------------------------------------------------------------- /images/exercise_12_2q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_12_2q.png -------------------------------------------------------------------------------- /images/exercise_12_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_12_6.png -------------------------------------------------------------------------------- /images/exercise_13_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_13_3.png -------------------------------------------------------------------------------- /images/exercise_13_5q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_13_5q.png -------------------------------------------------------------------------------- /images/exercise_8_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/exercise_8_8.png -------------------------------------------------------------------------------- /images/figure_2_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_2_1.png -------------------------------------------------------------------------------- /images/figure_2_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_2_2.png -------------------------------------------------------------------------------- /images/figure_2_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_2_3.png -------------------------------------------------------------------------------- /images/figure_2_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_2_4.png -------------------------------------------------------------------------------- /images/figure_2_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_2_5.png -------------------------------------------------------------------------------- /images/figure_2_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_2_6.png -------------------------------------------------------------------------------- /images/figure_4_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_4_2.png -------------------------------------------------------------------------------- /images/figure_4_2_e_4_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_4_2_e_4_7.png -------------------------------------------------------------------------------- /images/figure_4_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_4_3.png -------------------------------------------------------------------------------- /images/figure_4_3_e_4_9_p25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_4_3_e_4_9_p25.png -------------------------------------------------------------------------------- /images/figure_4_3_e_4_9_p50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_4_3_e_4_9_p50.png -------------------------------------------------------------------------------- /images/figure_4_3_e_4_9_p55.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_4_3_e_4_9_p55.png -------------------------------------------------------------------------------- /images/figure_6_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3.png -------------------------------------------------------------------------------- /images/figure_6_3_ex_6_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_ex_6_10.png -------------------------------------------------------------------------------- /images/figure_6_3_ex_6_10_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_ex_6_10_grid.png -------------------------------------------------------------------------------- /images/figure_6_3_ex_6_9_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_ex_6_9_a.png -------------------------------------------------------------------------------- /images/figure_6_3_ex_6_9_a_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_ex_6_9_a_grid.png -------------------------------------------------------------------------------- /images/figure_6_3_ex_6_9_b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_ex_6_9_b.png -------------------------------------------------------------------------------- /images/figure_6_3_ex_6_9_b_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_ex_6_9_b_grid.png -------------------------------------------------------------------------------- /images/figure_6_3_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_6_3_grid.png -------------------------------------------------------------------------------- /images/figure_7_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_7_10.png -------------------------------------------------------------------------------- /images/figure_7_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_7_2.png -------------------------------------------------------------------------------- /images/figure_7_3_5states_ret-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_7_3_5states_ret-1.png -------------------------------------------------------------------------------- /images/figure_7_3_5states_ret0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_7_3_5states_ret0.png -------------------------------------------------------------------------------- /images/figure_7_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_7_5.png -------------------------------------------------------------------------------- /images/figure_7_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_7_7.png -------------------------------------------------------------------------------- /images/figure_8_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_8_4.png -------------------------------------------------------------------------------- /images/figure_8_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_8_5.png -------------------------------------------------------------------------------- /images/figure_e_2_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_e_2_11.png -------------------------------------------------------------------------------- /images/figure_e_2_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/images/figure_e_2_5.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/habanoz/reinforcement-learning-an-introduction/HEAD/requirements.txt --------------------------------------------------------------------------------