├── .gitignore
├── LICENSE
├── README.md
├── chapter_02_k_armed_bandits
│   ├── README.md
│   ├── [NOTES]CH_2.pdf
│   ├── example_2_1_distribution.py
│   ├── example_2_2_bandits_algo.py
│   ├── example_2_3_OIV.py
│   ├── example_2_4_UCB.py
│   ├── example_2_5_gradient.py
│   ├── example_2_6_summary.py
│   ├── exercise_2_5_non_stationary.py
│   ├── exercise_2_6_summary_non_stationary.py
│   ├── history
│   │   ├── OIV_record.pkl
│   │   ├── UCB_record.pkl
│   │   ├── exercise_2_6.pkl
│   │   ├── non_stationary_record.pkl
│   │   ├── record.pkl
│   │   ├── sga_record.pkl
│   │   └── summary.pkl
│   ├── plot_bandits.py
│   ├── plot_gradient.py
│   ├── plot_summary.py
│   ├── plots
│   │   ├── example_2_1.png
│   │   ├── example_2_2_optimal_ratio.png
│   │   ├── example_2_2_rewards.png
│   │   ├── example_2_3_optimal_ratio.png
│   │   ├── example_2_3_rewards.png
│   │   ├── example_2_4_optimal_ratio.png
│   │   ├── example_2_4_rewards.png
│   │   ├── example_2_5_sga.png
│   │   ├── example_2_6_summary.png
│   │   ├── exercise_2_5_optimal_ratio.png
│   │   ├── exercise_2_5_rewards.png
│   │   └── exercise_2_6.png
│   └── utils.py
├── chapter_03_finite_MDP
│   ├── README.md
│   ├── [NOTES]CH_3.pdf
│   ├── example_3_5_grid_world.py
│   ├── example_3_8_optimal_grid_world.py
│   ├── exercice_3_8_mdp_func.py
│   └── plots
│       ├── example_3_5
│       │   ├── 0.png
│       │   ├── 1.png
│       │   ├── 10.png
│       │   ├── 32.png
│       │   └── 4.png
│       └── example_3_8
│           ├── optimal_actions.png
│           └── optimal_values.png
├── chapter_04_dynamic_programming
│   ├── README.md
│   ├── [NOTES]CH_4.pdf
│   ├── example_4_1_policy_evaluation.py
│   ├── example_4_2_JacksCarRental.py
│   ├── example_4_3_gambler.py
│   ├── exercise_4_7_resolve_JacksCarRental.py
│   ├── gym_env
│   │   ├── gym_jcr
│   │   │   ├── __init__.py
│   │   │   ├── jcr_env.py
│   │   │   └── jcr_mdp.py
│   │   └── setup.py
│   └── plots
│       ├── example_4_1.png
│       ├── example_4_2
│       │   ├── policy_0.png
│       │   ├── policy_1.png
│       │   ├── policy_2.png
│       │   ├── policy_3.png
│       │   ├── policy_4.png
│       │   ├── policy_5.png
│       │   └── value_5.png
│       ├── example_4_3
│       │   ├── full_policy.png
│       │   ├── policy.png
│       │   └── values.png
│       ├── exercise_4_7
│       │   ├── policy_0.png
│       │   ├── policy_1.png
│       │   ├── policy_2.png
│       │   ├── policy_3.png
│       │   ├── policy_4.png
│       │   ├── policy_5.png
│       │   └── value_5.png
│       └── exercise_4_9
│           ├── ph_0.25
│           │   ├── full_policy.png
│           │   ├── policy.png
│           │   └── values.png
│           └── ph_0.55
│               ├── full_policy.png
│               ├── policy.png
│               └── values.png
├── chapter_05_monte_carlo_methods
│   ├── README.md
│   ├── [NOTES]CH_5.pdf
│   ├── example_5_1_blackjack.py
│   ├── example_5_3_solving_blackjack.py
│   ├── example_5_4_off_policy_estimation.py
│   ├── example_5_5_infinite_var.py
│   ├── exercise_5_12_racetrack.py
│   ├── history
│   │   ├── example_5_3.pkl
│   │   ├── example_5_4.pkl
│   │   └── exercise_5_12
│   │       ├── track_a.pkl
│   │       └── track_b.pkl
│   ├── plots
│   │   ├── example_5_1
│   │   │   ├── 10000_episodes_no_usable_ace.png
│   │   │   ├── 10000_episodes_usable_ace.png
│   │   │   ├── 500000_episodes_no_usable_ace.png
│   │   │   └── 500000_episodes_usable_ace.png
│   │   ├── example_5_3
│   │   │   ├── optimal_policy_no_usable_ace.png
│   │   │   ├── optimal_policy_usable_ace.png
│   │   │   ├── optimal_value_no_usable_ace.png
│   │   │   └── optimal_value_usable_ace.png
│   │   ├── example_5_4.png
│   │   ├── example_5_5.png
│   │   └── exercise_5_12
│   │       ├── track_a.png
│   │       ├── track_a_paths.png
│   │       ├── track_b.png
│   │       └── track_b_paths.png
│   ├── race_track_env
│   │   ├── maps
│   │   │   ├── build_tracks.py
│   │   │   ├── track_a.npy
│   │   │   └── track_b.npy
│   │   └── race_track.py
│   └── utils.py
├── chapter_06_temporal_difference_learning
│   ├── README.md
│   ├── [NOTES]CH_6.pdf
│   ├── envs
│   │   ├── random_walk_env.py
│   │   ├── two_state_mdp.py
│   │   └── windy_grid_env.py
│   ├── example_6_2_random_walk.py
│   ├── example_6_3_batch_updating.py
│   ├── example_6_5_windy_gridworld.py
│   ├── example_6_6_clif_walking.py
│   ├── example_6_7_max_bias.py
│   ├── exercise_6_10_stochastic_wind.py
│   ├── exercise_6_9_windy_king_move.py
│   ├── figure_6_3_TD_methods_performance.py
│   ├── history
│   │   └── figure_6_3
│   │       └── results.pkl
│   └── plots
│       ├── example_6_2
│       │   ├── rms_compare.png
│       │   └── value_approx.png
│       ├── example_6_3.png
│       ├── example_6_5
│       │   ├── result.gif
│       │   ├── rewards.png
│       │   └── step_episodes.png
│       ├── example_6_6
│       │   ├── Q_learning.gif
│       │   ├── SARSA.gif
│       │   └── rewards.png
│       ├── example_6_7.png
│       ├── exercise_6_10
│       │   ├── result.gif
│       │   ├── rewards.png
│       │   └── step_episodes.png
│       ├── exercise_6_9
│       │   ├── result.gif
│       │   ├── rewards.png
│       │   └── step_episodes.png
│       └── figure_6_3.png
├── chapter_07_n_step_bootstrapping
│   └── README.md
└── requirements.txt

Each file's contents are mirrored at https://raw.githubusercontent.com/terrence-ou/Reinforcement-Learning-2nd-Edition-Notes-Codes/HEAD/<path>, where <path> is the file's location in the tree above.
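Judging by the filenames, the chapter 2 scripts walk through the k-armed bandit testbed: ε-greedy action-value methods (example_2_2_bandits_algo.py), optimistic initial values (example_2_3_OIV.py), upper-confidence-bound selection (example_2_4_UCB.py), the gradient bandit (example_2_5_gradient.py), and a non-stationary variant (exercise_2_5_non_stationary.py). As a minimal sketch of the core loop (illustrative only; names such as `run_bandit` are invented, not code from this repository):

```python
import numpy as np

def run_bandit(k=10, steps=1000, epsilon=0.1, alpha=None, seed=0):
    """One epsilon-greedy run on a stationary Gaussian testbed.

    alpha=None uses sample-average updates; a constant alpha
    (e.g. 0.1) is the usual choice for non-stationary bandits.
    """
    rng = np.random.default_rng(seed)
    q_true = rng.normal(0.0, 1.0, k)  # true action values
    Q, N = np.zeros(k), np.zeros(k)   # estimates and pull counts
    rewards = np.empty(steps)
    for t in range(steps):
        if rng.random() < epsilon:
            a = int(rng.integers(k))          # explore
        else:
            a = int(np.argmax(Q))             # exploit
        r = rng.normal(q_true[a], 1.0)        # unit-variance reward
        N[a] += 1
        step_size = 1.0 / N[a] if alpha is None else alpha
        Q[a] += step_size * (r - Q[a])        # incremental update
        rewards[t] = r
    return rewards

print(run_bandit().mean())  # average reward over one run
```

Swapping the sample-average step size for a constant `alpha` is the standard adaptation for the non-stationary testbed of exercise 2.5, since it keeps weighting recent rewards more heavily.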
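example_3_5_grid_world.py presumably reproduces the 5×5 gridworld of the book's Example 3.5, whose state values under the equiprobable random policy satisfy the Bellman expectation equation v(s) = Σ_a π(a|s)[r + γ v(s′)]. A self-contained sketch of that sweep (grid constants as I recall the textbook example; helper names are mine):

```python
import numpy as np

# 5x5 gridworld of Example 3.5: from A every action jumps to A'
# (+10), from B to B' (+5); moves off the grid give -1 and leave
# the state unchanged; all other moves give 0. gamma = 0.9.
SIZE, GAMMA = 5, 0.9
A, A_PRIME, B, B_PRIME = (0, 1), (4, 1), (0, 3), (2, 3)
ACTIONS = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # up, down, left, right

def step(state, action):
    if state == A:
        return A_PRIME, 10.0
    if state == B:
        return B_PRIME, 5.0
    r, c = state[0] + action[0], state[1] + action[1]
    if 0 <= r < SIZE and 0 <= c < SIZE:
        return (r, c), 0.0
    return state, -1.0  # bumped into the edge

V = np.zeros((SIZE, SIZE))
for _ in range(1000):  # synchronous sweeps until convergence
    new_V = np.zeros_like(V)
    for r in range(SIZE):
        for c in range(SIZE):
            for a in ACTIONS:  # equiprobable policy: each 1/4
                (nr, nc), reward = step((r, c), a)
                new_V[r, c] += 0.25 * (reward + GAMMA * V[nr, nc])
    V = new_V
print(V.round(1))  # state A should come out around 8.8
```

Replacing the expectation with a max over actions turns this into the Bellman optimality backup, which is what example_3_8_optimal_grid_world.py targets.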
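For example_4_3_gambler.py, the textbook solves the gambler's problem with value iteration: with capital s and head probability p_h, the gambler stakes a ∈ {1, …, min(s, 100 − s)}, and the only nonzero reward is +1 on reaching 100. A hedged, self-contained sketch (variable names are illustrative):

```python
import numpy as np

# Value iteration for the gambler's problem: capital s in 1..99,
# stake a in 1..min(s, 100 - s), heads with probability p_h,
# reward +1 only on reaching the goal of 100. gamma = 1.
p_h, goal, theta = 0.4, 100, 1e-9
V = np.zeros(goal + 1)
V[goal] = 1.0  # value of the winning terminal state

while True:
    delta = 0.0
    for s in range(1, goal):
        best = max(p_h * V[s + a] + (1 - p_h) * V[s - a]
                   for a in range(1, min(s, goal - s) + 1))
        delta = max(delta, abs(best - V[s]))
        V[s] = best
    if delta < theta:
        break

print(round(V[50], 4))  # with p_h = 0.4 this approaches 0.4
```

The same backup with an argmax extracts the policy; the ragged, bold-play-like policy plots under plots/example_4_3 and exercise_4_9 come from how ties between equally good stakes are broken.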
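example_5_4_off_policy_estimation.py and example_5_5_infinite_var.py revolve around importance sampling. Weighted importance sampling keeps a cumulative weight C and updates the estimate incrementally, V ← V + (ρ/C)(G − V). A sketch on a one-state MDP resembling the book's Example 5.5 (the 0.9/0.1 dynamics and all names here are assumptions of mine):

```python
import random

# Incremental weighted importance sampling on a one-state MDP:
# 'left' loops back with probability 0.9 (reward 0) and terminates
# with probability 0.1 (reward +1); 'right' terminates with 0.
# Target policy: always 'left'. Behavior policy: 50/50.

def episode(rng):
    """Actions taken and the (gamma = 1) return under behavior."""
    actions = []
    while True:
        a = rng.choice(["left", "right"])
        actions.append(a)
        if a == "right":
            return actions, 0.0
        if rng.random() < 0.1:
            return actions, 1.0

rng = random.Random(0)
V, C = 0.0, 0.0  # running estimate and cumulative weight
for _ in range(10_000):
    actions, G = episode(rng)
    if actions[-1] == "right":
        continue  # target prob of 'right' is 0, so rho = 0
    rho = 2.0 ** len(actions)  # (1 / 0.5) per 'left' action
    C += rho
    V += (rho / C) * (G - V)   # V <- V + (rho/C)(G - V)
print(V)  # the target policy's true value is 1.0
```

Ordinary importance sampling (dividing by the episode count instead of C) has infinite variance on this MDP, which is the point of example_5_5_infinite_var.py.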
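example_6_2_random_walk.py corresponds to the five-state random walk, the standard TD(0) prediction demo: after every transition the estimate moves toward the bootstrapped target, V(s) ← V(s) + α[R + γV(s′) − V(s)]. A compact sketch (constants mine):

```python
import numpy as np

# TD(0) on the five-state random walk: states 1..5 (A..E) with
# terminals 0 and 6, start at C = 3; the right terminal pays +1.
rng = np.random.default_rng(0)
alpha = 0.1
V = np.full(7, 0.5)
V[0] = V[6] = 0.0  # terminal values

for _ in range(100):
    s = 3
    while s not in (0, 6):
        s_next = s + int(rng.choice((-1, 1)))
        reward = 1.0 if s_next == 6 else 0.0
        V[s] += alpha * (reward + V[s_next] - V[s])  # gamma = 1
        s = s_next
print(V[1:-1])  # true values are 1/6, 2/6, 3/6, 4/6, 5/6
```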
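example_6_6_clif_walking.py pits SARSA against Q-learning on the cliff-walking grid. The off-policy Q-learning update bootstraps from max_a Q(s′, a) regardless of which action is actually taken; a sketch under the textbook's 4×12 layout (helper names invented):

```python
import numpy as np

# Q-learning on the 4x12 cliff-walking grid: start (3, 0), goal
# (3, 11); stepping onto the cliff (row 3, columns 1..10) gives
# -100 and a reset to the start; every other move gives -1.
ROWS, COLS = 4, 12
START, GOAL = (3, 0), (3, 11)
MOVES = [(-1, 0), (1, 0), (0, -1), (0, 1)]

def step(s, a):
    r = min(max(s[0] + MOVES[a][0], 0), ROWS - 1)
    c = min(max(s[1] + MOVES[a][1], 0), COLS - 1)
    if r == 3 and 0 < c < COLS - 1:
        return START, -100.0  # fell off the cliff
    return (r, c), -1.0

rng = np.random.default_rng(0)
Q = np.zeros((ROWS, COLS, 4))
alpha, eps = 0.5, 0.1
for _ in range(500):
    s = START
    while s != GOAL:
        a = int(rng.integers(4)) if rng.random() < eps else int(np.argmax(Q[s]))
        s2, reward = step(s, a)
        # off-policy target: bootstrap from the greedy action at s2
        target = reward + np.max(Q[s2]) * (s2 != GOAL)  # gamma = 1
        Q[s][a] += alpha * (target - Q[s][a])
        s = s2
print(np.max(Q[START]))  # optimal return is -13 (13 steps at -1)
```

Replacing the max with the Q-value of the next ε-greedy action turns this into SARSA, whose on-policy targets produce the safer path away from the cliff that the book highlights; example_6_7_max_bias.py addresses the overestimation this max causes, via double Q-learning.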
/chapter_07_n_step_bootstrapping/README.md:

## **Chapter 7 n-step Bootstrapping**
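The chapter 7 folder holds only this README so far. As a preview of the topic (strictly an illustrative sketch, not code from the repository), tabular n-step TD prediction with γ = 1 on the chapter 6 random walk looks like:

```python
import numpy as np

def n_step_td(n=4, alpha=0.1, episodes=100, seed=0):
    """n-step TD prediction (gamma = 1) on the 5-state random walk."""
    rng = np.random.default_rng(seed)
    V = np.full(7, 0.5)
    V[0] = V[6] = 0.0          # terminals
    for _ in range(episodes):
        S, R = [3], [0.0]      # S_0 = C; R[0] is a dummy entry
        T, t = 10**9, 0        # episode length, unknown at first
        while True:
            if t < T:
                s_next = S[t] + int(rng.choice((-1, 1)))
                S.append(s_next)
                R.append(1.0 if s_next == 6 else 0.0)
                if s_next in (0, 6):
                    T = t + 1
            tau = t - n + 1    # the time whose state is updated
            if tau >= 0:
                G = sum(R[tau + 1: min(tau + n, T) + 1])
                if tau + n < T:
                    G += V[S[tau + n]]  # bootstrap n steps ahead
                V[S[tau]] += alpha * (G - V[S[tau]])
            if tau == T - 1:
                break
            t += 1
    return V[1:-1]

print(n_step_td())  # true values: 1/6 ... 5/6
```

With n = 1 this reduces to the TD(0) update from chapter 6, and as n approaches the episode length it becomes a Monte Carlo update, which is the trade-off the chapter studies.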