├── Chap3 ├── pg_agent.py ├── pg_load_play.py ├── pg_main.py └── save_weights │ ├── pendulum.h5 │ └── pendulum_epi_reward.txt ├── Chap4 ├── a2c_actor.py ├── a2c_agent.py ├── a2c_critic.py ├── a2c_load_play.py ├── a2c_main.py └── save_weights │ ├── pendulum_actor.h5 │ └── pendulum_critic.h5 ├── Chap5 ├── A3CData │ ├── a3c_actor.py │ ├── a3c_agent.py │ ├── a3c_critic.py │ ├── a3c_load_play.py │ ├── a3c_main.py │ └── save_weights │ │ ├── pendulum_actor.h5 │ │ └── pendulum_critic.h5 └── A3CGradient │ ├── a3c_actor.py │ ├── a3c_agent.py │ ├── a3c_critic.py │ ├── a3c_load_play.py │ ├── a3c_main.py │ └── save_weights │ ├── pendulum_actor.h5 │ └── pendulum_critic.h5 ├── Chap6 ├── ppo_actor.py ├── ppo_agent.py ├── ppo_critic.py ├── ppo_load_play.py ├── ppo_main.py └── save_weights │ ├── pendulum_actor.h5 │ └── pendulum_critic.h5 ├── Chap7 ├── ddpg_actor.py ├── ddpg_agent.py ├── ddpg_critic.py ├── ddpg_load_play.py ├── ddpg_main.py ├── replaybuffer.py └── save_weights │ ├── pendulum_actor.h5 │ └── pendulum_critic.h5 ├── Chap9 ├── config.py ├── gaussian_control.py ├── gmm │ ├── __pycache__ │ │ ├── dynamics_prior_gmm.cpython-35.pyc │ │ ├── dynamics_prior_gmm.cpython-36.pyc │ │ ├── gmm.cpython-35.pyc │ │ └── gmm.cpython-36.pyc │ ├── dynamics_prior_gmm.py │ └── gmm.py ├── linear_dynamics.py ├── lqrflm_agent.py ├── lqrflm_load_play.py ├── lqrflm_main.py ├── sample_trajectory.py └── save_weights │ ├── global_traj1.txt │ ├── global_traj2.txt │ ├── global_traj3.txt │ ├── kalman_gain.txt │ ├── kalman_gain_keep4.txt │ ├── kalman_gain_keep5.txt │ ├── kalman_gain_keep6.txt │ ├── kalman_gain_keep7.txt │ ├── kalman_gain_keep8.txt │ ├── local_traj_keep4.txt │ ├── local_traj_keep5.txt │ ├── local_traj_keep6.txt │ ├── local_traj_keep7.txt │ ├── local_traj_keep8.txt │ ├── pendulum_iter_cost.txt │ ├── pendulum_iter_cost_keep4.txt │ ├── pendulum_iter_cost_keep5.txt │ ├── pendulum_iter_cost_keep6.txt │ ├── pendulum_iter_cost_keep7.txt │ ├── pendulum_iter_cost_keep8.txt │ └── readme.txt ├── README.md └── model_comparison.ipynb /Chap3/pg_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap3/pg_agent.py -------------------------------------------------------------------------------- /Chap3/pg_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap3/pg_load_play.py -------------------------------------------------------------------------------- /Chap3/pg_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap3/pg_main.py -------------------------------------------------------------------------------- /Chap3/save_weights/pendulum.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap3/save_weights/pendulum.h5 -------------------------------------------------------------------------------- /Chap3/save_weights/pendulum_epi_reward.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap3/save_weights/pendulum_epi_reward.txt -------------------------------------------------------------------------------- /Chap4/a2c_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/a2c_actor.py -------------------------------------------------------------------------------- /Chap4/a2c_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/a2c_agent.py -------------------------------------------------------------------------------- /Chap4/a2c_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/a2c_critic.py -------------------------------------------------------------------------------- /Chap4/a2c_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/a2c_load_play.py -------------------------------------------------------------------------------- /Chap4/a2c_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/a2c_main.py -------------------------------------------------------------------------------- /Chap4/save_weights/pendulum_actor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/save_weights/pendulum_actor.h5 -------------------------------------------------------------------------------- /Chap4/save_weights/pendulum_critic.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap4/save_weights/pendulum_critic.h5 -------------------------------------------------------------------------------- /Chap5/A3CData/a3c_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/a3c_actor.py -------------------------------------------------------------------------------- /Chap5/A3CData/a3c_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/a3c_agent.py -------------------------------------------------------------------------------- /Chap5/A3CData/a3c_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/a3c_critic.py -------------------------------------------------------------------------------- /Chap5/A3CData/a3c_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/a3c_load_play.py -------------------------------------------------------------------------------- /Chap5/A3CData/a3c_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/a3c_main.py -------------------------------------------------------------------------------- /Chap5/A3CData/save_weights/pendulum_actor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/save_weights/pendulum_actor.h5 -------------------------------------------------------------------------------- /Chap5/A3CData/save_weights/pendulum_critic.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CData/save_weights/pendulum_critic.h5 -------------------------------------------------------------------------------- /Chap5/A3CGradient/a3c_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/a3c_actor.py -------------------------------------------------------------------------------- /Chap5/A3CGradient/a3c_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/a3c_agent.py -------------------------------------------------------------------------------- /Chap5/A3CGradient/a3c_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/a3c_critic.py -------------------------------------------------------------------------------- /Chap5/A3CGradient/a3c_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/a3c_load_play.py -------------------------------------------------------------------------------- /Chap5/A3CGradient/a3c_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/a3c_main.py -------------------------------------------------------------------------------- /Chap5/A3CGradient/save_weights/pendulum_actor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/save_weights/pendulum_actor.h5 -------------------------------------------------------------------------------- /Chap5/A3CGradient/save_weights/pendulum_critic.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap5/A3CGradient/save_weights/pendulum_critic.h5 -------------------------------------------------------------------------------- /Chap6/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/ppo_actor.py -------------------------------------------------------------------------------- /Chap6/ppo_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/ppo_agent.py -------------------------------------------------------------------------------- /Chap6/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/ppo_critic.py -------------------------------------------------------------------------------- /Chap6/ppo_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/ppo_load_play.py -------------------------------------------------------------------------------- /Chap6/ppo_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/ppo_main.py -------------------------------------------------------------------------------- /Chap6/save_weights/pendulum_actor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/save_weights/pendulum_actor.h5 -------------------------------------------------------------------------------- /Chap6/save_weights/pendulum_critic.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap6/save_weights/pendulum_critic.h5 -------------------------------------------------------------------------------- /Chap7/ddpg_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/ddpg_actor.py -------------------------------------------------------------------------------- /Chap7/ddpg_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/ddpg_agent.py -------------------------------------------------------------------------------- /Chap7/ddpg_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/ddpg_critic.py -------------------------------------------------------------------------------- /Chap7/ddpg_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/ddpg_load_play.py -------------------------------------------------------------------------------- /Chap7/ddpg_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/ddpg_main.py -------------------------------------------------------------------------------- /Chap7/replaybuffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/replaybuffer.py -------------------------------------------------------------------------------- /Chap7/save_weights/pendulum_actor.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/save_weights/pendulum_actor.h5 -------------------------------------------------------------------------------- /Chap7/save_weights/pendulum_critic.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap7/save_weights/pendulum_critic.h5 -------------------------------------------------------------------------------- /Chap9/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/config.py -------------------------------------------------------------------------------- /Chap9/gaussian_control.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gaussian_control.py -------------------------------------------------------------------------------- /Chap9/gmm/__pycache__/dynamics_prior_gmm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gmm/__pycache__/dynamics_prior_gmm.cpython-35.pyc -------------------------------------------------------------------------------- /Chap9/gmm/__pycache__/dynamics_prior_gmm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gmm/__pycache__/dynamics_prior_gmm.cpython-36.pyc -------------------------------------------------------------------------------- /Chap9/gmm/__pycache__/gmm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gmm/__pycache__/gmm.cpython-35.pyc -------------------------------------------------------------------------------- /Chap9/gmm/__pycache__/gmm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gmm/__pycache__/gmm.cpython-36.pyc -------------------------------------------------------------------------------- /Chap9/gmm/dynamics_prior_gmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gmm/dynamics_prior_gmm.py -------------------------------------------------------------------------------- /Chap9/gmm/gmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/gmm/gmm.py -------------------------------------------------------------------------------- /Chap9/linear_dynamics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/linear_dynamics.py -------------------------------------------------------------------------------- /Chap9/lqrflm_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/lqrflm_agent.py -------------------------------------------------------------------------------- /Chap9/lqrflm_load_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/lqrflm_load_play.py -------------------------------------------------------------------------------- /Chap9/lqrflm_main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/lqrflm_main.py -------------------------------------------------------------------------------- /Chap9/sample_trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/sample_trajectory.py -------------------------------------------------------------------------------- /Chap9/save_weights/global_traj1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/global_traj1.txt -------------------------------------------------------------------------------- /Chap9/save_weights/global_traj2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/global_traj2.txt -------------------------------------------------------------------------------- /Chap9/save_weights/global_traj3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/global_traj3.txt -------------------------------------------------------------------------------- /Chap9/save_weights/kalman_gain.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/kalman_gain.txt -------------------------------------------------------------------------------- /Chap9/save_weights/kalman_gain_keep4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/kalman_gain_keep4.txt -------------------------------------------------------------------------------- /Chap9/save_weights/kalman_gain_keep5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/kalman_gain_keep5.txt -------------------------------------------------------------------------------- /Chap9/save_weights/kalman_gain_keep6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/kalman_gain_keep6.txt -------------------------------------------------------------------------------- /Chap9/save_weights/kalman_gain_keep7.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/kalman_gain_keep7.txt -------------------------------------------------------------------------------- /Chap9/save_weights/kalman_gain_keep8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/kalman_gain_keep8.txt -------------------------------------------------------------------------------- /Chap9/save_weights/local_traj_keep4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/local_traj_keep4.txt -------------------------------------------------------------------------------- /Chap9/save_weights/local_traj_keep5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/local_traj_keep5.txt -------------------------------------------------------------------------------- /Chap9/save_weights/local_traj_keep6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/local_traj_keep6.txt -------------------------------------------------------------------------------- /Chap9/save_weights/local_traj_keep7.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/local_traj_keep7.txt -------------------------------------------------------------------------------- /Chap9/save_weights/local_traj_keep8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/local_traj_keep8.txt -------------------------------------------------------------------------------- /Chap9/save_weights/pendulum_iter_cost.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/pendulum_iter_cost.txt -------------------------------------------------------------------------------- /Chap9/save_weights/pendulum_iter_cost_keep4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/pendulum_iter_cost_keep4.txt -------------------------------------------------------------------------------- /Chap9/save_weights/pendulum_iter_cost_keep5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/pendulum_iter_cost_keep5.txt -------------------------------------------------------------------------------- /Chap9/save_weights/pendulum_iter_cost_keep6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/pendulum_iter_cost_keep6.txt -------------------------------------------------------------------------------- /Chap9/save_weights/pendulum_iter_cost_keep7.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/pendulum_iter_cost_keep7.txt -------------------------------------------------------------------------------- /Chap9/save_weights/pendulum_iter_cost_keep8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/pendulum_iter_cost_keep8.txt -------------------------------------------------------------------------------- /Chap9/save_weights/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/Chap9/save_weights/readme.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/README.md -------------------------------------------------------------------------------- /model_comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pasus/Reinforcement-Learning-Book/HEAD/model_comparison.ipynb --------------------------------------------------------------------------------