├── .gitignore ├── LICENSE ├── README.md ├── bc ├── README.md ├── bash_scripts │ ├── demo.bash │ ├── gen_exp_data.sh │ └── runbc_allmujoco.sh ├── bc.py ├── experts │ ├── Ant-v1.pkl │ ├── HalfCheetah-v1.pkl │ ├── Hopper-v1.pkl │ ├── Humanoid-v1.pkl │ ├── Reacher-v1.pkl │ └── Walker2d-v1.pkl ├── figures │ ├── Ant-v1.png │ ├── HalfCheetah-v1.png │ ├── Hopper-v1.png │ ├── Humanoid-v1.png │ ├── Reacher-v1.png │ └── Walker2d-v1.png ├── load_policy.py ├── plot_bc.py ├── random_logs │ └── gen_exp_data.text ├── run_expert.py └── tf_util.py ├── ddpg ├── README.md ├── ddpg.py ├── main.py └── replay_buffer.py ├── dqn ├── README.md ├── atari_wrappers.py ├── dqn.py ├── dqn_utils.py ├── figures │ ├── BeamRider.png │ ├── Breakout.png │ ├── Enduro.png │ └── Pong.png ├── hw3.pdf ├── logs_pkls │ ├── BeamRider_s001.pkl │ ├── BeamRider_s002.pkl │ ├── Breakout_s001.pkl │ ├── Breakout_s002.pkl │ ├── Enduro_s001.pkl │ ├── Enduro_s002.pkl │ ├── Pong_s001.pkl │ └── Pong_s002.pkl ├── logs_text │ ├── BeamRider_s001.text │ ├── BeamRider_s002.text │ ├── Breakout_s001.text │ ├── Breakout_s002.text │ ├── Enduro_s001.text │ ├── Enduro_s002.text │ ├── Pong_s001.text │ └── Pong_s002.text ├── plot_dqn.py ├── run_dqn_atari.py └── run_dqn_ram.py ├── es ├── README.md ├── bash_scripts │ └── InvertedPendulum-v1.sh ├── es.py ├── figures │ ├── HalfCheetah-v1_log.png │ ├── HalfCheetah-v1_rewards_std.png │ ├── InvertedPendulum-v1-old_log.png │ ├── InvertedPendulum-v1-old_rewards_std.png │ ├── InvertedPendulum-v1_log.png │ └── InvertedPendulum-v1_rewards_std.png ├── logz.py ├── main.py ├── optimizers.py ├── plot.py ├── test.py ├── toy_es.py └── utils.py ├── g_learning ├── G-Learning.py ├── README.md ├── __init__.py └── figures │ ├── cliff_episode_length_time.png │ ├── cliff_episode_reward_time.png │ └── cliff_episodes_per_time.png ├── lib ├── __init__.py ├── envs │ ├── README.md │ ├── __init__.py │ ├── blackjack.py │ ├── cliff_walking.py │ ├── gridworld.py │ ├── two_room_domain.py │ └── windy_gridworld.py └── plotting.py ├── q_learning ├── Q-Learning.py ├── README.md ├── __init__.py └── figures │ ├── cliff_episode_length_time.png │ ├── cliff_episode_reward_time.png │ ├── cliff_episodes_per_time.png │ ├── cliff_sarsa_episode_length_time.png │ ├── cliff_sarsa_episode_reward_time.png │ ├── cliff_sarsa_episodes_per_time.png │ ├── frozenlake8x8_episode_length_time.png │ ├── frozenlake8x8_episode_reward_time.png │ ├── frozenlake8x8_episodes_per_time.png │ ├── frozenlake_episode_length_time.png │ ├── frozenlake_episode_reward_time.png │ ├── frozenlake_episodes_per_time.png │ ├── gridw_episode_length_time.png │ ├── gridw_episode_reward_time.png │ └── gridw_episodes_per_time.png ├── trpo ├── README.md ├── fxn_approx.py ├── main.py ├── trpo.py └── utils_trpo.py ├── utils ├── __init__.py ├── logz.py ├── policies.py ├── utils_pg.py └── value_functions.py └── vpg ├── README.md ├── bash_scripts ├── CartPole-v0.sh ├── Pendulum-v0.sh ├── halfcheetah.sh ├── hopper.sh └── walker.sh ├── figures ├── CartPole-v0.png ├── CartPole-v0_sm.png ├── HalfCheetah-v1.png ├── HalfCheetah-v1_sm.png ├── Hopper-v1.png ├── Hopper-v1_sm.png ├── Pendulum-v0.png ├── Pendulum-v0_sm.png ├── Walker2d-v1.png └── Walker2d-v1_sm.png ├── main.py └── plot_learning_curves.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/README.md -------------------------------------------------------------------------------- /bc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/README.md -------------------------------------------------------------------------------- /bc/bash_scripts/demo.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/bash_scripts/demo.bash -------------------------------------------------------------------------------- /bc/bash_scripts/gen_exp_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/bash_scripts/gen_exp_data.sh -------------------------------------------------------------------------------- /bc/bash_scripts/runbc_allmujoco.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/bash_scripts/runbc_allmujoco.sh -------------------------------------------------------------------------------- /bc/bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/bc.py -------------------------------------------------------------------------------- /bc/experts/Ant-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/experts/Ant-v1.pkl -------------------------------------------------------------------------------- /bc/experts/HalfCheetah-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/experts/HalfCheetah-v1.pkl -------------------------------------------------------------------------------- /bc/experts/Hopper-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/experts/Hopper-v1.pkl -------------------------------------------------------------------------------- /bc/experts/Humanoid-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/experts/Humanoid-v1.pkl -------------------------------------------------------------------------------- /bc/experts/Reacher-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/experts/Reacher-v1.pkl -------------------------------------------------------------------------------- /bc/experts/Walker2d-v1.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/experts/Walker2d-v1.pkl -------------------------------------------------------------------------------- /bc/figures/Ant-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/figures/Ant-v1.png -------------------------------------------------------------------------------- /bc/figures/HalfCheetah-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/figures/HalfCheetah-v1.png -------------------------------------------------------------------------------- /bc/figures/Hopper-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/figures/Hopper-v1.png -------------------------------------------------------------------------------- /bc/figures/Humanoid-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/figures/Humanoid-v1.png -------------------------------------------------------------------------------- /bc/figures/Reacher-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/figures/Reacher-v1.png -------------------------------------------------------------------------------- /bc/figures/Walker2d-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/figures/Walker2d-v1.png -------------------------------------------------------------------------------- /bc/load_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/load_policy.py -------------------------------------------------------------------------------- /bc/plot_bc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/plot_bc.py -------------------------------------------------------------------------------- /bc/random_logs/gen_exp_data.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/random_logs/gen_exp_data.text -------------------------------------------------------------------------------- /bc/run_expert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/run_expert.py -------------------------------------------------------------------------------- /bc/tf_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/bc/tf_util.py -------------------------------------------------------------------------------- /ddpg/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/ddpg/README.md -------------------------------------------------------------------------------- /ddpg/ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/ddpg/ddpg.py -------------------------------------------------------------------------------- /ddpg/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/ddpg/main.py -------------------------------------------------------------------------------- /ddpg/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/ddpg/replay_buffer.py -------------------------------------------------------------------------------- /dqn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/README.md -------------------------------------------------------------------------------- /dqn/atari_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/atari_wrappers.py -------------------------------------------------------------------------------- /dqn/dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/dqn.py -------------------------------------------------------------------------------- /dqn/dqn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/dqn_utils.py -------------------------------------------------------------------------------- /dqn/figures/BeamRider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/figures/BeamRider.png -------------------------------------------------------------------------------- /dqn/figures/Breakout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/figures/Breakout.png -------------------------------------------------------------------------------- /dqn/figures/Enduro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/figures/Enduro.png -------------------------------------------------------------------------------- /dqn/figures/Pong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/figures/Pong.png -------------------------------------------------------------------------------- /dqn/hw3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/hw3.pdf -------------------------------------------------------------------------------- /dqn/logs_pkls/BeamRider_s001.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/BeamRider_s001.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/BeamRider_s002.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/BeamRider_s002.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/Breakout_s001.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/Breakout_s001.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/Breakout_s002.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/Breakout_s002.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/Enduro_s001.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/Enduro_s001.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/Enduro_s002.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/Enduro_s002.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/Pong_s001.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/Pong_s001.pkl -------------------------------------------------------------------------------- /dqn/logs_pkls/Pong_s002.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_pkls/Pong_s002.pkl -------------------------------------------------------------------------------- /dqn/logs_text/BeamRider_s001.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/BeamRider_s001.text -------------------------------------------------------------------------------- /dqn/logs_text/BeamRider_s002.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/BeamRider_s002.text -------------------------------------------------------------------------------- /dqn/logs_text/Breakout_s001.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/Breakout_s001.text -------------------------------------------------------------------------------- /dqn/logs_text/Breakout_s002.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/Breakout_s002.text -------------------------------------------------------------------------------- /dqn/logs_text/Enduro_s001.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/Enduro_s001.text -------------------------------------------------------------------------------- /dqn/logs_text/Enduro_s002.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/Enduro_s002.text -------------------------------------------------------------------------------- /dqn/logs_text/Pong_s001.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/Pong_s001.text -------------------------------------------------------------------------------- /dqn/logs_text/Pong_s002.text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/logs_text/Pong_s002.text -------------------------------------------------------------------------------- /dqn/plot_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/plot_dqn.py -------------------------------------------------------------------------------- /dqn/run_dqn_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/run_dqn_atari.py -------------------------------------------------------------------------------- /dqn/run_dqn_ram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/dqn/run_dqn_ram.py -------------------------------------------------------------------------------- /es/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/README.md -------------------------------------------------------------------------------- /es/bash_scripts/InvertedPendulum-v1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/bash_scripts/InvertedPendulum-v1.sh -------------------------------------------------------------------------------- /es/es.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/es.py -------------------------------------------------------------------------------- /es/figures/HalfCheetah-v1_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/figures/HalfCheetah-v1_log.png -------------------------------------------------------------------------------- /es/figures/HalfCheetah-v1_rewards_std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/figures/HalfCheetah-v1_rewards_std.png -------------------------------------------------------------------------------- /es/figures/InvertedPendulum-v1-old_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/figures/InvertedPendulum-v1-old_log.png -------------------------------------------------------------------------------- /es/figures/InvertedPendulum-v1-old_rewards_std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/figures/InvertedPendulum-v1-old_rewards_std.png -------------------------------------------------------------------------------- /es/figures/InvertedPendulum-v1_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/figures/InvertedPendulum-v1_log.png -------------------------------------------------------------------------------- /es/figures/InvertedPendulum-v1_rewards_std.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/figures/InvertedPendulum-v1_rewards_std.png -------------------------------------------------------------------------------- /es/logz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/logz.py -------------------------------------------------------------------------------- /es/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/main.py -------------------------------------------------------------------------------- /es/optimizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/optimizers.py -------------------------------------------------------------------------------- /es/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/plot.py -------------------------------------------------------------------------------- /es/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/test.py -------------------------------------------------------------------------------- /es/toy_es.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/toy_es.py -------------------------------------------------------------------------------- /es/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/es/utils.py -------------------------------------------------------------------------------- /g_learning/G-Learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/g_learning/G-Learning.py -------------------------------------------------------------------------------- /g_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/g_learning/README.md -------------------------------------------------------------------------------- /g_learning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /g_learning/figures/cliff_episode_length_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/g_learning/figures/cliff_episode_length_time.png -------------------------------------------------------------------------------- /g_learning/figures/cliff_episode_reward_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/g_learning/figures/cliff_episode_reward_time.png -------------------------------------------------------------------------------- /g_learning/figures/cliff_episodes_per_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/g_learning/figures/cliff_episodes_per_time.png -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/envs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/envs/README.md -------------------------------------------------------------------------------- /lib/envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/envs/blackjack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/envs/blackjack.py -------------------------------------------------------------------------------- /lib/envs/cliff_walking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/envs/cliff_walking.py -------------------------------------------------------------------------------- /lib/envs/gridworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/envs/gridworld.py -------------------------------------------------------------------------------- /lib/envs/two_room_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/envs/two_room_domain.py -------------------------------------------------------------------------------- /lib/envs/windy_gridworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/envs/windy_gridworld.py -------------------------------------------------------------------------------- /lib/plotting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/lib/plotting.py -------------------------------------------------------------------------------- /q_learning/Q-Learning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/Q-Learning.py -------------------------------------------------------------------------------- /q_learning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/README.md -------------------------------------------------------------------------------- /q_learning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /q_learning/figures/cliff_episode_length_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/cliff_episode_length_time.png -------------------------------------------------------------------------------- /q_learning/figures/cliff_episode_reward_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/cliff_episode_reward_time.png -------------------------------------------------------------------------------- /q_learning/figures/cliff_episodes_per_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/cliff_episodes_per_time.png -------------------------------------------------------------------------------- /q_learning/figures/cliff_sarsa_episode_length_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/cliff_sarsa_episode_length_time.png -------------------------------------------------------------------------------- /q_learning/figures/cliff_sarsa_episode_reward_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/cliff_sarsa_episode_reward_time.png -------------------------------------------------------------------------------- /q_learning/figures/cliff_sarsa_episodes_per_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/cliff_sarsa_episodes_per_time.png -------------------------------------------------------------------------------- /q_learning/figures/frozenlake8x8_episode_length_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/frozenlake8x8_episode_length_time.png -------------------------------------------------------------------------------- /q_learning/figures/frozenlake8x8_episode_reward_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/frozenlake8x8_episode_reward_time.png -------------------------------------------------------------------------------- /q_learning/figures/frozenlake8x8_episodes_per_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/frozenlake8x8_episodes_per_time.png -------------------------------------------------------------------------------- /q_learning/figures/frozenlake_episode_length_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/frozenlake_episode_length_time.png -------------------------------------------------------------------------------- /q_learning/figures/frozenlake_episode_reward_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/frozenlake_episode_reward_time.png -------------------------------------------------------------------------------- /q_learning/figures/frozenlake_episodes_per_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/frozenlake_episodes_per_time.png -------------------------------------------------------------------------------- /q_learning/figures/gridw_episode_length_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/gridw_episode_length_time.png -------------------------------------------------------------------------------- /q_learning/figures/gridw_episode_reward_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/gridw_episode_reward_time.png -------------------------------------------------------------------------------- /q_learning/figures/gridw_episodes_per_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/q_learning/figures/gridw_episodes_per_time.png -------------------------------------------------------------------------------- /trpo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/trpo/README.md -------------------------------------------------------------------------------- /trpo/fxn_approx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/trpo/fxn_approx.py -------------------------------------------------------------------------------- /trpo/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/trpo/main.py -------------------------------------------------------------------------------- /trpo/trpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/trpo/trpo.py -------------------------------------------------------------------------------- /trpo/utils_trpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/trpo/utils_trpo.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/logz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/utils/logz.py -------------------------------------------------------------------------------- /utils/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/utils/policies.py -------------------------------------------------------------------------------- /utils/utils_pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/utils/utils_pg.py -------------------------------------------------------------------------------- /utils/value_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/utils/value_functions.py -------------------------------------------------------------------------------- /vpg/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/README.md -------------------------------------------------------------------------------- /vpg/bash_scripts/CartPole-v0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/bash_scripts/CartPole-v0.sh -------------------------------------------------------------------------------- /vpg/bash_scripts/Pendulum-v0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/bash_scripts/Pendulum-v0.sh -------------------------------------------------------------------------------- /vpg/bash_scripts/halfcheetah.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/bash_scripts/halfcheetah.sh -------------------------------------------------------------------------------- /vpg/bash_scripts/hopper.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/bash_scripts/hopper.sh -------------------------------------------------------------------------------- /vpg/bash_scripts/walker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/bash_scripts/walker.sh -------------------------------------------------------------------------------- /vpg/figures/CartPole-v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/CartPole-v0.png -------------------------------------------------------------------------------- /vpg/figures/CartPole-v0_sm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/CartPole-v0_sm.png -------------------------------------------------------------------------------- /vpg/figures/HalfCheetah-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/HalfCheetah-v1.png -------------------------------------------------------------------------------- /vpg/figures/HalfCheetah-v1_sm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/HalfCheetah-v1_sm.png -------------------------------------------------------------------------------- /vpg/figures/Hopper-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/Hopper-v1.png -------------------------------------------------------------------------------- /vpg/figures/Hopper-v1_sm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/Hopper-v1_sm.png -------------------------------------------------------------------------------- /vpg/figures/Pendulum-v0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/Pendulum-v0.png -------------------------------------------------------------------------------- /vpg/figures/Pendulum-v0_sm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/Pendulum-v0_sm.png -------------------------------------------------------------------------------- /vpg/figures/Walker2d-v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/Walker2d-v1.png -------------------------------------------------------------------------------- /vpg/figures/Walker2d-v1_sm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/figures/Walker2d-v1_sm.png -------------------------------------------------------------------------------- /vpg/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/main.py -------------------------------------------------------------------------------- /vpg/plot_learning_curves.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DanielTakeshi/rl_algorithms/HEAD/vpg/plot_learning_curves.py --------------------------------------------------------------------------------