├── Atari ├── DQN_Boxing.py ├── DQN_MsPacman.py ├── DQN_Pong.py ├── cnn-100-random-episode │ ├── delete │ ├── part1_boxing_a_score.npy │ ├── part1_boxing_b_score.npy │ ├── part1_boxing_length.npy │ ├── part1_pong_a_score.npy │ ├── part1_pong_b_score.npy │ ├── part1_pong_length.npy │ ├── part_b_part1_MsPacman.py │ ├── part_b_part1_boxing.py │ └── part_b_part1_pong.py ├── cnn-untrained-Q-network │ ├── Mspacman │ │ └── part2_MsPacman_length.npy │ ├── boxing │ │ ├── 41.png │ │ ├── 42.png │ │ ├── 43.png │ │ ├── 44.png │ │ ├── check_data.py │ │ ├── cnn_for_boxing.py │ │ ├── delete │ │ ├── part2_boxing_a_score.npy │ │ ├── part2_boxing_b_score.npy │ │ ├── part2_boxing_difference_score.npy │ │ └── part2_boxing_length.npy │ └── pong │ │ ├── 21.png │ │ ├── 22.png │ │ ├── 23.png │ │ ├── check_data.py │ │ ├── cnn_for_pong.py │ │ ├── delete │ │ ├── part2_pong_a_score.npy │ │ ├── part2_pong_b_score.npy │ │ ├── part2_pong_difference_score.npy │ │ └── part2_pong_length.npy ├── pong │ ├── 22.png │ ├── 23.png │ ├── check_data.py │ ├── cnn_for_pong.py │ ├── delete │ ├── part2_pong_a_score.npy │ ├── part2_pong_b_score.npy │ ├── part2_pong_difference_score.npy │ └── part2_pong_length.npy └── readme.md ├── CartPole ├── different-neural-size-Q-learning │ ├── cartpole_5_neural_1000_load.py │ ├── cartpole_5_neural_1000_saved.py │ ├── cartpole_5_neural_30_load.py │ ├── cartpole_5_neural_30_saved.py │ └── delete ├── double-q-learning │ ├── cartpole_8_load.py │ ├── cartpole_8_saved.py │ └── delete ├── experience_replay │ ├── cartpole_6_buffer_replay_load.py │ ├── cartpole_6_buffer_replay_saved.py │ └── delete ├── hundred-random-episode │ ├── 100_random_episodes.py │ └── delete ├── offline-batch-Q-learning │ ├── batch_Q_learning_linear_0.001_length.png │ ├── batch_Q_learning_linear_0.001_reward.png │ ├── batch_Q_learning_neural_0.0001_length.png │ ├── batch_Q_learning_neural_0.0001_reward.png │ ├── cartpole_3_collect_data.py │ ├── cartpole_3_linear_4_load.py │ ├── cartpole_3_linear_4_saved.py │ ├── cartpole_3_neural_5_load.py │ ├── cartpole_3_neural_5_saved.py │ ├── check_data.py │ ├── delete │ ├── figure_1-3.png │ ├── length_data_part3_4_300.npy │ ├── loss_data_part3_4_300.npy │ └── reward_data_part3_4_300.npy ├── online-Q-learning │ ├── cartpole_4_neural_load.py │ ├── cartpole_4_neural_saved.py │ └── delete ├── readme ├── target-parameter │ ├── cartpole_7_target_load.py │ ├── cartpole_7_target_saved.py │ └── delete ├── three-random-episode │ ├── 3_random_episode.py │ └── delete └── train_data_2.npy ├── learning_curve ├── Capture_1.JPG ├── DQN_PICTURE.JPG ├── MsPacman0.png ├── MsPacman301.png ├── Pong19.png ├── Pong256.png ├── batch_Q_learning_linear_0.001_length.png ├── batch_Q_learning_linear_0.001_reward.png ├── batch_Q_learning_neural_0.0001_length.png ├── batch_Q_learning_neural_0.0001_reward.png ├── boxing0.png ├── boxing313.png ├── boxing_128_128.png ├── boxing_28_28.png ├── double_Q_learning_length.png ├── double_Q_learning_reward.png ├── experience_replay_length.png ├── experience_replay_reward.png ├── mapacman_28_28.png ├── mspacman_128_128.png ├── mspacman_28_28.png ├── online_Q_learning_neural_0.001_length.png ├── online_Q_learning_neural_0.001_reward.png ├── pong_128_128.png ├── pong_28_28.png ├── readme ├── target_parameter_length.png └── target_parameter_reward.png └── readme.md /Atari/DQN_Boxing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/DQN_Boxing.py -------------------------------------------------------------------------------- /Atari/DQN_MsPacman.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/DQN_MsPacman.py -------------------------------------------------------------------------------- /Atari/DQN_Pong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/DQN_Pong.py -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part1_boxing_a_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part1_boxing_a_score.npy -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part1_boxing_b_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part1_boxing_b_score.npy -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part1_boxing_length.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part1_boxing_length.npy -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part1_pong_a_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part1_pong_a_score.npy -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part1_pong_b_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part1_pong_b_score.npy -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part1_pong_length.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part1_pong_length.npy -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part_b_part1_MsPacman.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part_b_part1_MsPacman.py -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part_b_part1_boxing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part_b_part1_boxing.py -------------------------------------------------------------------------------- /Atari/cnn-100-random-episode/part_b_part1_pong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-100-random-episode/part_b_part1_pong.py -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/Mspacman/part2_MsPacman_length.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/Mspacman/part2_MsPacman_length.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/41.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/41.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/42.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/43.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/43.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/44.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/check_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/check_data.py -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/cnn_for_boxing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/cnn_for_boxing.py -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/part2_boxing_a_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/part2_boxing_a_score.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/part2_boxing_b_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/part2_boxing_b_score.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/part2_boxing_difference_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/part2_boxing_difference_score.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/boxing/part2_boxing_length.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/boxing/part2_boxing_length.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/21.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/22.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/23.png -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/check_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/check_data.py -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/cnn_for_pong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/cnn_for_pong.py -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/part2_pong_a_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/part2_pong_a_score.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/part2_pong_b_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/part2_pong_b_score.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/part2_pong_difference_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/part2_pong_difference_score.npy -------------------------------------------------------------------------------- /Atari/cnn-untrained-Q-network/pong/part2_pong_length.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/cnn-untrained-Q-network/pong/part2_pong_length.npy -------------------------------------------------------------------------------- /Atari/pong/22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/22.png -------------------------------------------------------------------------------- /Atari/pong/23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/23.png -------------------------------------------------------------------------------- /Atari/pong/check_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/check_data.py -------------------------------------------------------------------------------- /Atari/pong/cnn_for_pong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/cnn_for_pong.py -------------------------------------------------------------------------------- /Atari/pong/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Atari/pong/part2_pong_a_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/part2_pong_a_score.npy -------------------------------------------------------------------------------- /Atari/pong/part2_pong_b_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/part2_pong_b_score.npy -------------------------------------------------------------------------------- /Atari/pong/part2_pong_difference_score.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/part2_pong_difference_score.npy -------------------------------------------------------------------------------- /Atari/pong/part2_pong_length.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/pong/part2_pong_length.npy -------------------------------------------------------------------------------- /Atari/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/Atari/readme.md -------------------------------------------------------------------------------- /CartPole/different-neural-size-Q-learning/cartpole_5_neural_1000_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/different-neural-size-Q-learning/cartpole_5_neural_1000_load.py -------------------------------------------------------------------------------- /CartPole/different-neural-size-Q-learning/cartpole_5_neural_1000_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/different-neural-size-Q-learning/cartpole_5_neural_1000_saved.py -------------------------------------------------------------------------------- /CartPole/different-neural-size-Q-learning/cartpole_5_neural_30_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/different-neural-size-Q-learning/cartpole_5_neural_30_load.py -------------------------------------------------------------------------------- /CartPole/different-neural-size-Q-learning/cartpole_5_neural_30_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/different-neural-size-Q-learning/cartpole_5_neural_30_saved.py -------------------------------------------------------------------------------- /CartPole/different-neural-size-Q-learning/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/double-q-learning/cartpole_8_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/double-q-learning/cartpole_8_load.py -------------------------------------------------------------------------------- /CartPole/double-q-learning/cartpole_8_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/double-q-learning/cartpole_8_saved.py -------------------------------------------------------------------------------- /CartPole/double-q-learning/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/experience_replay/cartpole_6_buffer_replay_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/experience_replay/cartpole_6_buffer_replay_load.py -------------------------------------------------------------------------------- /CartPole/experience_replay/cartpole_6_buffer_replay_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/experience_replay/cartpole_6_buffer_replay_saved.py -------------------------------------------------------------------------------- /CartPole/experience_replay/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/hundred-random-episode/100_random_episodes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/hundred-random-episode/100_random_episodes.py -------------------------------------------------------------------------------- /CartPole/hundred-random-episode/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/batch_Q_learning_linear_0.001_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/batch_Q_learning_linear_0.001_length.png -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/batch_Q_learning_linear_0.001_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/batch_Q_learning_linear_0.001_reward.png -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/batch_Q_learning_neural_0.0001_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/batch_Q_learning_neural_0.0001_length.png -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/batch_Q_learning_neural_0.0001_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/batch_Q_learning_neural_0.0001_reward.png -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/cartpole_3_collect_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/cartpole_3_collect_data.py -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/cartpole_3_linear_4_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/cartpole_3_linear_4_load.py -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/cartpole_3_linear_4_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/cartpole_3_linear_4_saved.py -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/cartpole_3_neural_5_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/cartpole_3_neural_5_load.py -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/cartpole_3_neural_5_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/cartpole_3_neural_5_saved.py -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/check_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/check_data.py -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/figure_1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/figure_1-3.png -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/length_data_part3_4_300.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/length_data_part3_4_300.npy -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/loss_data_part3_4_300.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/loss_data_part3_4_300.npy -------------------------------------------------------------------------------- /CartPole/offline-batch-Q-learning/reward_data_part3_4_300.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/offline-batch-Q-learning/reward_data_part3_4_300.npy -------------------------------------------------------------------------------- /CartPole/online-Q-learning/cartpole_4_neural_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/online-Q-learning/cartpole_4_neural_load.py -------------------------------------------------------------------------------- /CartPole/online-Q-learning/cartpole_4_neural_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/online-Q-learning/cartpole_4_neural_saved.py -------------------------------------------------------------------------------- /CartPole/online-Q-learning/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/readme: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/readme -------------------------------------------------------------------------------- /CartPole/target-parameter/cartpole_7_target_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/target-parameter/cartpole_7_target_load.py -------------------------------------------------------------------------------- /CartPole/target-parameter/cartpole_7_target_saved.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/target-parameter/cartpole_7_target_saved.py -------------------------------------------------------------------------------- /CartPole/target-parameter/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/three-random-episode/3_random_episode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/three-random-episode/3_random_episode.py -------------------------------------------------------------------------------- /CartPole/three-random-episode/delete: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CartPole/train_data_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/CartPole/train_data_2.npy -------------------------------------------------------------------------------- /learning_curve/Capture_1.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/Capture_1.JPG -------------------------------------------------------------------------------- /learning_curve/DQN_PICTURE.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/DQN_PICTURE.JPG -------------------------------------------------------------------------------- /learning_curve/MsPacman0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/MsPacman0.png -------------------------------------------------------------------------------- /learning_curve/MsPacman301.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/MsPacman301.png -------------------------------------------------------------------------------- /learning_curve/Pong19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/Pong19.png -------------------------------------------------------------------------------- /learning_curve/Pong256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/Pong256.png -------------------------------------------------------------------------------- /learning_curve/batch_Q_learning_linear_0.001_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/batch_Q_learning_linear_0.001_length.png -------------------------------------------------------------------------------- /learning_curve/batch_Q_learning_linear_0.001_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/batch_Q_learning_linear_0.001_reward.png -------------------------------------------------------------------------------- /learning_curve/batch_Q_learning_neural_0.0001_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/batch_Q_learning_neural_0.0001_length.png -------------------------------------------------------------------------------- /learning_curve/batch_Q_learning_neural_0.0001_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/batch_Q_learning_neural_0.0001_reward.png -------------------------------------------------------------------------------- /learning_curve/boxing0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/boxing0.png -------------------------------------------------------------------------------- /learning_curve/boxing313.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/boxing313.png -------------------------------------------------------------------------------- /learning_curve/boxing_128_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/boxing_128_128.png -------------------------------------------------------------------------------- /learning_curve/boxing_28_28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/boxing_28_28.png -------------------------------------------------------------------------------- /learning_curve/double_Q_learning_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/double_Q_learning_length.png -------------------------------------------------------------------------------- /learning_curve/double_Q_learning_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/double_Q_learning_reward.png -------------------------------------------------------------------------------- /learning_curve/experience_replay_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/experience_replay_length.png -------------------------------------------------------------------------------- /learning_curve/experience_replay_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/experience_replay_reward.png -------------------------------------------------------------------------------- /learning_curve/mapacman_28_28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/mapacman_28_28.png -------------------------------------------------------------------------------- /learning_curve/mspacman_128_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/mspacman_128_128.png -------------------------------------------------------------------------------- /learning_curve/mspacman_28_28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/mspacman_28_28.png -------------------------------------------------------------------------------- /learning_curve/online_Q_learning_neural_0.001_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/online_Q_learning_neural_0.001_length.png -------------------------------------------------------------------------------- /learning_curve/online_Q_learning_neural_0.001_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/online_Q_learning_neural_0.001_reward.png -------------------------------------------------------------------------------- /learning_curve/pong_128_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/pong_128_128.png -------------------------------------------------------------------------------- /learning_curve/pong_28_28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/pong_28_28.png -------------------------------------------------------------------------------- /learning_curve/readme: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /learning_curve/target_parameter_length.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/target_parameter_length.png -------------------------------------------------------------------------------- /learning_curve/target_parameter_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/learning_curve/target_parameter_reward.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohaomao/Reinforcment-Leanring-algorithm/HEAD/readme.md --------------------------------------------------------------------------------