├── README.md ├── chapter10 ├── DQN.ipynb ├── agent.py ├── cnn.py ├── deep_q.py ├── environment.py ├── experience.py ├── history.py ├── main.py └── statistic.py ├── chapter2 ├── Breakout.ipynb ├── Breakout.py ├── Environment.ipynb ├── Environment.py ├── greedy.ipynb └── greedy.py ├── chapter3 ├── Policy Evaluation.ipynb ├── Policy Evaluation.py ├── Policy Improvement.ipynb ├── Policy Improvement.py ├── Value Iteration.ipynb └── Value Iteration.py ├── chapter4 ├── MC firstvisit prediciton.ipynb ├── MC firstvisit prediciton.py ├── MC_blackjack.ipynb ├── MC_blackjack.py ├── MC_firstvisit_control .ipynb ├── MC_firstvisit_control .py ├── MC_off_policy_weighted_importance_sampleing.ipynb └── MC_off_policy_weighted_importance_sampleing.py ├── chapter5 ├── TD_CartPole.ipynb ├── TD_CartPole.py ├── TD_Qlearning.ipynb ├── TD_Qlearning.py ├── TD_sarsa.ipynb └── TD_sarsa.py ├── chapter6 ├── FA_Qlearning.ipynb ├── FA_Qlearning.py ├── FA_Qlearning2.ipynb ├── FA_Qlearning2.py ├── FA_SARSA.ipynb └── FA_SARSA.py └── chapter7 ├── PG_ACPG.ipynb └── PG_MCPG.ipynb /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/README.md -------------------------------------------------------------------------------- /chapter10/DQN.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/DQN.ipynb -------------------------------------------------------------------------------- /chapter10/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/agent.py -------------------------------------------------------------------------------- /chapter10/cnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/cnn.py -------------------------------------------------------------------------------- /chapter10/deep_q.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/deep_q.py -------------------------------------------------------------------------------- /chapter10/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/environment.py -------------------------------------------------------------------------------- /chapter10/experience.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/experience.py -------------------------------------------------------------------------------- /chapter10/history.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/history.py -------------------------------------------------------------------------------- /chapter10/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/main.py -------------------------------------------------------------------------------- /chapter10/statistic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter10/statistic.py -------------------------------------------------------------------------------- /chapter2/Breakout.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter2/Breakout.ipynb -------------------------------------------------------------------------------- /chapter2/Breakout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter2/Breakout.py -------------------------------------------------------------------------------- /chapter2/Environment.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter2/Environment.ipynb -------------------------------------------------------------------------------- /chapter2/Environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter2/Environment.py -------------------------------------------------------------------------------- /chapter2/greedy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter2/greedy.ipynb -------------------------------------------------------------------------------- /chapter2/greedy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter2/greedy.py -------------------------------------------------------------------------------- /chapter3/Policy Evaluation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter3/Policy Evaluation.ipynb -------------------------------------------------------------------------------- /chapter3/Policy Evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter3/Policy Evaluation.py -------------------------------------------------------------------------------- /chapter3/Policy Improvement.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter3/Policy Improvement.ipynb -------------------------------------------------------------------------------- /chapter3/Policy Improvement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter3/Policy Improvement.py -------------------------------------------------------------------------------- /chapter3/Value Iteration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter3/Value Iteration.ipynb -------------------------------------------------------------------------------- /chapter3/Value Iteration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter3/Value Iteration.py -------------------------------------------------------------------------------- /chapter4/MC firstvisit prediciton.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC firstvisit prediciton.ipynb -------------------------------------------------------------------------------- /chapter4/MC firstvisit prediciton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC firstvisit prediciton.py -------------------------------------------------------------------------------- /chapter4/MC_blackjack.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC_blackjack.ipynb -------------------------------------------------------------------------------- /chapter4/MC_blackjack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC_blackjack.py -------------------------------------------------------------------------------- /chapter4/MC_firstvisit_control .ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC_firstvisit_control .ipynb -------------------------------------------------------------------------------- /chapter4/MC_firstvisit_control .py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC_firstvisit_control .py -------------------------------------------------------------------------------- /chapter4/MC_off_policy_weighted_importance_sampleing.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC_off_policy_weighted_importance_sampleing.ipynb -------------------------------------------------------------------------------- /chapter4/MC_off_policy_weighted_importance_sampleing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter4/MC_off_policy_weighted_importance_sampleing.py -------------------------------------------------------------------------------- /chapter5/TD_CartPole.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter5/TD_CartPole.ipynb -------------------------------------------------------------------------------- /chapter5/TD_CartPole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter5/TD_CartPole.py -------------------------------------------------------------------------------- /chapter5/TD_Qlearning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter5/TD_Qlearning.ipynb -------------------------------------------------------------------------------- /chapter5/TD_Qlearning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter5/TD_Qlearning.py -------------------------------------------------------------------------------- /chapter5/TD_sarsa.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter5/TD_sarsa.ipynb -------------------------------------------------------------------------------- /chapter5/TD_sarsa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter5/TD_sarsa.py -------------------------------------------------------------------------------- /chapter6/FA_Qlearning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter6/FA_Qlearning.ipynb -------------------------------------------------------------------------------- /chapter6/FA_Qlearning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter6/FA_Qlearning.py -------------------------------------------------------------------------------- /chapter6/FA_Qlearning2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter6/FA_Qlearning2.ipynb -------------------------------------------------------------------------------- /chapter6/FA_Qlearning2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter6/FA_Qlearning2.py -------------------------------------------------------------------------------- /chapter6/FA_SARSA.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter6/FA_SARSA.ipynb -------------------------------------------------------------------------------- /chapter6/FA_SARSA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter6/FA_SARSA.py -------------------------------------------------------------------------------- /chapter7/PG_ACPG.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter7/PG_ACPG.ipynb -------------------------------------------------------------------------------- /chapter7/PG_MCPG.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenzomi12/Deep-Reinforcement-Learning/HEAD/chapter7/PG_MCPG.ipynb --------------------------------------------------------------------------------