├── 1.无状态问题 ├── .ipynb_checkpoints │ ├── 1.贪婪算法-checkpoint.ipynb │ ├── 2.递减的贪婪算法-checkpoint.ipynb │ ├── 3.上置信界算法-checkpoint.ipynb │ └── 4.汤普森采样算法-checkpoint.ipynb ├── 1.贪婪算法.ipynb ├── 2.递减的贪婪算法.ipynb ├── 3.上置信界算法.ipynb └── 4.汤普森采样算法.ipynb ├── 10.PPO算法 ├── .ipynb_checkpoints │ ├── 1.PPO算法_平衡车-checkpoint.ipynb │ └── 2.PPO算法_倒立摆-checkpoint.ipynb ├── 1.PPO算法_平衡车.ipynb └── 2.PPO算法_倒立摆.ipynb ├── 11.DDPG算法 ├── .ipynb_checkpoints │ └── 1.DDPG算法-checkpoint.ipynb └── 1.DDPG算法.ipynb ├── 12.SAC算法 ├── .ipynb_checkpoints │ ├── 1.SAC算法_倒立摆-checkpoint.ipynb │ ├── 2.SAC算法_平衡车-checkpoint.ipynb │ ├── x1.倒立摆-checkpoint.ipynb │ └── x2.平衡车-checkpoint.ipynb ├── 1.SAC算法_倒立摆.ipynb ├── 2.SAC算法_平衡车.ipynb ├── x1.倒立摆.ipynb └── x2.平衡车.ipynb ├── 13.模仿学习 ├── .ipynb_checkpoints │ └── 1.模仿学习-checkpoint.ipynb ├── 1.模仿学习_平衡车.ipynb └── 2.模仿学习_倒立摆.ipynb ├── 14.离线学习 ├── .ipynb_checkpoints │ └── 1.离线学习-checkpoint.ipynb └── 1.离线学习.ipynb ├── 15.MPC ├── .ipynb_checkpoints │ └── 1.MPC-checkpoint.ipynb └── 1.MPC.ipynb ├── 16.MBPO ├── .ipynb_checkpoints │ ├── 1.MBPO-Copy1-checkpoint.ipynb │ └── 1.MBPO-checkpoint.ipynb └── 1.MBPO.ipynb ├── 17.目标导向的强化学习 ├── .ipynb_checkpoints │ └── 1.目标导向的强化学习-checkpoint.ipynb └── 1.目标导向的强化学习.ipynb ├── 18.多智能体 ├── .ipynb_checkpoints │ ├── 1.多智能体-Copy1-checkpoint.ipynb │ └── 1.多智能体-checkpoint.ipynb ├── 1.多智能体.ipynb ├── __pycache__ │ └── combat.cpython-36.pyc └── combat.py ├── 2.马尔可夫决策过程 ├── .ipynb_checkpoints │ ├── 1.蒙特卡洛法-checkpoint.ipynb │ └── 2.贝尔曼方程矩阵-checkpoint.ipynb ├── 1.蒙特卡洛法.ipynb └── 2.贝尔曼方程矩阵.ipynb ├── 3.动态规划算法 ├── .ipynb_checkpoints │ ├── 1.策略迭代算法-checkpoint.ipynb │ ├── 2.价值迭代算法-checkpoint.ipynb │ └── 3.冰湖-checkpoint.ipynb ├── 1.策略迭代算法.ipynb ├── 2.价值迭代算法.ipynb └── 3.冰湖.ipynb ├── 4.时序差分算法 ├── .ipynb_checkpoints │ ├── 1.Sarsa算法-checkpoint.ipynb │ ├── 2.N步Sarsa算法-checkpoint.ipynb │ └── 3.QLearning-checkpoint.ipynb ├── 1.Sarsa算法.ipynb ├── 2.N步Sarsa算法.ipynb └── 3.QLearning.ipynb ├── 5.DynaQ算法 ├── .ipynb_checkpoints │ └── 1.DynaQ-checkpoint.ipynb └── 1.DynaQ.ipynb ├── 6.DQN算法 ├── .ipynb_checkpoints │ ├── 1.单模型-checkpoint.ipynb │ ├── 2.双模型_平衡车-checkpoint.ipynb │ ├── 3.双模型_倒立摆-checkpoint.ipynb │ ├── 4.DoubleDQN-checkpoint.ipynb │ └── 5.DuelingDQN-checkpoint.ipynb ├── 1.单模型.ipynb ├── 2.双模型_平衡车.ipynb ├── 3.双模型_倒立摆.ipynb ├── 4.DoubleDQN.ipynb └── 5.DuelingDQN.ipynb ├── 7.策略梯度算法 ├── .ipynb_checkpoints │ ├── 1.Reinforce算法-checkpoint.ipynb │ ├── 2.Actor_Critic算法-checkpoint.ipynb │ ├── 3.TRPO算法_未完成-checkpoint.ipynb │ └── 4.PPO算法-checkpoint.ipynb └── 1.Reinforce算法.ipynb ├── 8.Actor_Critic算法 ├── .ipynb_checkpoints │ └── 1.Actor_Critic算法-checkpoint.ipynb └── 1.Actor_Critic算法.ipynb ├── README.md └── x1.gym ├── .ipynb_checkpoints └── 1.gym-checkpoint.ipynb └── 1.gym.ipynb /1.无状态问题/.ipynb_checkpoints/1.贪婪算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/.ipynb_checkpoints/1.贪婪算法-checkpoint.ipynb -------------------------------------------------------------------------------- /1.无状态问题/.ipynb_checkpoints/2.递减的贪婪算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/.ipynb_checkpoints/2.递减的贪婪算法-checkpoint.ipynb -------------------------------------------------------------------------------- /1.无状态问题/.ipynb_checkpoints/3.上置信界算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/.ipynb_checkpoints/3.上置信界算法-checkpoint.ipynb -------------------------------------------------------------------------------- /1.无状态问题/.ipynb_checkpoints/4.汤普森采样算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/.ipynb_checkpoints/4.汤普森采样算法-checkpoint.ipynb -------------------------------------------------------------------------------- /1.无状态问题/1.贪婪算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/1.贪婪算法.ipynb -------------------------------------------------------------------------------- /1.无状态问题/2.递减的贪婪算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/2.递减的贪婪算法.ipynb -------------------------------------------------------------------------------- /1.无状态问题/3.上置信界算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/3.上置信界算法.ipynb -------------------------------------------------------------------------------- /1.无状态问题/4.汤普森采样算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/1.无状态问题/4.汤普森采样算法.ipynb -------------------------------------------------------------------------------- /10.PPO算法/.ipynb_checkpoints/1.PPO算法_平衡车-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/10.PPO算法/.ipynb_checkpoints/1.PPO算法_平衡车-checkpoint.ipynb -------------------------------------------------------------------------------- /10.PPO算法/.ipynb_checkpoints/2.PPO算法_倒立摆-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/10.PPO算法/.ipynb_checkpoints/2.PPO算法_倒立摆-checkpoint.ipynb -------------------------------------------------------------------------------- /10.PPO算法/1.PPO算法_平衡车.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/10.PPO算法/1.PPO算法_平衡车.ipynb -------------------------------------------------------------------------------- /10.PPO算法/2.PPO算法_倒立摆.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/10.PPO算法/2.PPO算法_倒立摆.ipynb -------------------------------------------------------------------------------- /11.DDPG算法/.ipynb_checkpoints/1.DDPG算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/11.DDPG算法/.ipynb_checkpoints/1.DDPG算法-checkpoint.ipynb -------------------------------------------------------------------------------- /11.DDPG算法/1.DDPG算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/11.DDPG算法/1.DDPG算法.ipynb -------------------------------------------------------------------------------- /12.SAC算法/.ipynb_checkpoints/1.SAC算法_倒立摆-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/.ipynb_checkpoints/1.SAC算法_倒立摆-checkpoint.ipynb -------------------------------------------------------------------------------- /12.SAC算法/.ipynb_checkpoints/2.SAC算法_平衡车-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/.ipynb_checkpoints/2.SAC算法_平衡车-checkpoint.ipynb -------------------------------------------------------------------------------- /12.SAC算法/.ipynb_checkpoints/x1.倒立摆-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/.ipynb_checkpoints/x1.倒立摆-checkpoint.ipynb -------------------------------------------------------------------------------- /12.SAC算法/.ipynb_checkpoints/x2.平衡车-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/.ipynb_checkpoints/x2.平衡车-checkpoint.ipynb -------------------------------------------------------------------------------- /12.SAC算法/1.SAC算法_倒立摆.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/1.SAC算法_倒立摆.ipynb -------------------------------------------------------------------------------- /12.SAC算法/2.SAC算法_平衡车.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/2.SAC算法_平衡车.ipynb -------------------------------------------------------------------------------- /12.SAC算法/x1.倒立摆.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/x1.倒立摆.ipynb -------------------------------------------------------------------------------- /12.SAC算法/x2.平衡车.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/12.SAC算法/x2.平衡车.ipynb -------------------------------------------------------------------------------- /13.模仿学习/.ipynb_checkpoints/1.模仿学习-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/13.模仿学习/.ipynb_checkpoints/1.模仿学习-checkpoint.ipynb -------------------------------------------------------------------------------- /13.模仿学习/1.模仿学习_平衡车.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/13.模仿学习/1.模仿学习_平衡车.ipynb -------------------------------------------------------------------------------- /13.模仿学习/2.模仿学习_倒立摆.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/13.模仿学习/2.模仿学习_倒立摆.ipynb -------------------------------------------------------------------------------- /14.离线学习/.ipynb_checkpoints/1.离线学习-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/14.离线学习/.ipynb_checkpoints/1.离线学习-checkpoint.ipynb -------------------------------------------------------------------------------- /14.离线学习/1.离线学习.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/14.离线学习/1.离线学习.ipynb -------------------------------------------------------------------------------- /15.MPC/.ipynb_checkpoints/1.MPC-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/15.MPC/.ipynb_checkpoints/1.MPC-checkpoint.ipynb -------------------------------------------------------------------------------- /15.MPC/1.MPC.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/15.MPC/1.MPC.ipynb -------------------------------------------------------------------------------- /16.MBPO/.ipynb_checkpoints/1.MBPO-Copy1-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/16.MBPO/.ipynb_checkpoints/1.MBPO-Copy1-checkpoint.ipynb -------------------------------------------------------------------------------- /16.MBPO/.ipynb_checkpoints/1.MBPO-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/16.MBPO/.ipynb_checkpoints/1.MBPO-checkpoint.ipynb -------------------------------------------------------------------------------- /16.MBPO/1.MBPO.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/16.MBPO/1.MBPO.ipynb -------------------------------------------------------------------------------- /17.目标导向的强化学习/.ipynb_checkpoints/1.目标导向的强化学习-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/17.目标导向的强化学习/.ipynb_checkpoints/1.目标导向的强化学习-checkpoint.ipynb -------------------------------------------------------------------------------- /17.目标导向的强化学习/1.目标导向的强化学习.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/17.目标导向的强化学习/1.目标导向的强化学习.ipynb -------------------------------------------------------------------------------- /18.多智能体/.ipynb_checkpoints/1.多智能体-Copy1-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/18.多智能体/.ipynb_checkpoints/1.多智能体-Copy1-checkpoint.ipynb -------------------------------------------------------------------------------- /18.多智能体/.ipynb_checkpoints/1.多智能体-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/18.多智能体/.ipynb_checkpoints/1.多智能体-checkpoint.ipynb -------------------------------------------------------------------------------- /18.多智能体/1.多智能体.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/18.多智能体/1.多智能体.ipynb -------------------------------------------------------------------------------- /18.多智能体/__pycache__/combat.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/18.多智能体/__pycache__/combat.cpython-36.pyc -------------------------------------------------------------------------------- /18.多智能体/combat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/18.多智能体/combat.py -------------------------------------------------------------------------------- /2.马尔可夫决策过程/.ipynb_checkpoints/1.蒙特卡洛法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/2.马尔可夫决策过程/.ipynb_checkpoints/1.蒙特卡洛法-checkpoint.ipynb -------------------------------------------------------------------------------- /2.马尔可夫决策过程/.ipynb_checkpoints/2.贝尔曼方程矩阵-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/2.马尔可夫决策过程/.ipynb_checkpoints/2.贝尔曼方程矩阵-checkpoint.ipynb -------------------------------------------------------------------------------- /2.马尔可夫决策过程/1.蒙特卡洛法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/2.马尔可夫决策过程/1.蒙特卡洛法.ipynb -------------------------------------------------------------------------------- /2.马尔可夫决策过程/2.贝尔曼方程矩阵.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/2.马尔可夫决策过程/2.贝尔曼方程矩阵.ipynb -------------------------------------------------------------------------------- /3.动态规划算法/.ipynb_checkpoints/1.策略迭代算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/3.动态规划算法/.ipynb_checkpoints/1.策略迭代算法-checkpoint.ipynb -------------------------------------------------------------------------------- /3.动态规划算法/.ipynb_checkpoints/2.价值迭代算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/3.动态规划算法/.ipynb_checkpoints/2.价值迭代算法-checkpoint.ipynb -------------------------------------------------------------------------------- /3.动态规划算法/.ipynb_checkpoints/3.冰湖-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/3.动态规划算法/.ipynb_checkpoints/3.冰湖-checkpoint.ipynb -------------------------------------------------------------------------------- /3.动态规划算法/1.策略迭代算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/3.动态规划算法/1.策略迭代算法.ipynb -------------------------------------------------------------------------------- /3.动态规划算法/2.价值迭代算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/3.动态规划算法/2.价值迭代算法.ipynb -------------------------------------------------------------------------------- /3.动态规划算法/3.冰湖.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/3.动态规划算法/3.冰湖.ipynb -------------------------------------------------------------------------------- /4.时序差分算法/.ipynb_checkpoints/1.Sarsa算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/4.时序差分算法/.ipynb_checkpoints/1.Sarsa算法-checkpoint.ipynb -------------------------------------------------------------------------------- /4.时序差分算法/.ipynb_checkpoints/2.N步Sarsa算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/4.时序差分算法/.ipynb_checkpoints/2.N步Sarsa算法-checkpoint.ipynb -------------------------------------------------------------------------------- /4.时序差分算法/.ipynb_checkpoints/3.QLearning-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/4.时序差分算法/.ipynb_checkpoints/3.QLearning-checkpoint.ipynb -------------------------------------------------------------------------------- /4.时序差分算法/1.Sarsa算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/4.时序差分算法/1.Sarsa算法.ipynb -------------------------------------------------------------------------------- /4.时序差分算法/2.N步Sarsa算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/4.时序差分算法/2.N步Sarsa算法.ipynb -------------------------------------------------------------------------------- /4.时序差分算法/3.QLearning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/4.时序差分算法/3.QLearning.ipynb -------------------------------------------------------------------------------- /5.DynaQ算法/.ipynb_checkpoints/1.DynaQ-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/5.DynaQ算法/.ipynb_checkpoints/1.DynaQ-checkpoint.ipynb -------------------------------------------------------------------------------- /5.DynaQ算法/1.DynaQ.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/5.DynaQ算法/1.DynaQ.ipynb -------------------------------------------------------------------------------- /6.DQN算法/.ipynb_checkpoints/1.单模型-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/.ipynb_checkpoints/1.单模型-checkpoint.ipynb -------------------------------------------------------------------------------- /6.DQN算法/.ipynb_checkpoints/2.双模型_平衡车-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/.ipynb_checkpoints/2.双模型_平衡车-checkpoint.ipynb -------------------------------------------------------------------------------- /6.DQN算法/.ipynb_checkpoints/3.双模型_倒立摆-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/.ipynb_checkpoints/3.双模型_倒立摆-checkpoint.ipynb -------------------------------------------------------------------------------- /6.DQN算法/.ipynb_checkpoints/4.DoubleDQN-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/.ipynb_checkpoints/4.DoubleDQN-checkpoint.ipynb -------------------------------------------------------------------------------- /6.DQN算法/.ipynb_checkpoints/5.DuelingDQN-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/.ipynb_checkpoints/5.DuelingDQN-checkpoint.ipynb -------------------------------------------------------------------------------- /6.DQN算法/1.单模型.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/1.单模型.ipynb -------------------------------------------------------------------------------- /6.DQN算法/2.双模型_平衡车.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/2.双模型_平衡车.ipynb -------------------------------------------------------------------------------- /6.DQN算法/3.双模型_倒立摆.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/3.双模型_倒立摆.ipynb -------------------------------------------------------------------------------- /6.DQN算法/4.DoubleDQN.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/4.DoubleDQN.ipynb -------------------------------------------------------------------------------- /6.DQN算法/5.DuelingDQN.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/6.DQN算法/5.DuelingDQN.ipynb -------------------------------------------------------------------------------- /7.策略梯度算法/.ipynb_checkpoints/1.Reinforce算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/7.策略梯度算法/.ipynb_checkpoints/1.Reinforce算法-checkpoint.ipynb -------------------------------------------------------------------------------- /7.策略梯度算法/.ipynb_checkpoints/2.Actor_Critic算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/7.策略梯度算法/.ipynb_checkpoints/2.Actor_Critic算法-checkpoint.ipynb -------------------------------------------------------------------------------- /7.策略梯度算法/.ipynb_checkpoints/3.TRPO算法_未完成-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/7.策略梯度算法/.ipynb_checkpoints/3.TRPO算法_未完成-checkpoint.ipynb -------------------------------------------------------------------------------- /7.策略梯度算法/.ipynb_checkpoints/4.PPO算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/7.策略梯度算法/.ipynb_checkpoints/4.PPO算法-checkpoint.ipynb -------------------------------------------------------------------------------- /7.策略梯度算法/1.Reinforce算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/7.策略梯度算法/1.Reinforce算法.ipynb -------------------------------------------------------------------------------- /8.Actor_Critic算法/.ipynb_checkpoints/1.Actor_Critic算法-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/8.Actor_Critic算法/.ipynb_checkpoints/1.Actor_Critic算法-checkpoint.ipynb -------------------------------------------------------------------------------- /8.Actor_Critic算法/1.Actor_Critic算法.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/8.Actor_Critic算法/1.Actor_Critic算法.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/README.md -------------------------------------------------------------------------------- /x1.gym/.ipynb_checkpoints/1.gym-checkpoint.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/x1.gym/.ipynb_checkpoints/1.gym-checkpoint.ipynb -------------------------------------------------------------------------------- /x1.gym/1.gym.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lansinuote/Simple_Reinforcement_Learning/HEAD/x1.gym/1.gym.ipynb --------------------------------------------------------------------------------