├── .gitignore ├── .gitmodules ├── README.md ├── lecture ├── notes-en │ ├── 0-Markov-Chain.md │ ├── 1-Reinforcement-Learning.md │ ├── 10-Control-as-Inference.md │ ├── 11-Inverse-RL.md │ ├── 12-RL-wtih-Sequence-Model.md │ ├── 13-Transfer-Learning.md │ ├── 2-Policy-Gradient.md │ ├── 3-Actor-Critic.md │ ├── 4-Value-Function-Methods.md │ ├── 5-Model-Based-RL.md │ ├── 6-Exploration.md │ ├── 7-Offline-RL.md │ ├── 8-RL-Theory.md │ ├── 9-Generative-Model.md │ └── pic │ │ ├── LM.png │ │ ├── Qlearning.png │ │ ├── RNN.png │ │ ├── Transformer.png │ │ └── control.png └── notes-zh │ ├── 0-preliminaries.md │ ├── 10-optimal_control_planning.md │ ├── 11-model-based.md │ ├── 12-model-based-with-policy.md │ ├── 13-exploration_1.md │ ├── 14-exploration_2.md │ ├── 15-offline-RL_1.md │ ├── 16-offline-RL_2.md │ ├── 17-RL-theory.md │ ├── 18-vae.md │ ├── 19-soft-optimality.md │ ├── 2-imitation_learning.md │ ├── 20-IRL.md │ ├── 21-RL-LM.md │ ├── 22-transfer-meta.md │ ├── 23-challenge.md │ ├── 3-pytorch.md │ ├── 4-intro2RL.md │ ├── 5-policy_grad.md │ ├── 6-actor-critic.md │ ├── 7-value_func.md │ ├── 8-Q_learning.md │ ├── 9-advanced_policy_grad.md │ ├── CS_285_Fa23_PyTorch_Tutorial.ipynb │ ├── assets │ ├── 10-1.png │ ├── 11-1.png │ ├── 12-1.jpeg │ ├── 14-1.jpeg │ ├── 14-2.png │ ├── 14-3.jpeg │ ├── 15-1.png │ ├── 16-1.png │ ├── 17-1.jpeg │ ├── 18-1.png │ ├── 19-1.png │ ├── 19-2.jpeg │ ├── 19-3.jpeg │ ├── 2-1.png │ ├── 21-1.png │ ├── 21-2.jpeg │ ├── 22-1.png │ ├── 22-2.png │ ├── 22-3.png │ ├── 22-4.png │ ├── 23-1.jpeg │ ├── 23-2.png │ ├── 4-1.png │ ├── 5-1.jpeg │ ├── 9-1.png │ ├── newton.ipynb │ └── not_implement.png │ ├── change.py │ └── takeaway.md └── tutorials ├── 0-intro.ipynb ├── assets └── 0-1.png ├── install.sh ├── requirements.txt ├── utils.py └── utils_0_intro.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/.gitmodules -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ./lecture/notes-zh/0-preliminaries.md -------------------------------------------------------------------------------- /lecture/notes-en/0-Markov-Chain.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/0-Markov-Chain.md -------------------------------------------------------------------------------- /lecture/notes-en/1-Reinforcement-Learning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/1-Reinforcement-Learning.md -------------------------------------------------------------------------------- /lecture/notes-en/10-Control-as-Inference.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/10-Control-as-Inference.md -------------------------------------------------------------------------------- /lecture/notes-en/11-Inverse-RL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/11-Inverse-RL.md -------------------------------------------------------------------------------- /lecture/notes-en/12-RL-wtih-Sequence-Model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/12-RL-wtih-Sequence-Model.md -------------------------------------------------------------------------------- /lecture/notes-en/13-Transfer-Learning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/13-Transfer-Learning.md -------------------------------------------------------------------------------- /lecture/notes-en/2-Policy-Gradient.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/2-Policy-Gradient.md -------------------------------------------------------------------------------- /lecture/notes-en/3-Actor-Critic.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/3-Actor-Critic.md -------------------------------------------------------------------------------- /lecture/notes-en/4-Value-Function-Methods.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/4-Value-Function-Methods.md -------------------------------------------------------------------------------- /lecture/notes-en/5-Model-Based-RL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/5-Model-Based-RL.md -------------------------------------------------------------------------------- /lecture/notes-en/6-Exploration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/6-Exploration.md -------------------------------------------------------------------------------- /lecture/notes-en/7-Offline-RL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/7-Offline-RL.md -------------------------------------------------------------------------------- /lecture/notes-en/8-RL-Theory.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/8-RL-Theory.md -------------------------------------------------------------------------------- /lecture/notes-en/9-Generative-Model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/9-Generative-Model.md -------------------------------------------------------------------------------- /lecture/notes-en/pic/LM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/pic/LM.png -------------------------------------------------------------------------------- /lecture/notes-en/pic/Qlearning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/pic/Qlearning.png -------------------------------------------------------------------------------- /lecture/notes-en/pic/RNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/pic/RNN.png -------------------------------------------------------------------------------- /lecture/notes-en/pic/Transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/pic/Transformer.png -------------------------------------------------------------------------------- /lecture/notes-en/pic/control.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-en/pic/control.png -------------------------------------------------------------------------------- /lecture/notes-zh/0-preliminaries.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/0-preliminaries.md -------------------------------------------------------------------------------- /lecture/notes-zh/10-optimal_control_planning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/10-optimal_control_planning.md -------------------------------------------------------------------------------- /lecture/notes-zh/11-model-based.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/11-model-based.md -------------------------------------------------------------------------------- /lecture/notes-zh/12-model-based-with-policy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/12-model-based-with-policy.md -------------------------------------------------------------------------------- /lecture/notes-zh/13-exploration_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/13-exploration_1.md -------------------------------------------------------------------------------- /lecture/notes-zh/14-exploration_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/14-exploration_2.md -------------------------------------------------------------------------------- /lecture/notes-zh/15-offline-RL_1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/15-offline-RL_1.md -------------------------------------------------------------------------------- /lecture/notes-zh/16-offline-RL_2.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/16-offline-RL_2.md -------------------------------------------------------------------------------- /lecture/notes-zh/17-RL-theory.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/17-RL-theory.md -------------------------------------------------------------------------------- /lecture/notes-zh/18-vae.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/18-vae.md -------------------------------------------------------------------------------- /lecture/notes-zh/19-soft-optimality.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/19-soft-optimality.md -------------------------------------------------------------------------------- /lecture/notes-zh/2-imitation_learning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/2-imitation_learning.md -------------------------------------------------------------------------------- /lecture/notes-zh/20-IRL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/20-IRL.md -------------------------------------------------------------------------------- /lecture/notes-zh/21-RL-LM.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/21-RL-LM.md -------------------------------------------------------------------------------- /lecture/notes-zh/22-transfer-meta.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/22-transfer-meta.md -------------------------------------------------------------------------------- /lecture/notes-zh/23-challenge.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/23-challenge.md -------------------------------------------------------------------------------- /lecture/notes-zh/3-pytorch.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/3-pytorch.md -------------------------------------------------------------------------------- /lecture/notes-zh/4-intro2RL.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/4-intro2RL.md -------------------------------------------------------------------------------- /lecture/notes-zh/5-policy_grad.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/5-policy_grad.md -------------------------------------------------------------------------------- /lecture/notes-zh/6-actor-critic.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/6-actor-critic.md -------------------------------------------------------------------------------- /lecture/notes-zh/7-value_func.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/7-value_func.md -------------------------------------------------------------------------------- /lecture/notes-zh/8-Q_learning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/8-Q_learning.md -------------------------------------------------------------------------------- /lecture/notes-zh/9-advanced_policy_grad.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/9-advanced_policy_grad.md -------------------------------------------------------------------------------- /lecture/notes-zh/CS_285_Fa23_PyTorch_Tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/CS_285_Fa23_PyTorch_Tutorial.ipynb -------------------------------------------------------------------------------- /lecture/notes-zh/assets/10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/10-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/11-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/12-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/12-1.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/14-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/14-1.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/14-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/14-2.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/14-3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/14-3.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/15-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/16-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/17-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/17-1.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/18-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/19-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/19-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/19-2.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/19-3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/19-3.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/2-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/21-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/21-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/21-2.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/22-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/22-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/22-2.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/22-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/22-3.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/22-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/22-4.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/23-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/23-1.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/23-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/23-2.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/4-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/5-1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/5-1.jpeg -------------------------------------------------------------------------------- /lecture/notes-zh/assets/9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/9-1.png -------------------------------------------------------------------------------- /lecture/notes-zh/assets/newton.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/newton.ipynb -------------------------------------------------------------------------------- /lecture/notes-zh/assets/not_implement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/assets/not_implement.png -------------------------------------------------------------------------------- /lecture/notes-zh/change.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/change.py -------------------------------------------------------------------------------- /lecture/notes-zh/takeaway.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/lecture/notes-zh/takeaway.md -------------------------------------------------------------------------------- /tutorials/0-intro.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/tutorials/0-intro.ipynb -------------------------------------------------------------------------------- /tutorials/assets/0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/tutorials/assets/0-1.png -------------------------------------------------------------------------------- /tutorials/install.sh: -------------------------------------------------------------------------------- 1 | sudo apt install ffmpeg -------------------------------------------------------------------------------- /tutorials/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/tutorials/requirements.txt -------------------------------------------------------------------------------- /tutorials/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/tutorials/utils.py -------------------------------------------------------------------------------- /tutorials/utils_0_intro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hidden-Hyperparameter/RL-notes/HEAD/tutorials/utils_0_intro.py --------------------------------------------------------------------------------