├── .DS_Store
├── Archive
    ├── 2017-Course-Notes
    │   ├── CS294-DeepRL-Berkeley
    │   │   ├── .DS_Store
    │   │   ├── imgs
    │   │   │   ├── cannon.svg
    │   │   │   ├── linear-lqr.png
    │   │   │   ├── nvidia-case.png
    │   │   │   └── rl-imitation-learning.png
    │   │   └── readme.md
    │   └── Elements-Of-RL
    │   │   └── readme.md
    └── README.md
├── GETTING_STARTED.md
├── Modern-RL-Research
    ├── LLM-Code-Generation
    │   ├── PAPERS.md
    │   └── README.md
    ├── LLM-RL-Program-Synthesis
    │   ├── PAPERS.md
    │   └── README.md
    ├── README.md
    └── RLHF-and-Alignment
    │   ├── PAPERS.md
    │   └── README.md
├── content-pipeline
    ├── README.md
    ├── config
    │   └── api_keys.env.example
    ├── generators
    │   ├── llm_client.py
    │   ├── paper_to_blog.py
    │   ├── paper_to_thread.py
    │   └── test_ollama.py
    └── outputs
    │   └── blogs
    │       └── kimi_k2_test_2506.19780v5.md
├── license.md
├── readme.md
├── scripts
    ├── README.md
    ├── arxiv_paper_collector.py
    └── papers_database.json
└── self-study-lectures
    ├── README.md
    ├── cheat-sheets
        ├── RL-Math-Formulas.md
        └── RL-Quick-Reference.md
    ├── diagrams
        └── RL-Algorithm-Diagrams.md
    └── lectures
        ├── 01-mdps-bellman.md
        ├── 02-policy-gradients.md
        ├── 03-value-functions-q-learning.md
        ├── 04-actor-critic.md
        ├── 05-trpo.md
        ├── 06-ppo.md
        ├── 07-off-policy-rl.md
        ├── 08-model-based-rl.md
        ├── 09-reward-modeling.md
        ├── 10-ppo-for-llms.md
        ├── 11-dpo.md
        ├── 12-beyond-dpo.md
        └── 13-rlhf-code-generation.md


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/.DS_Store


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/.DS_Store


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/cannon.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/cannon.svg


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/linear-lqr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/linear-lqr.png


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/nvidia-case.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/nvidia-case.png


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/rl-imitation-learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/imgs/rl-imitation-learning.png


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/CS294-DeepRL-Berkeley/readme.md


--------------------------------------------------------------------------------
/Archive/2017-Course-Notes/Elements-Of-RL/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/2017-Course-Notes/Elements-Of-RL/readme.md


--------------------------------------------------------------------------------
/Archive/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Archive/README.md


--------------------------------------------------------------------------------
/GETTING_STARTED.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/GETTING_STARTED.md


--------------------------------------------------------------------------------
/Modern-RL-Research/LLM-Code-Generation/PAPERS.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/LLM-Code-Generation/PAPERS.md


--------------------------------------------------------------------------------
/Modern-RL-Research/LLM-Code-Generation/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/LLM-Code-Generation/README.md


--------------------------------------------------------------------------------
/Modern-RL-Research/LLM-RL-Program-Synthesis/PAPERS.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/LLM-RL-Program-Synthesis/PAPERS.md


--------------------------------------------------------------------------------
/Modern-RL-Research/LLM-RL-Program-Synthesis/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/LLM-RL-Program-Synthesis/README.md


--------------------------------------------------------------------------------
/Modern-RL-Research/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/README.md


--------------------------------------------------------------------------------
/Modern-RL-Research/RLHF-and-Alignment/PAPERS.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/RLHF-and-Alignment/PAPERS.md


--------------------------------------------------------------------------------
/Modern-RL-Research/RLHF-and-Alignment/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/Modern-RL-Research/RLHF-and-Alignment/README.md


--------------------------------------------------------------------------------
/content-pipeline/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/README.md


--------------------------------------------------------------------------------
/content-pipeline/config/api_keys.env.example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/config/api_keys.env.example


--------------------------------------------------------------------------------
/content-pipeline/generators/llm_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/generators/llm_client.py


--------------------------------------------------------------------------------
/content-pipeline/generators/paper_to_blog.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/generators/paper_to_blog.py


--------------------------------------------------------------------------------
/content-pipeline/generators/paper_to_thread.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/generators/paper_to_thread.py


--------------------------------------------------------------------------------
/content-pipeline/generators/test_ollama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/generators/test_ollama.py


--------------------------------------------------------------------------------
/content-pipeline/outputs/blogs/kimi_k2_test_2506.19780v5.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/content-pipeline/outputs/blogs/kimi_k2_test_2506.19780v5.md


--------------------------------------------------------------------------------
/license.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/license.md


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/readme.md


--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/scripts/README.md


--------------------------------------------------------------------------------
/scripts/arxiv_paper_collector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/scripts/arxiv_paper_collector.py


--------------------------------------------------------------------------------
/scripts/papers_database.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/scripts/papers_database.json


--------------------------------------------------------------------------------
/self-study-lectures/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/README.md


--------------------------------------------------------------------------------
/self-study-lectures/cheat-sheets/RL-Math-Formulas.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/cheat-sheets/RL-Math-Formulas.md


--------------------------------------------------------------------------------
/self-study-lectures/cheat-sheets/RL-Quick-Reference.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/cheat-sheets/RL-Quick-Reference.md


--------------------------------------------------------------------------------
/self-study-lectures/diagrams/RL-Algorithm-Diagrams.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/diagrams/RL-Algorithm-Diagrams.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/01-mdps-bellman.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/01-mdps-bellman.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/02-policy-gradients.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/02-policy-gradients.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/03-value-functions-q-learning.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/03-value-functions-q-learning.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/04-actor-critic.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/04-actor-critic.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/05-trpo.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/05-trpo.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/06-ppo.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/06-ppo.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/07-off-policy-rl.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/07-off-policy-rl.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/08-model-based-rl.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/08-model-based-rl.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/09-reward-modeling.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/09-reward-modeling.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/10-ppo-for-llms.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/10-ppo-for-llms.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/11-dpo.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/11-dpo.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/12-beyond-dpo.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/12-beyond-dpo.md


--------------------------------------------------------------------------------
/self-study-lectures/lectures/13-rlhf-code-generation.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0bserver07/Study-Reinforcement-Learning/HEAD/self-study-lectures/lectures/13-rlhf-code-generation.md


--------------------------------------------------------------------------------