├── .github ├── ISSUE_TEMPLATE │ ├── i-have-a-bug-with-a-hands-on.md │ ├── i-have-a-question.md │ └── i-want-to-improve-the-course.md └── workflows │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ └── upload_pr_documentation.yml ├── LICENSE.md ├── README.md ├── notebooks ├── bonus-unit1 │ ├── bonus-unit1.ipynb │ └── bonus_unit1.ipynb ├── unit1 │ ├── requirements-unit1.txt │ └── unit1.ipynb ├── unit2 │ ├── requirements-unit2.txt │ └── unit2.ipynb ├── unit3 │ └── unit3.ipynb ├── unit4 │ ├── requirements-unit4.txt │ └── unit4.ipynb ├── unit5 │ └── unit5.ipynb ├── unit6 │ ├── requirements-unit6.txt │ └── unit6.ipynb └── unit8 │ ├── unit8_part1.ipynb │ └── unit8_part2.ipynb └── units └── en ├── _toctree.yml ├── communication ├── certification.mdx └── conclusion.mdx ├── live1 └── live1.mdx ├── unit0 ├── discord101.mdx ├── introduction.mdx └── setup.mdx ├── unit1 ├── additional-readings.mdx ├── conclusion.mdx ├── deep-rl.mdx ├── exp-exp-tradeoff.mdx ├── glossary.mdx ├── hands-on.mdx ├── introduction.mdx ├── quiz.mdx ├── rl-framework.mdx ├── summary.mdx ├── tasks.mdx ├── two-methods.mdx └── what-is-rl.mdx ├── unit2 ├── additional-readings.mdx ├── bellman-equation.mdx ├── conclusion.mdx ├── glossary.mdx ├── hands-on.mdx ├── introduction.mdx ├── mc-vs-td.mdx ├── mid-way-quiz.mdx ├── mid-way-recap.mdx ├── q-learning-example.mdx ├── q-learning-recap.mdx ├── q-learning.mdx ├── quiz2.mdx ├── two-types-value-based-methods.mdx └── what-is-rl.mdx ├── unit3 ├── additional-readings.mdx ├── conclusion.mdx ├── deep-q-algorithm.mdx ├── deep-q-network.mdx ├── from-q-to-dqn.mdx ├── glossary.mdx ├── hands-on.mdx ├── introduction.mdx └── quiz.mdx ├── unit4 ├── additional-readings.mdx ├── advantages-disadvantages.mdx ├── conclusion.mdx ├── glossary.mdx ├── hands-on.mdx ├── introduction.mdx ├── pg-theorem.mdx ├── policy-gradient.mdx ├── quiz.mdx └── what-are-policy-based-methods.mdx ├── unit5 ├── bonus.mdx ├── conclusion.mdx ├── curiosity.mdx ├── hands-on.mdx ├── how-mlagents-works.mdx ├── introduction.mdx ├── pyramids.mdx ├── quiz.mdx └── snowball-target.mdx ├── unit6 ├── additional-readings.mdx ├── advantage-actor-critic.mdx ├── conclusion.mdx ├── hands-on.mdx ├── introduction.mdx ├── quiz.mdx └── variance-problem.mdx ├── unit7 ├── additional-readings.mdx ├── conclusion.mdx ├── hands-on.mdx ├── introduction-to-marl.mdx ├── introduction.mdx ├── multi-agent-setting.mdx ├── quiz.mdx └── self-play.mdx ├── unit8 ├── additional-readings.mdx ├── clipped-surrogate-objective.mdx ├── conclusion-sf.mdx ├── conclusion.mdx ├── hands-on-cleanrl.mdx ├── hands-on-sf.mdx ├── introduction-sf.mdx ├── introduction.mdx ├── intuition-behind-ppo.mdx └── visualize.mdx ├── unitbonus1 ├── conclusion.mdx ├── how-huggy-works.mdx ├── introduction.mdx ├── play.mdx └── train.mdx ├── unitbonus2 ├── hands-on.mdx ├── introduction.mdx └── optuna.mdx ├── unitbonus3 ├── curriculum-learning.mdx ├── decision-transformers.mdx ├── envs-to-try.mdx ├── generalisation.mdx ├── godotrl.mdx ├── introduction.mdx ├── language-models.mdx ├── learning-agents.mdx ├── model-based.mdx ├── offline-online.mdx ├── rl-documentation.mdx ├── rlhf.mdx └── student-works.mdx └── unitbonus5 ├── conclusion.mdx ├── customize-the-environment.mdx ├── getting-started.mdx ├── introduction.mdx ├── the-environment.mdx └── train-our-robot.mdx /.github/ISSUE_TEMPLATE/i-have-a-bug-with-a-hands-on.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/.github/ISSUE_TEMPLATE/i-have-a-bug-with-a-hands-on.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/i-have-a-question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/.github/ISSUE_TEMPLATE/i-have-a-question.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/i-want-to-improve-the-course.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/.github/ISSUE_TEMPLATE/i-want-to-improve-the-course.md -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/.github/workflows/build_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/.github/workflows/build_pr_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/.github/workflows/upload_pr_documentation.yml -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/README.md -------------------------------------------------------------------------------- /notebooks/bonus-unit1/bonus-unit1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/bonus-unit1/bonus-unit1.ipynb -------------------------------------------------------------------------------- /notebooks/bonus-unit1/bonus_unit1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/bonus-unit1/bonus_unit1.ipynb -------------------------------------------------------------------------------- /notebooks/unit1/requirements-unit1.txt: -------------------------------------------------------------------------------- 1 | stable-baselines3==2.0.0a5 2 | swig 3 | gymnasium[box2d] 4 | huggingface_sb3 5 | -------------------------------------------------------------------------------- /notebooks/unit1/unit1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit1/unit1.ipynb -------------------------------------------------------------------------------- /notebooks/unit2/requirements-unit2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit2/requirements-unit2.txt -------------------------------------------------------------------------------- /notebooks/unit2/unit2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit2/unit2.ipynb -------------------------------------------------------------------------------- /notebooks/unit3/unit3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit3/unit3.ipynb -------------------------------------------------------------------------------- /notebooks/unit4/requirements-unit4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit4/requirements-unit4.txt -------------------------------------------------------------------------------- /notebooks/unit4/unit4.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit4/unit4.ipynb -------------------------------------------------------------------------------- /notebooks/unit5/unit5.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit5/unit5.ipynb -------------------------------------------------------------------------------- /notebooks/unit6/requirements-unit6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit6/requirements-unit6.txt -------------------------------------------------------------------------------- /notebooks/unit6/unit6.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit6/unit6.ipynb -------------------------------------------------------------------------------- /notebooks/unit8/unit8_part1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit8/unit8_part1.ipynb -------------------------------------------------------------------------------- /notebooks/unit8/unit8_part2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/notebooks/unit8/unit8_part2.ipynb -------------------------------------------------------------------------------- /units/en/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/_toctree.yml -------------------------------------------------------------------------------- /units/en/communication/certification.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/communication/certification.mdx -------------------------------------------------------------------------------- /units/en/communication/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/communication/conclusion.mdx -------------------------------------------------------------------------------- /units/en/live1/live1.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/live1/live1.mdx -------------------------------------------------------------------------------- /units/en/unit0/discord101.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit0/discord101.mdx -------------------------------------------------------------------------------- /units/en/unit0/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit0/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit0/setup.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit0/setup.mdx -------------------------------------------------------------------------------- /units/en/unit1/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit1/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit1/deep-rl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/deep-rl.mdx -------------------------------------------------------------------------------- /units/en/unit1/exp-exp-tradeoff.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/exp-exp-tradeoff.mdx -------------------------------------------------------------------------------- /units/en/unit1/glossary.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/glossary.mdx -------------------------------------------------------------------------------- /units/en/unit1/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit1/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit1/quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/quiz.mdx -------------------------------------------------------------------------------- /units/en/unit1/rl-framework.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/rl-framework.mdx -------------------------------------------------------------------------------- /units/en/unit1/summary.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/summary.mdx -------------------------------------------------------------------------------- /units/en/unit1/tasks.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/tasks.mdx -------------------------------------------------------------------------------- /units/en/unit1/two-methods.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/two-methods.mdx -------------------------------------------------------------------------------- /units/en/unit1/what-is-rl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit1/what-is-rl.mdx -------------------------------------------------------------------------------- /units/en/unit2/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit2/bellman-equation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/bellman-equation.mdx -------------------------------------------------------------------------------- /units/en/unit2/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit2/glossary.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/glossary.mdx -------------------------------------------------------------------------------- /units/en/unit2/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit2/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit2/mc-vs-td.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/mc-vs-td.mdx -------------------------------------------------------------------------------- /units/en/unit2/mid-way-quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/mid-way-quiz.mdx -------------------------------------------------------------------------------- /units/en/unit2/mid-way-recap.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/mid-way-recap.mdx -------------------------------------------------------------------------------- /units/en/unit2/q-learning-example.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/q-learning-example.mdx -------------------------------------------------------------------------------- /units/en/unit2/q-learning-recap.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/q-learning-recap.mdx -------------------------------------------------------------------------------- /units/en/unit2/q-learning.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/q-learning.mdx -------------------------------------------------------------------------------- /units/en/unit2/quiz2.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/quiz2.mdx -------------------------------------------------------------------------------- /units/en/unit2/two-types-value-based-methods.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/two-types-value-based-methods.mdx -------------------------------------------------------------------------------- /units/en/unit2/what-is-rl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit2/what-is-rl.mdx -------------------------------------------------------------------------------- /units/en/unit3/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit3/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit3/deep-q-algorithm.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/deep-q-algorithm.mdx -------------------------------------------------------------------------------- /units/en/unit3/deep-q-network.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/deep-q-network.mdx -------------------------------------------------------------------------------- /units/en/unit3/from-q-to-dqn.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/from-q-to-dqn.mdx -------------------------------------------------------------------------------- /units/en/unit3/glossary.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/glossary.mdx -------------------------------------------------------------------------------- /units/en/unit3/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit3/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit3/quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit3/quiz.mdx -------------------------------------------------------------------------------- /units/en/unit4/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit4/advantages-disadvantages.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/advantages-disadvantages.mdx -------------------------------------------------------------------------------- /units/en/unit4/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit4/glossary.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/glossary.mdx -------------------------------------------------------------------------------- /units/en/unit4/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit4/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit4/pg-theorem.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/pg-theorem.mdx -------------------------------------------------------------------------------- /units/en/unit4/policy-gradient.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/policy-gradient.mdx -------------------------------------------------------------------------------- /units/en/unit4/quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/quiz.mdx -------------------------------------------------------------------------------- /units/en/unit4/what-are-policy-based-methods.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit4/what-are-policy-based-methods.mdx -------------------------------------------------------------------------------- /units/en/unit5/bonus.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/bonus.mdx -------------------------------------------------------------------------------- /units/en/unit5/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit5/curiosity.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/curiosity.mdx -------------------------------------------------------------------------------- /units/en/unit5/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit5/how-mlagents-works.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/how-mlagents-works.mdx -------------------------------------------------------------------------------- /units/en/unit5/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit5/pyramids.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/pyramids.mdx -------------------------------------------------------------------------------- /units/en/unit5/quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/quiz.mdx -------------------------------------------------------------------------------- /units/en/unit5/snowball-target.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit5/snowball-target.mdx -------------------------------------------------------------------------------- /units/en/unit6/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit6/advantage-actor-critic.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/advantage-actor-critic.mdx -------------------------------------------------------------------------------- /units/en/unit6/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit6/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit6/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit6/quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/quiz.mdx -------------------------------------------------------------------------------- /units/en/unit6/variance-problem.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit6/variance-problem.mdx -------------------------------------------------------------------------------- /units/en/unit7/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit7/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit7/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unit7/introduction-to-marl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/introduction-to-marl.mdx -------------------------------------------------------------------------------- /units/en/unit7/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit7/multi-agent-setting.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/multi-agent-setting.mdx -------------------------------------------------------------------------------- /units/en/unit7/quiz.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/quiz.mdx -------------------------------------------------------------------------------- /units/en/unit7/self-play.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit7/self-play.mdx -------------------------------------------------------------------------------- /units/en/unit8/additional-readings.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/additional-readings.mdx -------------------------------------------------------------------------------- /units/en/unit8/clipped-surrogate-objective.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/clipped-surrogate-objective.mdx -------------------------------------------------------------------------------- /units/en/unit8/conclusion-sf.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/conclusion-sf.mdx -------------------------------------------------------------------------------- /units/en/unit8/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unit8/hands-on-cleanrl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/hands-on-cleanrl.mdx -------------------------------------------------------------------------------- /units/en/unit8/hands-on-sf.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/hands-on-sf.mdx -------------------------------------------------------------------------------- /units/en/unit8/introduction-sf.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/introduction-sf.mdx -------------------------------------------------------------------------------- /units/en/unit8/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/introduction.mdx -------------------------------------------------------------------------------- /units/en/unit8/intuition-behind-ppo.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/intuition-behind-ppo.mdx -------------------------------------------------------------------------------- /units/en/unit8/visualize.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unit8/visualize.mdx -------------------------------------------------------------------------------- /units/en/unitbonus1/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus1/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unitbonus1/how-huggy-works.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus1/how-huggy-works.mdx -------------------------------------------------------------------------------- /units/en/unitbonus1/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus1/introduction.mdx -------------------------------------------------------------------------------- /units/en/unitbonus1/play.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus1/play.mdx -------------------------------------------------------------------------------- /units/en/unitbonus1/train.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus1/train.mdx -------------------------------------------------------------------------------- /units/en/unitbonus2/hands-on.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus2/hands-on.mdx -------------------------------------------------------------------------------- /units/en/unitbonus2/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus2/introduction.mdx -------------------------------------------------------------------------------- /units/en/unitbonus2/optuna.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus2/optuna.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/curriculum-learning.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/curriculum-learning.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/decision-transformers.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/decision-transformers.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/envs-to-try.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/envs-to-try.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/generalisation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/generalisation.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/godotrl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/godotrl.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/introduction.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/language-models.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/language-models.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/learning-agents.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/learning-agents.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/model-based.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/model-based.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/offline-online.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/offline-online.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/rl-documentation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/rl-documentation.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/rlhf.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/rlhf.mdx -------------------------------------------------------------------------------- /units/en/unitbonus3/student-works.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus3/student-works.mdx -------------------------------------------------------------------------------- /units/en/unitbonus5/conclusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus5/conclusion.mdx -------------------------------------------------------------------------------- /units/en/unitbonus5/customize-the-environment.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus5/customize-the-environment.mdx -------------------------------------------------------------------------------- /units/en/unitbonus5/getting-started.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus5/getting-started.mdx -------------------------------------------------------------------------------- /units/en/unitbonus5/introduction.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus5/introduction.mdx -------------------------------------------------------------------------------- /units/en/unitbonus5/the-environment.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus5/the-environment.mdx -------------------------------------------------------------------------------- /units/en/unitbonus5/train-our-robot.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/deep-rl-class/HEAD/units/en/unitbonus5/train-our-robot.mdx --------------------------------------------------------------------------------