├── .github └── workflows │ ├── benchmark.yml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── clear_cache.yml │ ├── stale.yml │ ├── tests.yml │ └── upload_pr_documentation.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── benchmark ├── benchmark.py ├── benchmark_and_report.sh ├── benchmark_level1.sh ├── benchmark_level1_plot.sh ├── benchmark_level2.sh ├── benchmark_level2_plot.sh ├── benchmark_level3.sh ├── plot.sh ├── post_github_comment.py ├── post_github_comment.sbatch ├── trl.slurm_template └── upload_benchmark.py ├── docs └── source │ ├── _toctree.yml │ ├── best_of_n.mdx │ ├── customization.mdx │ ├── ddpo_trainer.mdx │ ├── detoxifying_a_lm.mdx │ ├── dpo_trainer.mdx │ ├── example_overview.md │ ├── how_to_train.md │ ├── index.mdx │ ├── installation.mdx │ ├── iterative_sft_trainer.mdx │ ├── learning_tools.mdx │ ├── logging.mdx │ ├── lora_tuning_peft.mdx │ ├── models.mdx │ ├── multi_adapter_rl.mdx │ ├── ppo_trainer.mdx │ ├── quickstart.mdx │ ├── reward_trainer.mdx │ ├── sentiment_tuning.mdx │ ├── sft_trainer.mdx │ ├── text_environments.md │ ├── trainer.mdx │ ├── use_model.md │ └── using_llama_models.mdx ├── examples ├── README.md ├── accelerate_configs │ ├── deepspeed_zero1.yaml │ ├── deepspeed_zero2.yaml │ ├── deepspeed_zero3.yaml │ └── multi_gpu.yaml ├── hello_world.py ├── notebooks │ ├── README.md │ ├── best_of_n.ipynb │ ├── gpt2-sentiment-control.ipynb │ └── gpt2-sentiment.ipynb ├── research_projects │ ├── README.md │ ├── stack_llama │ │ └── scripts │ │ │ ├── README.md │ │ │ ├── merge_peft_adapter.py │ │ │ ├── reward_modeling.py │ │ │ ├── rl_training.py │ │ │ └── supervised_finetuning.py │ ├── stack_llama_2 │ │ └── scripts │ │ │ ├── README.md │ │ │ ├── dpo_llama2.py │ │ │ ├── requirements.txt │ │ │ └── sft_llama2.py │ ├── tools │ │ ├── calculator.py │ │ ├── python_interpreter.py │ │ └── triviaqa.py │ └── toxicity │ │ ├── README.md │ │ └── scripts │ │ ├── evaluate-toxicity.py │ │ └── gpt-j-6b-toxicity.py └── scripts │ ├── ddpo.py │ ├── dpo.py │ ├── ppo.py │ ├── ppo_multi_adapter.py │ ├── reward_modeling.py │ └── sft.py ├── pyproject.toml ├── requirements.txt ├── scripts └── stale.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_best_of_n_sampler.py ├── test_core.py ├── test_data_collator_completion_only.py ├── test_ddpo_trainer.py ├── test_dpo_trainer.py ├── test_e2e.py ├── test_environments.py ├── test_iterative_sft_trainer.py ├── test_modeling_value_head.py ├── test_no_peft.py ├── test_peft_models.py ├── test_ppo_trainer.py ├── test_reward_trainer.py ├── test_sft_trainer.py ├── testing_constants.py └── testing_utils.py └── trl ├── __init__.py ├── core.py ├── environment ├── __init__.py └── base_environment.py ├── extras ├── __init__.py └── best_of_n_sampler.py ├── import_utils.py ├── models ├── __init__.py ├── modeling_base.py ├── modeling_sd_base.py └── modeling_value_head.py └── trainer ├── __init__.py ├── base.py ├── ddpo_config.py ├── ddpo_trainer.py ├── dpo_trainer.py ├── iterative_sft_trainer.py ├── ppo_config.py ├── ppo_trainer.py ├── reward_trainer.py ├── sft_trainer.py ├── training_configs.py └── utils.py /.github/workflows/benchmark.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/benchmark.yml -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/build_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/build_pr_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/clear_cache.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/clear_cache.yml -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/stale.yml -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/tests.yml -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.github/workflows/upload_pr_documentation.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/CITATION.cff -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/README.md -------------------------------------------------------------------------------- /benchmark/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark.py -------------------------------------------------------------------------------- /benchmark/benchmark_and_report.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark_and_report.sh -------------------------------------------------------------------------------- /benchmark/benchmark_level1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark_level1.sh -------------------------------------------------------------------------------- /benchmark/benchmark_level1_plot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark_level1_plot.sh -------------------------------------------------------------------------------- /benchmark/benchmark_level2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark_level2.sh -------------------------------------------------------------------------------- /benchmark/benchmark_level2_plot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark_level2_plot.sh -------------------------------------------------------------------------------- /benchmark/benchmark_level3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/benchmark_level3.sh -------------------------------------------------------------------------------- /benchmark/plot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/plot.sh -------------------------------------------------------------------------------- /benchmark/post_github_comment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/post_github_comment.py -------------------------------------------------------------------------------- /benchmark/post_github_comment.sbatch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/post_github_comment.sbatch -------------------------------------------------------------------------------- /benchmark/trl.slurm_template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/trl.slurm_template -------------------------------------------------------------------------------- /benchmark/upload_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/benchmark/upload_benchmark.py -------------------------------------------------------------------------------- /docs/source/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/_toctree.yml -------------------------------------------------------------------------------- /docs/source/best_of_n.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/best_of_n.mdx -------------------------------------------------------------------------------- /docs/source/customization.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/customization.mdx -------------------------------------------------------------------------------- /docs/source/ddpo_trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/ddpo_trainer.mdx -------------------------------------------------------------------------------- /docs/source/detoxifying_a_lm.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/detoxifying_a_lm.mdx -------------------------------------------------------------------------------- /docs/source/dpo_trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/dpo_trainer.mdx -------------------------------------------------------------------------------- /docs/source/example_overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/example_overview.md -------------------------------------------------------------------------------- /docs/source/how_to_train.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/how_to_train.md -------------------------------------------------------------------------------- /docs/source/index.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/index.mdx -------------------------------------------------------------------------------- /docs/source/installation.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/installation.mdx -------------------------------------------------------------------------------- /docs/source/iterative_sft_trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/iterative_sft_trainer.mdx -------------------------------------------------------------------------------- /docs/source/learning_tools.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/learning_tools.mdx -------------------------------------------------------------------------------- /docs/source/logging.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/logging.mdx -------------------------------------------------------------------------------- /docs/source/lora_tuning_peft.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/lora_tuning_peft.mdx -------------------------------------------------------------------------------- /docs/source/models.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/models.mdx -------------------------------------------------------------------------------- /docs/source/multi_adapter_rl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/multi_adapter_rl.mdx -------------------------------------------------------------------------------- /docs/source/ppo_trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/ppo_trainer.mdx -------------------------------------------------------------------------------- /docs/source/quickstart.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/quickstart.mdx -------------------------------------------------------------------------------- /docs/source/reward_trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/reward_trainer.mdx -------------------------------------------------------------------------------- /docs/source/sentiment_tuning.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/sentiment_tuning.mdx -------------------------------------------------------------------------------- /docs/source/sft_trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/sft_trainer.mdx -------------------------------------------------------------------------------- /docs/source/text_environments.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/text_environments.md -------------------------------------------------------------------------------- /docs/source/trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/trainer.mdx -------------------------------------------------------------------------------- /docs/source/use_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/use_model.md -------------------------------------------------------------------------------- /docs/source/using_llama_models.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/docs/source/using_llama_models.mdx -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/README.md -------------------------------------------------------------------------------- /examples/accelerate_configs/deepspeed_zero1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/accelerate_configs/deepspeed_zero1.yaml -------------------------------------------------------------------------------- /examples/accelerate_configs/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/accelerate_configs/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /examples/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/accelerate_configs/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /examples/accelerate_configs/multi_gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/accelerate_configs/multi_gpu.yaml -------------------------------------------------------------------------------- /examples/hello_world.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/hello_world.py -------------------------------------------------------------------------------- /examples/notebooks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/notebooks/README.md -------------------------------------------------------------------------------- /examples/notebooks/best_of_n.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/notebooks/best_of_n.ipynb -------------------------------------------------------------------------------- /examples/notebooks/gpt2-sentiment-control.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/notebooks/gpt2-sentiment-control.ipynb -------------------------------------------------------------------------------- /examples/notebooks/gpt2-sentiment.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/notebooks/gpt2-sentiment.ipynb -------------------------------------------------------------------------------- /examples/research_projects/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/README.md -------------------------------------------------------------------------------- /examples/research_projects/stack_llama/scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama/scripts/README.md -------------------------------------------------------------------------------- /examples/research_projects/stack_llama/scripts/merge_peft_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py -------------------------------------------------------------------------------- /examples/research_projects/stack_llama/scripts/reward_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama/scripts/reward_modeling.py -------------------------------------------------------------------------------- /examples/research_projects/stack_llama/scripts/rl_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama/scripts/rl_training.py -------------------------------------------------------------------------------- /examples/research_projects/stack_llama/scripts/supervised_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama/scripts/supervised_finetuning.py -------------------------------------------------------------------------------- /examples/research_projects/stack_llama_2/scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama_2/scripts/README.md -------------------------------------------------------------------------------- /examples/research_projects/stack_llama_2/scripts/dpo_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py -------------------------------------------------------------------------------- /examples/research_projects/stack_llama_2/scripts/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama_2/scripts/requirements.txt -------------------------------------------------------------------------------- /examples/research_projects/stack_llama_2/scripts/sft_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/stack_llama_2/scripts/sft_llama2.py -------------------------------------------------------------------------------- /examples/research_projects/tools/calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/tools/calculator.py -------------------------------------------------------------------------------- /examples/research_projects/tools/python_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/tools/python_interpreter.py -------------------------------------------------------------------------------- /examples/research_projects/tools/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/tools/triviaqa.py -------------------------------------------------------------------------------- /examples/research_projects/toxicity/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/toxicity/README.md -------------------------------------------------------------------------------- /examples/research_projects/toxicity/scripts/evaluate-toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/toxicity/scripts/evaluate-toxicity.py -------------------------------------------------------------------------------- /examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py -------------------------------------------------------------------------------- /examples/scripts/ddpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/scripts/ddpo.py -------------------------------------------------------------------------------- /examples/scripts/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/scripts/dpo.py -------------------------------------------------------------------------------- /examples/scripts/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/scripts/ppo.py -------------------------------------------------------------------------------- /examples/scripts/ppo_multi_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/scripts/ppo_multi_adapter.py -------------------------------------------------------------------------------- /examples/scripts/reward_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/scripts/reward_modeling.py -------------------------------------------------------------------------------- /examples/scripts/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/examples/scripts/sft.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/stale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/scripts/stale.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_best_of_n_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_best_of_n_sampler.py -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_core.py -------------------------------------------------------------------------------- /tests/test_data_collator_completion_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_data_collator_completion_only.py -------------------------------------------------------------------------------- /tests/test_ddpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_ddpo_trainer.py -------------------------------------------------------------------------------- /tests/test_dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_dpo_trainer.py -------------------------------------------------------------------------------- /tests/test_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_e2e.py -------------------------------------------------------------------------------- /tests/test_environments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_environments.py -------------------------------------------------------------------------------- /tests/test_iterative_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_iterative_sft_trainer.py -------------------------------------------------------------------------------- /tests/test_modeling_value_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_modeling_value_head.py -------------------------------------------------------------------------------- /tests/test_no_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_no_peft.py -------------------------------------------------------------------------------- /tests/test_peft_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_peft_models.py -------------------------------------------------------------------------------- /tests/test_ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_ppo_trainer.py -------------------------------------------------------------------------------- /tests/test_reward_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_reward_trainer.py -------------------------------------------------------------------------------- /tests/test_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/test_sft_trainer.py -------------------------------------------------------------------------------- /tests/testing_constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/testing_constants.py -------------------------------------------------------------------------------- /tests/testing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/tests/testing_utils.py -------------------------------------------------------------------------------- /trl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/__init__.py -------------------------------------------------------------------------------- /trl/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/core.py -------------------------------------------------------------------------------- /trl/environment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/environment/__init__.py -------------------------------------------------------------------------------- /trl/environment/base_environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/environment/base_environment.py -------------------------------------------------------------------------------- /trl/extras/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/extras/__init__.py -------------------------------------------------------------------------------- /trl/extras/best_of_n_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/extras/best_of_n_sampler.py -------------------------------------------------------------------------------- /trl/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/import_utils.py -------------------------------------------------------------------------------- /trl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/models/__init__.py -------------------------------------------------------------------------------- /trl/models/modeling_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/models/modeling_base.py -------------------------------------------------------------------------------- /trl/models/modeling_sd_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/models/modeling_sd_base.py -------------------------------------------------------------------------------- /trl/models/modeling_value_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/models/modeling_value_head.py -------------------------------------------------------------------------------- /trl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/__init__.py -------------------------------------------------------------------------------- /trl/trainer/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/base.py -------------------------------------------------------------------------------- /trl/trainer/ddpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/ddpo_config.py -------------------------------------------------------------------------------- /trl/trainer/ddpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/ddpo_trainer.py -------------------------------------------------------------------------------- /trl/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /trl/trainer/iterative_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/iterative_sft_trainer.py -------------------------------------------------------------------------------- /trl/trainer/ppo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/ppo_config.py -------------------------------------------------------------------------------- /trl/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /trl/trainer/reward_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/reward_trainer.py -------------------------------------------------------------------------------- /trl/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/sft_trainer.py -------------------------------------------------------------------------------- /trl/trainer/training_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/training_configs.py -------------------------------------------------------------------------------- /trl/trainer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bram-w/trl/HEAD/trl/trainer/utils.py --------------------------------------------------------------------------------