├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── documentation.yml │ └── feature_request.yml └── workflows │ ├── build.yml │ └── code_quality.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── configs ├── accelerate │ ├── ddp.yaml │ ├── zero2-bf16.yaml │ ├── zero2-fp16.yaml │ └── zero3.yaml ├── nemo_configs │ ├── megatron_1.3b.yaml │ ├── megatron_20b.yaml │ ├── megatron_2b.yaml │ ├── megatron_65b.yaml │ └── sft_megatron_20b.yaml ├── sweeps │ ├── ilql_sweep.yml │ └── ppo_sweep.yml └── test_config.yml ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── api.rst │ ├── conf.py │ ├── configs.rst │ ├── data.rst │ ├── examples.rst │ ├── index.rst │ ├── installation.rst │ ├── pipelines.rst │ └── trainers.rst ├── examples ├── __init__.py ├── alpaca │ ├── README.md │ └── sft_alpaca.py ├── architext.py ├── experiments │ └── grounded_program_synthesis │ │ ├── README.md │ │ ├── __init__.py │ │ ├── configs │ │ └── trlx_ppo_config.yml │ │ ├── lang.py │ │ └── train_trlx.py ├── hh │ ├── README.md │ ├── ilql_hh.py │ ├── ppo_hh.py │ ├── sft_hh.py │ ├── to_triton.py │ └── triton_config.pbtxt ├── ilql_sentiments.py ├── ilql_sentiments_t5.py ├── llama_nemo │ ├── README.md │ ├── convert_llama_to_nemo.py │ ├── dist_train.sh │ ├── megatron_llama_cfg.yaml │ └── nemo_llama2_ppo_sentiments.py ├── nemo_ilql_inference.py ├── nemo_ilql_sentiments.py ├── nemo_ppo_inference.py ├── nemo_ppo_sentiments.py ├── nemo_sft_sentiments.py ├── nemo_vs_ds_chat.py ├── notebooks │ ├── trlx_sentiments.ipynb │ └── trlx_simulacra.ipynb ├── ppo_dense_sentiments.py ├── ppo_sentiments.py ├── ppo_sentiments_llama.py ├── ppo_sentiments_peft.py ├── ppo_sentiments_t5.py ├── ppo_translation_t5.py ├── randomwalks │ ├── README.md │ ├── __init__.py │ ├── graph-example.png │ ├── ilql_randomwalks.py │ ├── ppo_randomwalks.py │ ├── randomwalks.py │ └── rft_randomwalks.py ├── rft_sentiments.py ├── sft_sentiments.py ├── simulacra.py ├── summarize_daily_cnn │ ├── __init__.py │ └── t5_summarize_daily_cnn.py └── summarize_rlhf │ ├── README.md │ ├── configs │ ├── default_accelerate_config.yaml │ └── ds_config_trlx_gptj_summarize.json │ ├── ilql_summarize_t5.py │ ├── requirements.txt │ ├── reward_model │ ├── ds_config_gpt_j.json │ ├── gptj_reward_test.py │ ├── reward_model.py │ └── train_reward_model_gptj.py │ ├── sft │ ├── ds_config_gptj.json │ ├── summarize_dataset.py │ └── train_gptj_summarize.py │ ├── trlx_gptj_text_summarization.py │ └── trlx_inference_gptj.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── accelerate_train_example.sh ├── benchmark.sh ├── slurm_train.sh └── sweep-cw.sh ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test_configs.py ├── test_minibatch.py ├── test_models.py ├── test_peft.py ├── test_pipelines.py ├── test_trainers.py └── test_utils.py └── trlx ├── __init__.py ├── data ├── __init__.py ├── accelerate_base_datatypes.py ├── configs.py ├── default_configs.py ├── ilql_types.py ├── method_configs.py └── ppo_types.py ├── models ├── README.md ├── __init__.py ├── modeling_base.py ├── modeling_ilql.py ├── modeling_nemo_ilql.py ├── modeling_nemo_ppo.py ├── modeling_nemo_sft.py └── modeling_ppo.py ├── pipeline ├── __init__.py ├── offline_pipeline.py └── ppo_pipeline.py ├── reference.py ├── sweep.py ├── trainer ├── __init__.py ├── accelerate_base_trainer.py ├── accelerate_ilql_trainer.py ├── accelerate_ppo_trainer.py ├── accelerate_rft_trainer.py ├── accelerate_sft_trainer.py ├── nemo_ilql_trainer.py ├── nemo_ppo_trainer.py └── nemo_sft_trainer.py ├── trlx.py └── utils ├── __init__.py ├── loading.py ├── logging.py └── modeling.py /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.devcontainer/devcontainer.json -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.github/ISSUE_TEMPLATE/bug_report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.github/ISSUE_TEMPLATE/documentation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.github/ISSUE_TEMPLATE/feature_request.yml -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.github/workflows/build.yml -------------------------------------------------------------------------------- /.github/workflows/code_quality.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.github/workflows/code_quality.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/.readthedocs.yml -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/README.md -------------------------------------------------------------------------------- /configs/accelerate/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/accelerate/ddp.yaml -------------------------------------------------------------------------------- /configs/accelerate/zero2-bf16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/accelerate/zero2-bf16.yaml -------------------------------------------------------------------------------- /configs/accelerate/zero2-fp16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/accelerate/zero2-fp16.yaml -------------------------------------------------------------------------------- /configs/accelerate/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/accelerate/zero3.yaml -------------------------------------------------------------------------------- /configs/nemo_configs/megatron_1.3b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/nemo_configs/megatron_1.3b.yaml -------------------------------------------------------------------------------- /configs/nemo_configs/megatron_20b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/nemo_configs/megatron_20b.yaml -------------------------------------------------------------------------------- /configs/nemo_configs/megatron_2b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/nemo_configs/megatron_2b.yaml -------------------------------------------------------------------------------- /configs/nemo_configs/megatron_65b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/nemo_configs/megatron_65b.yaml -------------------------------------------------------------------------------- /configs/nemo_configs/sft_megatron_20b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/nemo_configs/sft_megatron_20b.yaml -------------------------------------------------------------------------------- /configs/sweeps/ilql_sweep.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/sweeps/ilql_sweep.yml -------------------------------------------------------------------------------- /configs/sweeps/ppo_sweep.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/sweeps/ppo_sweep.yml -------------------------------------------------------------------------------- /configs/test_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/configs/test_config.yml -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/api.rst -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/configs.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/configs.rst -------------------------------------------------------------------------------- /docs/source/data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/data.rst -------------------------------------------------------------------------------- /docs/source/examples.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/examples.rst -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/installation.rst -------------------------------------------------------------------------------- /docs/source/pipelines.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/pipelines.rst -------------------------------------------------------------------------------- /docs/source/trainers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/docs/source/trainers.rst -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/alpaca/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/alpaca/README.md -------------------------------------------------------------------------------- /examples/alpaca/sft_alpaca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/alpaca/sft_alpaca.py -------------------------------------------------------------------------------- /examples/architext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/architext.py -------------------------------------------------------------------------------- /examples/experiments/grounded_program_synthesis/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/experiments/grounded_program_synthesis/README.md -------------------------------------------------------------------------------- /examples/experiments/grounded_program_synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml -------------------------------------------------------------------------------- /examples/experiments/grounded_program_synthesis/lang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/experiments/grounded_program_synthesis/lang.py -------------------------------------------------------------------------------- /examples/experiments/grounded_program_synthesis/train_trlx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/experiments/grounded_program_synthesis/train_trlx.py -------------------------------------------------------------------------------- /examples/hh/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/hh/README.md -------------------------------------------------------------------------------- /examples/hh/ilql_hh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/hh/ilql_hh.py -------------------------------------------------------------------------------- /examples/hh/ppo_hh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/hh/ppo_hh.py -------------------------------------------------------------------------------- /examples/hh/sft_hh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/hh/sft_hh.py -------------------------------------------------------------------------------- /examples/hh/to_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/hh/to_triton.py -------------------------------------------------------------------------------- /examples/hh/triton_config.pbtxt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/hh/triton_config.pbtxt -------------------------------------------------------------------------------- /examples/ilql_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ilql_sentiments.py -------------------------------------------------------------------------------- /examples/ilql_sentiments_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ilql_sentiments_t5.py -------------------------------------------------------------------------------- /examples/llama_nemo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/llama_nemo/README.md -------------------------------------------------------------------------------- /examples/llama_nemo/convert_llama_to_nemo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/llama_nemo/convert_llama_to_nemo.py -------------------------------------------------------------------------------- /examples/llama_nemo/dist_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/llama_nemo/dist_train.sh -------------------------------------------------------------------------------- /examples/llama_nemo/megatron_llama_cfg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/llama_nemo/megatron_llama_cfg.yaml -------------------------------------------------------------------------------- /examples/llama_nemo/nemo_llama2_ppo_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/llama_nemo/nemo_llama2_ppo_sentiments.py -------------------------------------------------------------------------------- /examples/nemo_ilql_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/nemo_ilql_inference.py -------------------------------------------------------------------------------- /examples/nemo_ilql_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/nemo_ilql_sentiments.py -------------------------------------------------------------------------------- /examples/nemo_ppo_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/nemo_ppo_inference.py -------------------------------------------------------------------------------- /examples/nemo_ppo_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/nemo_ppo_sentiments.py -------------------------------------------------------------------------------- /examples/nemo_sft_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/nemo_sft_sentiments.py -------------------------------------------------------------------------------- /examples/nemo_vs_ds_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/nemo_vs_ds_chat.py -------------------------------------------------------------------------------- /examples/notebooks/trlx_sentiments.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/notebooks/trlx_sentiments.ipynb -------------------------------------------------------------------------------- /examples/notebooks/trlx_simulacra.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/notebooks/trlx_simulacra.ipynb -------------------------------------------------------------------------------- /examples/ppo_dense_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ppo_dense_sentiments.py -------------------------------------------------------------------------------- /examples/ppo_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ppo_sentiments.py -------------------------------------------------------------------------------- /examples/ppo_sentiments_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ppo_sentiments_llama.py -------------------------------------------------------------------------------- /examples/ppo_sentiments_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ppo_sentiments_peft.py -------------------------------------------------------------------------------- /examples/ppo_sentiments_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ppo_sentiments_t5.py -------------------------------------------------------------------------------- /examples/ppo_translation_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/ppo_translation_t5.py -------------------------------------------------------------------------------- /examples/randomwalks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/README.md -------------------------------------------------------------------------------- /examples/randomwalks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/__init__.py -------------------------------------------------------------------------------- /examples/randomwalks/graph-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/graph-example.png -------------------------------------------------------------------------------- /examples/randomwalks/ilql_randomwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/ilql_randomwalks.py -------------------------------------------------------------------------------- /examples/randomwalks/ppo_randomwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/ppo_randomwalks.py -------------------------------------------------------------------------------- /examples/randomwalks/randomwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/randomwalks.py -------------------------------------------------------------------------------- /examples/randomwalks/rft_randomwalks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/randomwalks/rft_randomwalks.py -------------------------------------------------------------------------------- /examples/rft_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/rft_sentiments.py -------------------------------------------------------------------------------- /examples/sft_sentiments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/sft_sentiments.py -------------------------------------------------------------------------------- /examples/simulacra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/simulacra.py -------------------------------------------------------------------------------- /examples/summarize_daily_cnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/summarize_daily_cnn/t5_summarize_daily_cnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_daily_cnn/t5_summarize_daily_cnn.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/README.md -------------------------------------------------------------------------------- /examples/summarize_rlhf/configs/default_accelerate_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/configs/default_accelerate_config.yaml -------------------------------------------------------------------------------- /examples/summarize_rlhf/configs/ds_config_trlx_gptj_summarize.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/configs/ds_config_trlx_gptj_summarize.json -------------------------------------------------------------------------------- /examples/summarize_rlhf/ilql_summarize_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/ilql_summarize_t5.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/requirements.txt -------------------------------------------------------------------------------- /examples/summarize_rlhf/reward_model/ds_config_gpt_j.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/reward_model/ds_config_gpt_j.json -------------------------------------------------------------------------------- /examples/summarize_rlhf/reward_model/gptj_reward_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/reward_model/gptj_reward_test.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/reward_model/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/reward_model/reward_model.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/reward_model/train_reward_model_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/sft/ds_config_gptj.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/sft/ds_config_gptj.json -------------------------------------------------------------------------------- /examples/summarize_rlhf/sft/summarize_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/sft/summarize_dataset.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/sft/train_gptj_summarize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/sft/train_gptj_summarize.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/trlx_gptj_text_summarization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/trlx_gptj_text_summarization.py -------------------------------------------------------------------------------- /examples/summarize_rlhf/trlx_inference_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/examples/summarize_rlhf/trlx_inference_gptj.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/accelerate_train_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/scripts/accelerate_train_example.sh -------------------------------------------------------------------------------- /scripts/benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/scripts/benchmark.sh -------------------------------------------------------------------------------- /scripts/slurm_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/scripts/slurm_train.sh -------------------------------------------------------------------------------- /scripts/sweep-cw.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/scripts/sweep-cw.sh -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_configs.py -------------------------------------------------------------------------------- /tests/test_minibatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_minibatch.py -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_models.py -------------------------------------------------------------------------------- /tests/test_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_peft.py -------------------------------------------------------------------------------- /tests/test_pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_pipelines.py -------------------------------------------------------------------------------- /tests/test_trainers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_trainers.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /trlx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/__init__.py -------------------------------------------------------------------------------- /trlx/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/__init__.py -------------------------------------------------------------------------------- /trlx/data/accelerate_base_datatypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/accelerate_base_datatypes.py -------------------------------------------------------------------------------- /trlx/data/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/configs.py -------------------------------------------------------------------------------- /trlx/data/default_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/default_configs.py -------------------------------------------------------------------------------- /trlx/data/ilql_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/ilql_types.py -------------------------------------------------------------------------------- /trlx/data/method_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/method_configs.py -------------------------------------------------------------------------------- /trlx/data/ppo_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/data/ppo_types.py -------------------------------------------------------------------------------- /trlx/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/README.md -------------------------------------------------------------------------------- /trlx/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /trlx/models/modeling_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/modeling_base.py -------------------------------------------------------------------------------- /trlx/models/modeling_ilql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/modeling_ilql.py -------------------------------------------------------------------------------- /trlx/models/modeling_nemo_ilql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/modeling_nemo_ilql.py -------------------------------------------------------------------------------- /trlx/models/modeling_nemo_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/modeling_nemo_ppo.py -------------------------------------------------------------------------------- /trlx/models/modeling_nemo_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/modeling_nemo_sft.py -------------------------------------------------------------------------------- /trlx/models/modeling_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/models/modeling_ppo.py -------------------------------------------------------------------------------- /trlx/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/pipeline/__init__.py -------------------------------------------------------------------------------- /trlx/pipeline/offline_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/pipeline/offline_pipeline.py -------------------------------------------------------------------------------- /trlx/pipeline/ppo_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/pipeline/ppo_pipeline.py -------------------------------------------------------------------------------- /trlx/reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/reference.py -------------------------------------------------------------------------------- /trlx/sweep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/sweep.py -------------------------------------------------------------------------------- /trlx/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/__init__.py -------------------------------------------------------------------------------- /trlx/trainer/accelerate_base_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/accelerate_base_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/accelerate_ilql_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/accelerate_ilql_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/accelerate_ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/accelerate_ppo_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/accelerate_rft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/accelerate_rft_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/accelerate_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/accelerate_sft_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/nemo_ilql_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/nemo_ilql_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/nemo_ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/nemo_ppo_trainer.py -------------------------------------------------------------------------------- /trlx/trainer/nemo_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trainer/nemo_sft_trainer.py -------------------------------------------------------------------------------- /trlx/trlx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/trlx.py -------------------------------------------------------------------------------- /trlx/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/utils/__init__.py -------------------------------------------------------------------------------- /trlx/utils/loading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/utils/loading.py -------------------------------------------------------------------------------- /trlx/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/utils/logging.py -------------------------------------------------------------------------------- /trlx/utils/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CarperAI/trlx/HEAD/trlx/utils/modeling.py --------------------------------------------------------------------------------