├── README.md ├── eval ├── CLAPNQ │ ├── ckpts │ │ └── DS_Store.txt │ ├── dataset │ │ └── DS_Store.txt │ ├── eval.sh │ ├── eval_api.sh │ ├── evaluation_api_minicheck.py │ ├── evaluation_ours_minicheck.py │ ├── log │ │ └── DS_Store.txt │ ├── prompt_ours.txt │ └── result │ │ └── DS_Store.txt ├── CNQ │ ├── dataset │ │ └── DS_Store.txt │ ├── eval.sh │ ├── eval_api.sh │ ├── evaluation_api.py │ ├── evaluation_ours.py │ ├── log │ │ └── DS_Store.txt │ ├── prompt_ours.txt │ └── result │ │ └── DS_Store.txt ├── ConFiQA_FiQA │ ├── dataset │ │ ├── .DS_Store │ │ └── DS_Store.txt │ ├── eval.sh │ ├── eval_api.sh │ ├── eval_api_factual.sh │ ├── eval_factual.sh │ ├── evaluation_api.py │ ├── evaluation_api_factual.py │ ├── evaluation_ours.py │ ├── evaluation_ours_factual.py │ ├── log │ │ └── DS_Store.txt │ ├── log_factual │ │ └── DS_Store.txt │ ├── prompt_ours.txt │ ├── result │ │ └── DS_Store.txt │ └── result_factual │ │ └── DS_Store.txt ├── FaithEval │ ├── eval.sh │ ├── eval_api.sh │ ├── evaluation_api.py │ ├── evaluation_ours.py │ ├── log │ │ └── DS_Store.txt │ ├── prompt_ours.txt │ └── result │ │ └── DS_Store.txt ├── FollowRAG │ ├── eval.sh │ ├── eval_api.sh │ ├── evaluation_api.py │ ├── evaluation_ours.py │ ├── followRAG │ │ └── DS_Store.txt │ ├── log │ │ └── DS_Store.txt │ ├── prompt_ours.txt │ └── result │ │ └── DS_Store.txt └── XSum_WiKiLarge │ ├── ckpts │ └── DS_Store.txt │ ├── data │ └── DS_Store.txt │ ├── eval_sim.sh │ ├── eval_sim_api.sh │ ├── eval_sum.sh │ ├── eval_sum_api.sh │ ├── evaluation_api_minicheck.py │ ├── evaluation_ours_minicheck.py │ ├── prompt_sim.txt │ ├── prompt_sum.txt │ ├── sim_log │ └── DS_Store.txt │ ├── sim_result │ └── DS_Store.txt │ ├── sum_log │ └── DS_Store.txt │ └── sum_result │ └── DS_Store.txt └── train ├── Makefile ├── TRL ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── commands │ ├── run_dpo.sh │ └── run_sft.sh ├── docker │ ├── trl-latest-gpu │ │ └── Dockerfile │ └── trl-source-gpu │ │ └── Dockerfile ├── docs │ └── source │ │ ├── _toctree.yml │ │ ├── alignprop_trainer.md │ │ ├── bco_trainer.md │ │ ├── best_of_n.md │ │ ├── callbacks.md │ │ ├── clis.md │ │ ├── community_tutorials.md │ │ ├── cpo_trainer.md │ │ ├── customization.md │ │ ├── data_utils.md │ │ ├── dataset_formats.md │ │ ├── ddpo_trainer.md │ │ ├── deepspeed_integration.md │ │ ├── detoxifying_a_lm.md │ │ ├── dpo_trainer.md │ │ ├── example_overview.md │ │ ├── gkd_trainer.md │ │ ├── grpo_trainer.md │ │ ├── how_to_train.md │ │ ├── index.md │ │ ├── installation.md │ │ ├── iterative_sft_trainer.md │ │ ├── judges.md │ │ ├── kto_trainer.md │ │ ├── learning_tools.md │ │ ├── liger_kernel_integration.md │ │ ├── logging.md │ │ ├── models.md │ │ ├── multi_adapter_rl.md │ │ ├── nash_md_trainer.md │ │ ├── online_dpo_trainer.md │ │ ├── orpo_trainer.md │ │ ├── peft_integration.md │ │ ├── ppo_trainer.md │ │ ├── prm_trainer.md │ │ ├── quickstart.md │ │ ├── reducing_memory_usage.md │ │ ├── reward_trainer.md │ │ ├── rloo_trainer.md │ │ ├── script_utils.md │ │ ├── sentiment_tuning.md │ │ ├── sft_trainer.md │ │ ├── speeding_up_training.md │ │ ├── text_environments.md │ │ ├── unsloth_integration.md │ │ ├── use_model.md │ │ ├── using_llama_models.md │ │ └── xpo_trainer.md ├── examples │ ├── README.md │ ├── accelerate_configs │ │ ├── deepspeed_zero1.yaml │ │ ├── deepspeed_zero2.yaml │ │ ├── deepspeed_zero3.yaml │ │ ├── fsdp_qlora.yaml │ │ ├── multi_gpu.yaml │ │ └── single_gpu.yaml │ ├── cli_configs │ │ └── example_config.yaml │ ├── datasets │ │ ├── hh-rlhf-helpful-base.py │ │ ├── lm-human-preferences-descriptiveness.py │ │ ├── lm-human-preferences-sentiment.py │ │ ├── math_shepherd.py │ │ ├── prm800k.py │ │ ├── rlaif-v.py │ │ ├── tldr.py │ │ ├── tldr_preference.py │ │ ├── ultrafeedback-prompt.py │ │ └── ultrafeedback.py │ ├── notebooks │ │ ├── README.md │ │ ├── best_of_n.ipynb │ │ ├── gpt2-sentiment-control.ipynb │ │ └── gpt2-sentiment.ipynb │ ├── research_projects │ │ ├── README.md │ │ ├── stack_llama │ │ │ └── scripts │ │ │ │ ├── README.md │ │ │ │ ├── merge_peft_adapter.py │ │ │ │ ├── reward_modeling.py │ │ │ │ ├── rl_training.py │ │ │ │ └── supervised_finetuning.py │ │ ├── stack_llama_2 │ │ │ └── scripts │ │ │ │ ├── README.md │ │ │ │ ├── dpo_llama2.py │ │ │ │ ├── requirements.txt │ │ │ │ └── sft_llama2.py │ │ ├── tools │ │ │ ├── calculator.py │ │ │ ├── python_interpreter.py │ │ │ └── triviaqa.py │ │ └── toxicity │ │ │ ├── README.md │ │ │ └── scripts │ │ │ ├── evaluate-toxicity.py │ │ │ └── gpt-j-6b-toxicity.py │ └── scripts │ │ ├── alignprop.py │ │ ├── bco.py │ │ ├── chat.py │ │ ├── cpo.py │ │ ├── ddpo.py │ │ ├── dpo.py │ │ ├── dpo_online.py │ │ ├── dpo_vlm.py │ │ ├── evals │ │ └── judge_tldr.py │ │ ├── gkd.py │ │ ├── kto.py │ │ ├── nash_md.py │ │ ├── orpo.py │ │ ├── ppo │ │ ├── ppo.py │ │ └── ppo_tldr.py │ │ ├── prm.py │ │ ├── reward_modeling.py │ │ ├── rloo │ │ ├── rloo.py │ │ └── rloo_tldr.py │ │ ├── sft.py │ │ ├── sft_video_llm.py │ │ ├── sft_vlm.py │ │ ├── sft_vlm_smol_vlm.py │ │ └── xpo.py ├── pyproject.toml ├── requirements.txt ├── scripts │ ├── add_copyrights.py │ ├── generate_tiny_models.py │ ├── generate_zen_dataset.py │ ├── log_example_reports.py │ └── log_reports.py ├── setup.cfg ├── setup.py ├── tests │ ├── __init__.py │ ├── slow │ │ ├── __init__.py │ │ ├── test_dpo_slow.py │ │ ├── test_sft_slow.py │ │ └── testing_constants.py │ ├── test_alignprop_trainer.py │ ├── test_bco_trainer.py │ ├── test_best_of_n_sampler.py │ ├── test_callbacks.py │ ├── test_cli.py │ ├── test_cli_utils.py │ ├── test_collators.py │ ├── test_core.py │ ├── test_cpo_trainer.py │ ├── test_data_collator_completion_only.py │ ├── test_data_utils.py │ ├── test_dataset_formatting.py │ ├── test_ddpo_trainer.py │ ├── test_dpo_trainer.py │ ├── test_environments.py │ ├── test_gkd_trainer.py │ ├── test_grpo_trainer.py │ ├── test_iterative_sft_trainer.py │ ├── test_judges.py │ ├── test_kto_trainer.py │ ├── test_modeling_geometric_mixture_wrapper.py │ ├── test_modeling_value_head.py │ ├── test_nash_md_trainer.py │ ├── test_online_dpo_trainer.py │ ├── test_orpo_trainer.py │ ├── test_peft_models.py │ ├── test_ppo_trainer.py │ ├── test_prm_trainer.py │ ├── test_reward_trainer.py │ ├── test_rich_progress_callback.py │ ├── test_rloo_trainer.py │ ├── test_sft_trainer.py │ ├── test_trainers_args.py │ ├── test_utils.py │ ├── test_xpo_trainer.py │ ├── testing_constants.py │ └── testing_utils.py ├── trl.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── entry_points.txt │ ├── not-zip-safe │ ├── requires.txt │ └── top_level.txt └── trl │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── data_utils.cpython-311.pyc │ └── import_utils.cpython-311.pyc │ ├── cli.py │ ├── core.py │ ├── data_utils.py │ ├── environment │ ├── __init__.py │ └── base_environment.py │ ├── extras │ ├── __init__.py │ ├── best_of_n_sampler.py │ └── dataset_formatting.py │ ├── import_utils.py │ ├── mergekit_utils.py │ ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ ├── modeling_base.cpython-311.pyc │ │ ├── modeling_value_head.cpython-311.pyc │ │ └── utils.cpython-311.pyc │ ├── auxiliary_modules.py │ ├── modeling_base.py │ ├── modeling_sd_base.py │ ├── modeling_value_head.py │ ├── sd_utils.py │ └── utils.py │ ├── scripts │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-311.pyc │ │ └── utils.cpython-311.pyc │ ├── chat.py │ ├── dpo.py │ ├── env.py │ ├── grpo.py │ ├── kto.py │ ├── sft.py │ └── utils.py │ ├── templates │ └── lm_model_card.md │ └── trainer │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── grpo_config.cpython-311.pyc │ ├── grpo_trainer.cpython-311.pyc │ ├── model_config.cpython-311.pyc │ ├── sft_config.cpython-311.pyc │ └── utils.cpython-311.pyc │ ├── alignprop_config.py │ ├── alignprop_trainer.py │ ├── bco_config.py │ ├── bco_trainer.py │ ├── callbacks.py │ ├── cpo_config.py │ ├── cpo_trainer.py │ ├── ddpo_config.py │ ├── ddpo_trainer.py │ ├── dpo_config.py │ ├── dpo_trainer.py │ ├── gkd_config.py │ ├── gkd_trainer.py │ ├── grpo_config.py │ ├── grpo_trainer.py │ ├── iterative_sft_trainer.py │ ├── judges.py │ ├── kto_config.py │ ├── kto_trainer.py │ ├── model_config.py │ ├── nash_md_config.py │ ├── nash_md_trainer.py │ ├── online_dpo_config.py │ ├── online_dpo_trainer.py │ ├── orpo_config.py │ ├── orpo_trainer.py │ ├── ppo_config.py │ ├── ppo_trainer.py │ ├── prm_config.py │ ├── prm_trainer.py │ ├── reward_config.py │ ├── reward_trainer.py │ ├── rloo_config.py │ ├── rloo_trainer.py │ ├── sft_config.py │ ├── sft_trainer.py │ ├── utils.py │ ├── xpo_config.py │ └── xpo_trainer.py ├── assets └── readme.md ├── llama_8b_10k_2epoch.sh ├── logs └── readme.md ├── qwen_14b_10k_2epoch.sh ├── qwen_7b_10k_2epoch.sh ├── recipes ├── LLama │ └── LLama-Instruct │ │ └── llama3_8b_2epoch_10k.yaml ├── Qwen │ └── Qwen2.5-Instruct │ │ ├── qwen_14b_2epoch_10k.yaml │ │ └── qwen_7b_2epoch_10k.yaml └── accelerate_configs │ ├── ddp.yaml │ ├── zero2.yaml │ ├── zero2_offload.yaml │ ├── zero3.yaml │ └── zero3_offload.yaml ├── scripts ├── generate_reasoning.py ├── run_benchmarks.py └── upload_details.py ├── setup.cfg ├── setup.py ├── slurm ├── evaluate.slurm ├── experimental │ └── serve_r1_vllm.slurm ├── generate.slurm ├── serve_r1.slurm ├── serve_router.slurm └── train.slurm ├── src └── open_r1 │ ├── __init__.py │ ├── configs.py │ ├── evaluate.py │ ├── generate.py │ ├── grpo.py │ ├── sft.py │ └── utils │ ├── __init__.py │ ├── callbacks.py │ ├── evaluation.py │ ├── hub.py │ └── upload_details.py ├── tests ├── __init__.py └── test_rewards.py └── train_data └── readme.md /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/README.md -------------------------------------------------------------------------------- /eval/CLAPNQ/ckpts/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/CLAPNQ/dataset/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/CLAPNQ/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/eval.sh -------------------------------------------------------------------------------- /eval/CLAPNQ/eval_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/eval_api.sh -------------------------------------------------------------------------------- /eval/CLAPNQ/evaluation_api_minicheck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/evaluation_api_minicheck.py -------------------------------------------------------------------------------- /eval/CLAPNQ/evaluation_ours_minicheck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/evaluation_ours_minicheck.py -------------------------------------------------------------------------------- /eval/CLAPNQ/log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/CLAPNQ/prompt_ours.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/prompt_ours.txt -------------------------------------------------------------------------------- /eval/CLAPNQ/result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/CNQ/dataset/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/CNQ/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/eval.sh -------------------------------------------------------------------------------- /eval/CNQ/eval_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/eval_api.sh -------------------------------------------------------------------------------- /eval/CNQ/evaluation_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/evaluation_api.py -------------------------------------------------------------------------------- /eval/CNQ/evaluation_ours.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/evaluation_ours.py -------------------------------------------------------------------------------- /eval/CNQ/log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/CNQ/prompt_ours.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/prompt_ours.txt -------------------------------------------------------------------------------- /eval/CNQ/result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/dataset/.DS_Store: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/dataset/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval.sh -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/eval_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval_api.sh -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/eval_api_factual.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval_api_factual.sh -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/eval_factual.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval_factual.sh -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/evaluation_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_api.py -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/evaluation_api_factual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_api_factual.py -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/evaluation_ours.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_ours.py -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/evaluation_ours_factual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_ours_factual.py -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/log_factual/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/prompt_ours.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/prompt_ours.txt -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/ConFiQA_FiQA/result_factual/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/FaithEval/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/eval.sh -------------------------------------------------------------------------------- /eval/FaithEval/eval_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/eval_api.sh -------------------------------------------------------------------------------- /eval/FaithEval/evaluation_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/evaluation_api.py -------------------------------------------------------------------------------- /eval/FaithEval/evaluation_ours.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/evaluation_ours.py -------------------------------------------------------------------------------- /eval/FaithEval/log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/FaithEval/prompt_ours.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/prompt_ours.txt -------------------------------------------------------------------------------- /eval/FaithEval/result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/FollowRAG/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/eval.sh -------------------------------------------------------------------------------- /eval/FollowRAG/eval_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/eval_api.sh -------------------------------------------------------------------------------- /eval/FollowRAG/evaluation_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/evaluation_api.py -------------------------------------------------------------------------------- /eval/FollowRAG/evaluation_ours.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/evaluation_ours.py -------------------------------------------------------------------------------- /eval/FollowRAG/followRAG/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/FollowRAG/log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/FollowRAG/prompt_ours.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/prompt_ours.txt -------------------------------------------------------------------------------- /eval/FollowRAG/result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/ckpts/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/data/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/eval_sim.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sim.sh -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/eval_sim_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sim_api.sh -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/eval_sum.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sum.sh -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/eval_sum_api.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sum_api.sh -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/evaluation_api_minicheck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/evaluation_api_minicheck.py -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/evaluation_ours_minicheck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/evaluation_ours_minicheck.py -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/prompt_sim.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/prompt_sim.txt -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/prompt_sum.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/prompt_sum.txt -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/sim_log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/sim_result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/sum_log/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/XSum_WiKiLarge/sum_result/DS_Store.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /train/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/Makefile -------------------------------------------------------------------------------- /train/TRL/CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/CITATION.cff -------------------------------------------------------------------------------- /train/TRL/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /train/TRL/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/CONTRIBUTING.md -------------------------------------------------------------------------------- /train/TRL/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/LICENSE -------------------------------------------------------------------------------- /train/TRL/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/MANIFEST.in -------------------------------------------------------------------------------- /train/TRL/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/Makefile -------------------------------------------------------------------------------- /train/TRL/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/README.md -------------------------------------------------------------------------------- /train/TRL/commands/run_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/commands/run_dpo.sh -------------------------------------------------------------------------------- /train/TRL/commands/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/commands/run_sft.sh -------------------------------------------------------------------------------- /train/TRL/docker/trl-latest-gpu/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docker/trl-latest-gpu/Dockerfile -------------------------------------------------------------------------------- /train/TRL/docker/trl-source-gpu/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docker/trl-source-gpu/Dockerfile -------------------------------------------------------------------------------- /train/TRL/docs/source/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/_toctree.yml -------------------------------------------------------------------------------- /train/TRL/docs/source/alignprop_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/alignprop_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/bco_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/bco_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/best_of_n.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/best_of_n.md -------------------------------------------------------------------------------- /train/TRL/docs/source/callbacks.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/callbacks.md -------------------------------------------------------------------------------- /train/TRL/docs/source/clis.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/clis.md -------------------------------------------------------------------------------- /train/TRL/docs/source/community_tutorials.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/community_tutorials.md -------------------------------------------------------------------------------- /train/TRL/docs/source/cpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/cpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/customization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/customization.md -------------------------------------------------------------------------------- /train/TRL/docs/source/data_utils.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/data_utils.md -------------------------------------------------------------------------------- /train/TRL/docs/source/dataset_formats.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/dataset_formats.md -------------------------------------------------------------------------------- /train/TRL/docs/source/ddpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/ddpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/deepspeed_integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/deepspeed_integration.md -------------------------------------------------------------------------------- /train/TRL/docs/source/detoxifying_a_lm.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/detoxifying_a_lm.md -------------------------------------------------------------------------------- /train/TRL/docs/source/dpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/dpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/example_overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/example_overview.md -------------------------------------------------------------------------------- /train/TRL/docs/source/gkd_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/gkd_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/grpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/grpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/how_to_train.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/how_to_train.md -------------------------------------------------------------------------------- /train/TRL/docs/source/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/index.md -------------------------------------------------------------------------------- /train/TRL/docs/source/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/installation.md -------------------------------------------------------------------------------- /train/TRL/docs/source/iterative_sft_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/iterative_sft_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/judges.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/judges.md -------------------------------------------------------------------------------- /train/TRL/docs/source/kto_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/kto_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/learning_tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/learning_tools.md -------------------------------------------------------------------------------- /train/TRL/docs/source/liger_kernel_integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/liger_kernel_integration.md -------------------------------------------------------------------------------- /train/TRL/docs/source/logging.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/logging.md -------------------------------------------------------------------------------- /train/TRL/docs/source/models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/models.md -------------------------------------------------------------------------------- /train/TRL/docs/source/multi_adapter_rl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/multi_adapter_rl.md -------------------------------------------------------------------------------- /train/TRL/docs/source/nash_md_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/nash_md_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/online_dpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/online_dpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/orpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/orpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/peft_integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/peft_integration.md -------------------------------------------------------------------------------- /train/TRL/docs/source/ppo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/ppo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/prm_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/prm_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/quickstart.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/quickstart.md -------------------------------------------------------------------------------- /train/TRL/docs/source/reducing_memory_usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/reducing_memory_usage.md -------------------------------------------------------------------------------- /train/TRL/docs/source/reward_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/reward_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/rloo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/rloo_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/script_utils.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/script_utils.md -------------------------------------------------------------------------------- /train/TRL/docs/source/sentiment_tuning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/sentiment_tuning.md -------------------------------------------------------------------------------- /train/TRL/docs/source/sft_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/sft_trainer.md -------------------------------------------------------------------------------- /train/TRL/docs/source/speeding_up_training.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/speeding_up_training.md -------------------------------------------------------------------------------- /train/TRL/docs/source/text_environments.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/text_environments.md -------------------------------------------------------------------------------- /train/TRL/docs/source/unsloth_integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/unsloth_integration.md -------------------------------------------------------------------------------- /train/TRL/docs/source/use_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/use_model.md -------------------------------------------------------------------------------- /train/TRL/docs/source/using_llama_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/using_llama_models.md -------------------------------------------------------------------------------- /train/TRL/docs/source/xpo_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/xpo_trainer.md -------------------------------------------------------------------------------- /train/TRL/examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/README.md -------------------------------------------------------------------------------- /train/TRL/examples/accelerate_configs/deepspeed_zero1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/deepspeed_zero1.yaml -------------------------------------------------------------------------------- /train/TRL/examples/accelerate_configs/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /train/TRL/examples/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /train/TRL/examples/accelerate_configs/fsdp_qlora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/fsdp_qlora.yaml -------------------------------------------------------------------------------- /train/TRL/examples/accelerate_configs/multi_gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/multi_gpu.yaml -------------------------------------------------------------------------------- /train/TRL/examples/accelerate_configs/single_gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/single_gpu.yaml -------------------------------------------------------------------------------- /train/TRL/examples/cli_configs/example_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/cli_configs/example_config.yaml -------------------------------------------------------------------------------- /train/TRL/examples/datasets/hh-rlhf-helpful-base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/hh-rlhf-helpful-base.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/lm-human-preferences-descriptiveness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/lm-human-preferences-descriptiveness.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/lm-human-preferences-sentiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/lm-human-preferences-sentiment.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/math_shepherd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/math_shepherd.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/prm800k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/prm800k.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/rlaif-v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/rlaif-v.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/tldr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/tldr.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/tldr_preference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/tldr_preference.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/ultrafeedback-prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/ultrafeedback-prompt.py -------------------------------------------------------------------------------- /train/TRL/examples/datasets/ultrafeedback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/ultrafeedback.py -------------------------------------------------------------------------------- /train/TRL/examples/notebooks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/README.md -------------------------------------------------------------------------------- /train/TRL/examples/notebooks/best_of_n.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/best_of_n.ipynb -------------------------------------------------------------------------------- /train/TRL/examples/notebooks/gpt2-sentiment-control.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/gpt2-sentiment-control.ipynb -------------------------------------------------------------------------------- /train/TRL/examples/notebooks/gpt2-sentiment.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/gpt2-sentiment.ipynb -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/README.md -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama/scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/README.md -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama/scripts/reward_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/reward_modeling.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama/scripts/rl_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/rl_training.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama/scripts/supervised_finetuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/supervised_finetuning.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama_2/scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/README.md -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama_2/scripts/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/requirements.txt -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/stack_llama_2/scripts/sft_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/sft_llama2.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/tools/calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/tools/calculator.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/tools/python_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/tools/python_interpreter.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/tools/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/tools/triviaqa.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/toxicity/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/toxicity/README.md -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/toxicity/scripts/evaluate-toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/toxicity/scripts/evaluate-toxicity.py -------------------------------------------------------------------------------- /train/TRL/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/alignprop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/alignprop.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/bco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/bco.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/chat.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/cpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/cpo.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/ddpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/ddpo.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/dpo.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/dpo_online.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/dpo_online.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/dpo_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/dpo_vlm.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/evals/judge_tldr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/evals/judge_tldr.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/gkd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/gkd.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/kto.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/nash_md.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/nash_md.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/orpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/orpo.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/ppo/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/ppo/ppo.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/ppo/ppo_tldr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/ppo/ppo_tldr.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/prm.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/reward_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/reward_modeling.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/rloo/rloo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/rloo/rloo.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/rloo/rloo_tldr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/rloo/rloo_tldr.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/sft_video_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft_video_llm.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/sft_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft_vlm.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/sft_vlm_smol_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft_vlm_smol_vlm.py -------------------------------------------------------------------------------- /train/TRL/examples/scripts/xpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/xpo.py -------------------------------------------------------------------------------- /train/TRL/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/pyproject.toml -------------------------------------------------------------------------------- /train/TRL/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | datasets 3 | rich 4 | transformers>=4.46.0 -------------------------------------------------------------------------------- /train/TRL/scripts/add_copyrights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/add_copyrights.py -------------------------------------------------------------------------------- /train/TRL/scripts/generate_tiny_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/generate_tiny_models.py -------------------------------------------------------------------------------- /train/TRL/scripts/generate_zen_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/generate_zen_dataset.py -------------------------------------------------------------------------------- /train/TRL/scripts/log_example_reports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/log_example_reports.py -------------------------------------------------------------------------------- /train/TRL/scripts/log_reports.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/log_reports.py -------------------------------------------------------------------------------- /train/TRL/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE -------------------------------------------------------------------------------- /train/TRL/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/setup.py -------------------------------------------------------------------------------- /train/TRL/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/__init__.py -------------------------------------------------------------------------------- /train/TRL/tests/slow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/__init__.py -------------------------------------------------------------------------------- /train/TRL/tests/slow/test_dpo_slow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/test_dpo_slow.py -------------------------------------------------------------------------------- /train/TRL/tests/slow/test_sft_slow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/test_sft_slow.py -------------------------------------------------------------------------------- /train/TRL/tests/slow/testing_constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/testing_constants.py -------------------------------------------------------------------------------- /train/TRL/tests/test_alignprop_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_alignprop_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_bco_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_bco_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_best_of_n_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_best_of_n_sampler.py -------------------------------------------------------------------------------- /train/TRL/tests/test_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_callbacks.py -------------------------------------------------------------------------------- /train/TRL/tests/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_cli.py -------------------------------------------------------------------------------- /train/TRL/tests/test_cli_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_cli_utils.py -------------------------------------------------------------------------------- /train/TRL/tests/test_collators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_collators.py -------------------------------------------------------------------------------- /train/TRL/tests/test_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_core.py -------------------------------------------------------------------------------- /train/TRL/tests/test_cpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_cpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_data_collator_completion_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_data_collator_completion_only.py -------------------------------------------------------------------------------- /train/TRL/tests/test_data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_data_utils.py -------------------------------------------------------------------------------- /train/TRL/tests/test_dataset_formatting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_dataset_formatting.py -------------------------------------------------------------------------------- /train/TRL/tests/test_ddpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_ddpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_dpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_environments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_environments.py -------------------------------------------------------------------------------- /train/TRL/tests/test_gkd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_gkd_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_grpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_iterative_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_iterative_sft_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_judges.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_judges.py -------------------------------------------------------------------------------- /train/TRL/tests/test_kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_kto_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_modeling_geometric_mixture_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_modeling_geometric_mixture_wrapper.py -------------------------------------------------------------------------------- /train/TRL/tests/test_modeling_value_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_modeling_value_head.py -------------------------------------------------------------------------------- /train/TRL/tests/test_nash_md_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_nash_md_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_online_dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_online_dpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_orpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_orpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_peft_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_peft_models.py -------------------------------------------------------------------------------- /train/TRL/tests/test_ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_ppo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_prm_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_reward_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_reward_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_rich_progress_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_rich_progress_callback.py -------------------------------------------------------------------------------- /train/TRL/tests/test_rloo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_rloo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_sft_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/test_trainers_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_trainers_args.py -------------------------------------------------------------------------------- /train/TRL/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_utils.py -------------------------------------------------------------------------------- /train/TRL/tests/test_xpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_xpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/tests/testing_constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/testing_constants.py -------------------------------------------------------------------------------- /train/TRL/tests/testing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/testing_utils.py -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl.egg-info/PKG-INFO -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/entry_points.txt: -------------------------------------------------------------------------------- 1 | [console_scripts] 2 | trl = trl.cli:main 3 | -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/not-zip-safe: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl.egg-info/requires.txt -------------------------------------------------------------------------------- /train/TRL/trl.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | trl 2 | -------------------------------------------------------------------------------- /train/TRL/trl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__init__.py -------------------------------------------------------------------------------- /train/TRL/trl/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/__pycache__/data_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__pycache__/data_utils.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/__pycache__/import_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__pycache__/import_utils.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/cli.py -------------------------------------------------------------------------------- /train/TRL/trl/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/core.py -------------------------------------------------------------------------------- /train/TRL/trl/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/data_utils.py -------------------------------------------------------------------------------- /train/TRL/trl/environment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/environment/__init__.py -------------------------------------------------------------------------------- /train/TRL/trl/environment/base_environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/environment/base_environment.py -------------------------------------------------------------------------------- /train/TRL/trl/extras/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/extras/__init__.py -------------------------------------------------------------------------------- /train/TRL/trl/extras/best_of_n_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/extras/best_of_n_sampler.py -------------------------------------------------------------------------------- /train/TRL/trl/extras/dataset_formatting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/extras/dataset_formatting.py -------------------------------------------------------------------------------- /train/TRL/trl/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/import_utils.py -------------------------------------------------------------------------------- /train/TRL/trl/mergekit_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/mergekit_utils.py -------------------------------------------------------------------------------- /train/TRL/trl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__init__.py -------------------------------------------------------------------------------- /train/TRL/trl/models/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/models/__pycache__/modeling_base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/modeling_base.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/models/__pycache__/modeling_value_head.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/modeling_value_head.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/models/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/models/auxiliary_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/auxiliary_modules.py -------------------------------------------------------------------------------- /train/TRL/trl/models/modeling_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/modeling_base.py -------------------------------------------------------------------------------- /train/TRL/trl/models/modeling_sd_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/modeling_sd_base.py -------------------------------------------------------------------------------- /train/TRL/trl/models/modeling_value_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/modeling_value_head.py -------------------------------------------------------------------------------- /train/TRL/trl/models/sd_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/sd_utils.py -------------------------------------------------------------------------------- /train/TRL/trl/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/utils.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/__init__.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/scripts/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/scripts/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/chat.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/dpo.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/env.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/grpo.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/kto.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/sft.py -------------------------------------------------------------------------------- /train/TRL/trl/scripts/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/utils.py -------------------------------------------------------------------------------- /train/TRL/trl/templates/lm_model_card.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/templates/lm_model_card.md -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__init__.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__pycache__/grpo_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/grpo_config.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__pycache__/grpo_trainer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/grpo_trainer.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__pycache__/model_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/model_config.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__pycache__/sft_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/sft_config.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/trainer/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /train/TRL/trl/trainer/alignprop_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/alignprop_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/alignprop_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/alignprop_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/bco_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/bco_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/bco_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/bco_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/callbacks.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/cpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/cpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/cpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/cpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/ddpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ddpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/ddpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ddpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/dpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/dpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/gkd_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/gkd_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/gkd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/gkd_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/grpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/grpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/grpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/iterative_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/iterative_sft_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/judges.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/judges.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/kto_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/kto_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/kto_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/model_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/nash_md_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/nash_md_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/nash_md_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/nash_md_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/online_dpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/online_dpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/online_dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/online_dpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/orpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/orpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/orpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/orpo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/ppo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ppo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/prm_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/prm_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/prm_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/reward_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/reward_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/reward_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/reward_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/rloo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/rloo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/rloo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/rloo_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/sft_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/sft_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/sft_trainer.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/utils.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/xpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/xpo_config.py -------------------------------------------------------------------------------- /train/TRL/trl/trainer/xpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/xpo_trainer.py -------------------------------------------------------------------------------- /train/assets/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /train/llama_8b_10k_2epoch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/llama_8b_10k_2epoch.sh -------------------------------------------------------------------------------- /train/logs/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /train/qwen_14b_10k_2epoch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/qwen_14b_10k_2epoch.sh -------------------------------------------------------------------------------- /train/qwen_7b_10k_2epoch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/qwen_7b_10k_2epoch.sh -------------------------------------------------------------------------------- /train/recipes/LLama/LLama-Instruct/llama3_8b_2epoch_10k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/LLama/LLama-Instruct/llama3_8b_2epoch_10k.yaml -------------------------------------------------------------------------------- /train/recipes/Qwen/Qwen2.5-Instruct/qwen_14b_2epoch_10k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/Qwen/Qwen2.5-Instruct/qwen_14b_2epoch_10k.yaml -------------------------------------------------------------------------------- /train/recipes/Qwen/Qwen2.5-Instruct/qwen_7b_2epoch_10k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/Qwen/Qwen2.5-Instruct/qwen_7b_2epoch_10k.yaml -------------------------------------------------------------------------------- /train/recipes/accelerate_configs/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/ddp.yaml -------------------------------------------------------------------------------- /train/recipes/accelerate_configs/zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero2.yaml -------------------------------------------------------------------------------- /train/recipes/accelerate_configs/zero2_offload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero2_offload.yaml -------------------------------------------------------------------------------- /train/recipes/accelerate_configs/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero3.yaml -------------------------------------------------------------------------------- /train/recipes/accelerate_configs/zero3_offload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero3_offload.yaml -------------------------------------------------------------------------------- /train/scripts/generate_reasoning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/scripts/generate_reasoning.py -------------------------------------------------------------------------------- /train/scripts/run_benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/scripts/run_benchmarks.py -------------------------------------------------------------------------------- /train/scripts/upload_details.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/scripts/upload_details.py -------------------------------------------------------------------------------- /train/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/setup.cfg -------------------------------------------------------------------------------- /train/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/setup.py -------------------------------------------------------------------------------- /train/slurm/evaluate.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/evaluate.slurm -------------------------------------------------------------------------------- /train/slurm/experimental/serve_r1_vllm.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/experimental/serve_r1_vllm.slurm -------------------------------------------------------------------------------- /train/slurm/generate.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/generate.slurm -------------------------------------------------------------------------------- /train/slurm/serve_r1.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/serve_r1.slurm -------------------------------------------------------------------------------- /train/slurm/serve_router.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/serve_router.slurm -------------------------------------------------------------------------------- /train/slurm/train.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/train.slurm -------------------------------------------------------------------------------- /train/src/open_r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/__init__.py -------------------------------------------------------------------------------- /train/src/open_r1/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/configs.py -------------------------------------------------------------------------------- /train/src/open_r1/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/evaluate.py -------------------------------------------------------------------------------- /train/src/open_r1/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/generate.py -------------------------------------------------------------------------------- /train/src/open_r1/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/grpo.py -------------------------------------------------------------------------------- /train/src/open_r1/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/sft.py -------------------------------------------------------------------------------- /train/src/open_r1/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/src/open_r1/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/callbacks.py -------------------------------------------------------------------------------- /train/src/open_r1/utils/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/evaluation.py -------------------------------------------------------------------------------- /train/src/open_r1/utils/hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/hub.py -------------------------------------------------------------------------------- /train/src/open_r1/utils/upload_details.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/upload_details.py -------------------------------------------------------------------------------- /train/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/tests/test_rewards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/tests/test_rewards.py -------------------------------------------------------------------------------- /train/train_data/readme.md: -------------------------------------------------------------------------------- 1 | todo 2 | --------------------------------------------------------------------------------