├── README.md
├── eval
    ├── CLAPNQ
    │   ├── ckpts
    │   │   └── DS_Store.txt
    │   ├── dataset
    │   │   └── DS_Store.txt
    │   ├── eval.sh
    │   ├── eval_api.sh
    │   ├── evaluation_api_minicheck.py
    │   ├── evaluation_ours_minicheck.py
    │   ├── log
    │   │   └── DS_Store.txt
    │   ├── prompt_ours.txt
    │   └── result
    │   │   └── DS_Store.txt
    ├── CNQ
    │   ├── dataset
    │   │   └── DS_Store.txt
    │   ├── eval.sh
    │   ├── eval_api.sh
    │   ├── evaluation_api.py
    │   ├── evaluation_ours.py
    │   ├── log
    │   │   └── DS_Store.txt
    │   ├── prompt_ours.txt
    │   └── result
    │   │   └── DS_Store.txt
    ├── ConFiQA_FiQA
    │   ├── dataset
    │   │   ├── .DS_Store
    │   │   └── DS_Store.txt
    │   ├── eval.sh
    │   ├── eval_api.sh
    │   ├── eval_api_factual.sh
    │   ├── eval_factual.sh
    │   ├── evaluation_api.py
    │   ├── evaluation_api_factual.py
    │   ├── evaluation_ours.py
    │   ├── evaluation_ours_factual.py
    │   ├── log
    │   │   └── DS_Store.txt
    │   ├── log_factual
    │   │   └── DS_Store.txt
    │   ├── prompt_ours.txt
    │   ├── result
    │   │   └── DS_Store.txt
    │   └── result_factual
    │   │   └── DS_Store.txt
    ├── FaithEval
    │   ├── eval.sh
    │   ├── eval_api.sh
    │   ├── evaluation_api.py
    │   ├── evaluation_ours.py
    │   ├── log
    │   │   └── DS_Store.txt
    │   ├── prompt_ours.txt
    │   └── result
    │   │   └── DS_Store.txt
    ├── FollowRAG
    │   ├── eval.sh
    │   ├── eval_api.sh
    │   ├── evaluation_api.py
    │   ├── evaluation_ours.py
    │   ├── followRAG
    │   │   └── DS_Store.txt
    │   ├── log
    │   │   └── DS_Store.txt
    │   ├── prompt_ours.txt
    │   └── result
    │   │   └── DS_Store.txt
    └── XSum_WiKiLarge
    │   ├── ckpts
    │       └── DS_Store.txt
    │   ├── data
    │       └── DS_Store.txt
    │   ├── eval_sim.sh
    │   ├── eval_sim_api.sh
    │   ├── eval_sum.sh
    │   ├── eval_sum_api.sh
    │   ├── evaluation_api_minicheck.py
    │   ├── evaluation_ours_minicheck.py
    │   ├── prompt_sim.txt
    │   ├── prompt_sum.txt
    │   ├── sim_log
    │       └── DS_Store.txt
    │   ├── sim_result
    │       └── DS_Store.txt
    │   ├── sum_log
    │       └── DS_Store.txt
    │   └── sum_result
    │       └── DS_Store.txt
└── train
    ├── Makefile
    ├── TRL
        ├── CITATION.cff
        ├── CODE_OF_CONDUCT.md
        ├── CONTRIBUTING.md
        ├── LICENSE
        ├── MANIFEST.in
        ├── Makefile
        ├── README.md
        ├── commands
        │   ├── run_dpo.sh
        │   └── run_sft.sh
        ├── docker
        │   ├── trl-latest-gpu
        │   │   └── Dockerfile
        │   └── trl-source-gpu
        │   │   └── Dockerfile
        ├── docs
        │   └── source
        │   │   ├── _toctree.yml
        │   │   ├── alignprop_trainer.md
        │   │   ├── bco_trainer.md
        │   │   ├── best_of_n.md
        │   │   ├── callbacks.md
        │   │   ├── clis.md
        │   │   ├── community_tutorials.md
        │   │   ├── cpo_trainer.md
        │   │   ├── customization.md
        │   │   ├── data_utils.md
        │   │   ├── dataset_formats.md
        │   │   ├── ddpo_trainer.md
        │   │   ├── deepspeed_integration.md
        │   │   ├── detoxifying_a_lm.md
        │   │   ├── dpo_trainer.md
        │   │   ├── example_overview.md
        │   │   ├── gkd_trainer.md
        │   │   ├── grpo_trainer.md
        │   │   ├── how_to_train.md
        │   │   ├── index.md
        │   │   ├── installation.md
        │   │   ├── iterative_sft_trainer.md
        │   │   ├── judges.md
        │   │   ├── kto_trainer.md
        │   │   ├── learning_tools.md
        │   │   ├── liger_kernel_integration.md
        │   │   ├── logging.md
        │   │   ├── models.md
        │   │   ├── multi_adapter_rl.md
        │   │   ├── nash_md_trainer.md
        │   │   ├── online_dpo_trainer.md
        │   │   ├── orpo_trainer.md
        │   │   ├── peft_integration.md
        │   │   ├── ppo_trainer.md
        │   │   ├── prm_trainer.md
        │   │   ├── quickstart.md
        │   │   ├── reducing_memory_usage.md
        │   │   ├── reward_trainer.md
        │   │   ├── rloo_trainer.md
        │   │   ├── script_utils.md
        │   │   ├── sentiment_tuning.md
        │   │   ├── sft_trainer.md
        │   │   ├── speeding_up_training.md
        │   │   ├── text_environments.md
        │   │   ├── unsloth_integration.md
        │   │   ├── use_model.md
        │   │   ├── using_llama_models.md
        │   │   └── xpo_trainer.md
        ├── examples
        │   ├── README.md
        │   ├── accelerate_configs
        │   │   ├── deepspeed_zero1.yaml
        │   │   ├── deepspeed_zero2.yaml
        │   │   ├── deepspeed_zero3.yaml
        │   │   ├── fsdp_qlora.yaml
        │   │   ├── multi_gpu.yaml
        │   │   └── single_gpu.yaml
        │   ├── cli_configs
        │   │   └── example_config.yaml
        │   ├── datasets
        │   │   ├── hh-rlhf-helpful-base.py
        │   │   ├── lm-human-preferences-descriptiveness.py
        │   │   ├── lm-human-preferences-sentiment.py
        │   │   ├── math_shepherd.py
        │   │   ├── prm800k.py
        │   │   ├── rlaif-v.py
        │   │   ├── tldr.py
        │   │   ├── tldr_preference.py
        │   │   ├── ultrafeedback-prompt.py
        │   │   └── ultrafeedback.py
        │   ├── notebooks
        │   │   ├── README.md
        │   │   ├── best_of_n.ipynb
        │   │   ├── gpt2-sentiment-control.ipynb
        │   │   └── gpt2-sentiment.ipynb
        │   ├── research_projects
        │   │   ├── README.md
        │   │   ├── stack_llama
        │   │   │   └── scripts
        │   │   │   │   ├── README.md
        │   │   │   │   ├── merge_peft_adapter.py
        │   │   │   │   ├── reward_modeling.py
        │   │   │   │   ├── rl_training.py
        │   │   │   │   └── supervised_finetuning.py
        │   │   ├── stack_llama_2
        │   │   │   └── scripts
        │   │   │   │   ├── README.md
        │   │   │   │   ├── dpo_llama2.py
        │   │   │   │   ├── requirements.txt
        │   │   │   │   └── sft_llama2.py
        │   │   ├── tools
        │   │   │   ├── calculator.py
        │   │   │   ├── python_interpreter.py
        │   │   │   └── triviaqa.py
        │   │   └── toxicity
        │   │   │   ├── README.md
        │   │   │   └── scripts
        │   │   │       ├── evaluate-toxicity.py
        │   │   │       └── gpt-j-6b-toxicity.py
        │   └── scripts
        │   │   ├── alignprop.py
        │   │   ├── bco.py
        │   │   ├── chat.py
        │   │   ├── cpo.py
        │   │   ├── ddpo.py
        │   │   ├── dpo.py
        │   │   ├── dpo_online.py
        │   │   ├── dpo_vlm.py
        │   │   ├── evals
        │   │       └── judge_tldr.py
        │   │   ├── gkd.py
        │   │   ├── kto.py
        │   │   ├── nash_md.py
        │   │   ├── orpo.py
        │   │   ├── ppo
        │   │       ├── ppo.py
        │   │       └── ppo_tldr.py
        │   │   ├── prm.py
        │   │   ├── reward_modeling.py
        │   │   ├── rloo
        │   │       ├── rloo.py
        │   │       └── rloo_tldr.py
        │   │   ├── sft.py
        │   │   ├── sft_video_llm.py
        │   │   ├── sft_vlm.py
        │   │   ├── sft_vlm_smol_vlm.py
        │   │   └── xpo.py
        ├── pyproject.toml
        ├── requirements.txt
        ├── scripts
        │   ├── add_copyrights.py
        │   ├── generate_tiny_models.py
        │   ├── generate_zen_dataset.py
        │   ├── log_example_reports.py
        │   └── log_reports.py
        ├── setup.cfg
        ├── setup.py
        ├── tests
        │   ├── __init__.py
        │   ├── slow
        │   │   ├── __init__.py
        │   │   ├── test_dpo_slow.py
        │   │   ├── test_sft_slow.py
        │   │   └── testing_constants.py
        │   ├── test_alignprop_trainer.py
        │   ├── test_bco_trainer.py
        │   ├── test_best_of_n_sampler.py
        │   ├── test_callbacks.py
        │   ├── test_cli.py
        │   ├── test_cli_utils.py
        │   ├── test_collators.py
        │   ├── test_core.py
        │   ├── test_cpo_trainer.py
        │   ├── test_data_collator_completion_only.py
        │   ├── test_data_utils.py
        │   ├── test_dataset_formatting.py
        │   ├── test_ddpo_trainer.py
        │   ├── test_dpo_trainer.py
        │   ├── test_environments.py
        │   ├── test_gkd_trainer.py
        │   ├── test_grpo_trainer.py
        │   ├── test_iterative_sft_trainer.py
        │   ├── test_judges.py
        │   ├── test_kto_trainer.py
        │   ├── test_modeling_geometric_mixture_wrapper.py
        │   ├── test_modeling_value_head.py
        │   ├── test_nash_md_trainer.py
        │   ├── test_online_dpo_trainer.py
        │   ├── test_orpo_trainer.py
        │   ├── test_peft_models.py
        │   ├── test_ppo_trainer.py
        │   ├── test_prm_trainer.py
        │   ├── test_reward_trainer.py
        │   ├── test_rich_progress_callback.py
        │   ├── test_rloo_trainer.py
        │   ├── test_sft_trainer.py
        │   ├── test_trainers_args.py
        │   ├── test_utils.py
        │   ├── test_xpo_trainer.py
        │   ├── testing_constants.py
        │   └── testing_utils.py
        ├── trl.egg-info
        │   ├── PKG-INFO
        │   ├── SOURCES.txt
        │   ├── dependency_links.txt
        │   ├── entry_points.txt
        │   ├── not-zip-safe
        │   ├── requires.txt
        │   └── top_level.txt
        └── trl
        │   ├── __init__.py
        │   ├── __pycache__
        │       ├── __init__.cpython-311.pyc
        │       ├── data_utils.cpython-311.pyc
        │       └── import_utils.cpython-311.pyc
        │   ├── cli.py
        │   ├── core.py
        │   ├── data_utils.py
        │   ├── environment
        │       ├── __init__.py
        │       └── base_environment.py
        │   ├── extras
        │       ├── __init__.py
        │       ├── best_of_n_sampler.py
        │       └── dataset_formatting.py
        │   ├── import_utils.py
        │   ├── mergekit_utils.py
        │   ├── models
        │       ├── __init__.py
        │       ├── __pycache__
        │       │   ├── __init__.cpython-311.pyc
        │       │   ├── modeling_base.cpython-311.pyc
        │       │   ├── modeling_value_head.cpython-311.pyc
        │       │   └── utils.cpython-311.pyc
        │       ├── auxiliary_modules.py
        │       ├── modeling_base.py
        │       ├── modeling_sd_base.py
        │       ├── modeling_value_head.py
        │       ├── sd_utils.py
        │       └── utils.py
        │   ├── scripts
        │       ├── __init__.py
        │       ├── __pycache__
        │       │   ├── __init__.cpython-311.pyc
        │       │   └── utils.cpython-311.pyc
        │       ├── chat.py
        │       ├── dpo.py
        │       ├── env.py
        │       ├── grpo.py
        │       ├── kto.py
        │       ├── sft.py
        │       └── utils.py
        │   ├── templates
        │       └── lm_model_card.md
        │   └── trainer
        │       ├── __init__.py
        │       ├── __pycache__
        │           ├── __init__.cpython-311.pyc
        │           ├── grpo_config.cpython-311.pyc
        │           ├── grpo_trainer.cpython-311.pyc
        │           ├── model_config.cpython-311.pyc
        │           ├── sft_config.cpython-311.pyc
        │           └── utils.cpython-311.pyc
        │       ├── alignprop_config.py
        │       ├── alignprop_trainer.py
        │       ├── bco_config.py
        │       ├── bco_trainer.py
        │       ├── callbacks.py
        │       ├── cpo_config.py
        │       ├── cpo_trainer.py
        │       ├── ddpo_config.py
        │       ├── ddpo_trainer.py
        │       ├── dpo_config.py
        │       ├── dpo_trainer.py
        │       ├── gkd_config.py
        │       ├── gkd_trainer.py
        │       ├── grpo_config.py
        │       ├── grpo_trainer.py
        │       ├── iterative_sft_trainer.py
        │       ├── judges.py
        │       ├── kto_config.py
        │       ├── kto_trainer.py
        │       ├── model_config.py
        │       ├── nash_md_config.py
        │       ├── nash_md_trainer.py
        │       ├── online_dpo_config.py
        │       ├── online_dpo_trainer.py
        │       ├── orpo_config.py
        │       ├── orpo_trainer.py
        │       ├── ppo_config.py
        │       ├── ppo_trainer.py
        │       ├── prm_config.py
        │       ├── prm_trainer.py
        │       ├── reward_config.py
        │       ├── reward_trainer.py
        │       ├── rloo_config.py
        │       ├── rloo_trainer.py
        │       ├── sft_config.py
        │       ├── sft_trainer.py
        │       ├── utils.py
        │       ├── xpo_config.py
        │       └── xpo_trainer.py
    ├── assets
        └── readme.md
    ├── llama_8b_10k_2epoch.sh
    ├── logs
        └── readme.md
    ├── qwen_14b_10k_2epoch.sh
    ├── qwen_7b_10k_2epoch.sh
    ├── recipes
        ├── LLama
        │   └── LLama-Instruct
        │   │   └── llama3_8b_2epoch_10k.yaml
        ├── Qwen
        │   └── Qwen2.5-Instruct
        │   │   ├── qwen_14b_2epoch_10k.yaml
        │   │   └── qwen_7b_2epoch_10k.yaml
        └── accelerate_configs
        │   ├── ddp.yaml
        │   ├── zero2.yaml
        │   ├── zero2_offload.yaml
        │   ├── zero3.yaml
        │   └── zero3_offload.yaml
    ├── scripts
        ├── generate_reasoning.py
        ├── run_benchmarks.py
        └── upload_details.py
    ├── setup.cfg
    ├── setup.py
    ├── slurm
        ├── evaluate.slurm
        ├── experimental
        │   └── serve_r1_vllm.slurm
        ├── generate.slurm
        ├── serve_r1.slurm
        ├── serve_router.slurm
        └── train.slurm
    ├── src
        └── open_r1
        │   ├── __init__.py
        │   ├── configs.py
        │   ├── evaluate.py
        │   ├── generate.py
        │   ├── grpo.py
        │   ├── sft.py
        │   └── utils
        │       ├── __init__.py
        │       ├── callbacks.py
        │       ├── evaluation.py
        │       ├── hub.py
        │       └── upload_details.py
    ├── tests
        ├── __init__.py
        └── test_rewards.py
    └── train_data
        └── readme.md


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/README.md


--------------------------------------------------------------------------------
/eval/CLAPNQ/ckpts/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/CLAPNQ/dataset/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/CLAPNQ/eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/eval.sh


--------------------------------------------------------------------------------
/eval/CLAPNQ/eval_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/eval_api.sh


--------------------------------------------------------------------------------
/eval/CLAPNQ/evaluation_api_minicheck.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/evaluation_api_minicheck.py


--------------------------------------------------------------------------------
/eval/CLAPNQ/evaluation_ours_minicheck.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/evaluation_ours_minicheck.py


--------------------------------------------------------------------------------
/eval/CLAPNQ/log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/CLAPNQ/prompt_ours.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CLAPNQ/prompt_ours.txt


--------------------------------------------------------------------------------
/eval/CLAPNQ/result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/CNQ/dataset/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/CNQ/eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/eval.sh


--------------------------------------------------------------------------------
/eval/CNQ/eval_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/eval_api.sh


--------------------------------------------------------------------------------
/eval/CNQ/evaluation_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/evaluation_api.py


--------------------------------------------------------------------------------
/eval/CNQ/evaluation_ours.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/evaluation_ours.py


--------------------------------------------------------------------------------
/eval/CNQ/log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/CNQ/prompt_ours.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/CNQ/prompt_ours.txt


--------------------------------------------------------------------------------
/eval/CNQ/result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/dataset/.DS_Store:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/dataset/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval.sh


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/eval_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval_api.sh


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/eval_api_factual.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval_api_factual.sh


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/eval_factual.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/eval_factual.sh


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/evaluation_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_api.py


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/evaluation_api_factual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_api_factual.py


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/evaluation_ours.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_ours.py


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/evaluation_ours_factual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/evaluation_ours_factual.py


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/log_factual/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/prompt_ours.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/ConFiQA_FiQA/prompt_ours.txt


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/ConFiQA_FiQA/result_factual/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/FaithEval/eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/eval.sh


--------------------------------------------------------------------------------
/eval/FaithEval/eval_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/eval_api.sh


--------------------------------------------------------------------------------
/eval/FaithEval/evaluation_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/evaluation_api.py


--------------------------------------------------------------------------------
/eval/FaithEval/evaluation_ours.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/evaluation_ours.py


--------------------------------------------------------------------------------
/eval/FaithEval/log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/FaithEval/prompt_ours.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FaithEval/prompt_ours.txt


--------------------------------------------------------------------------------
/eval/FaithEval/result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/FollowRAG/eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/eval.sh


--------------------------------------------------------------------------------
/eval/FollowRAG/eval_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/eval_api.sh


--------------------------------------------------------------------------------
/eval/FollowRAG/evaluation_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/evaluation_api.py


--------------------------------------------------------------------------------
/eval/FollowRAG/evaluation_ours.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/evaluation_ours.py


--------------------------------------------------------------------------------
/eval/FollowRAG/followRAG/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/FollowRAG/log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/FollowRAG/prompt_ours.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/FollowRAG/prompt_ours.txt


--------------------------------------------------------------------------------
/eval/FollowRAG/result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/ckpts/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/data/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/eval_sim.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sim.sh


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/eval_sim_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sim_api.sh


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/eval_sum.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sum.sh


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/eval_sum_api.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/eval_sum_api.sh


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/evaluation_api_minicheck.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/evaluation_api_minicheck.py


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/evaluation_ours_minicheck.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/evaluation_ours_minicheck.py


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/prompt_sim.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/prompt_sim.txt


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/prompt_sum.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/eval/XSum_WiKiLarge/prompt_sum.txt


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/sim_log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/sim_result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/sum_log/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/eval/XSum_WiKiLarge/sum_result/DS_Store.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/train/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/Makefile


--------------------------------------------------------------------------------
/train/TRL/CITATION.cff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/CITATION.cff


--------------------------------------------------------------------------------
/train/TRL/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/CODE_OF_CONDUCT.md


--------------------------------------------------------------------------------
/train/TRL/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/CONTRIBUTING.md


--------------------------------------------------------------------------------
/train/TRL/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/LICENSE


--------------------------------------------------------------------------------
/train/TRL/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/MANIFEST.in


--------------------------------------------------------------------------------
/train/TRL/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/Makefile


--------------------------------------------------------------------------------
/train/TRL/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/README.md


--------------------------------------------------------------------------------
/train/TRL/commands/run_dpo.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/commands/run_dpo.sh


--------------------------------------------------------------------------------
/train/TRL/commands/run_sft.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/commands/run_sft.sh


--------------------------------------------------------------------------------
/train/TRL/docker/trl-latest-gpu/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docker/trl-latest-gpu/Dockerfile


--------------------------------------------------------------------------------
/train/TRL/docker/trl-source-gpu/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docker/trl-source-gpu/Dockerfile


--------------------------------------------------------------------------------
/train/TRL/docs/source/_toctree.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/_toctree.yml


--------------------------------------------------------------------------------
/train/TRL/docs/source/alignprop_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/alignprop_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/bco_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/bco_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/best_of_n.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/best_of_n.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/callbacks.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/callbacks.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/clis.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/clis.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/community_tutorials.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/community_tutorials.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/cpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/cpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/customization.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/customization.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/data_utils.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/data_utils.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/dataset_formats.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/dataset_formats.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/ddpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/ddpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/deepspeed_integration.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/deepspeed_integration.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/detoxifying_a_lm.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/detoxifying_a_lm.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/dpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/dpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/example_overview.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/example_overview.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/gkd_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/gkd_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/grpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/grpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/how_to_train.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/how_to_train.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/index.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/index.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/installation.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/installation.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/iterative_sft_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/iterative_sft_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/judges.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/judges.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/kto_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/kto_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/learning_tools.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/learning_tools.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/liger_kernel_integration.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/liger_kernel_integration.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/logging.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/logging.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/models.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/models.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/multi_adapter_rl.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/multi_adapter_rl.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/nash_md_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/nash_md_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/online_dpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/online_dpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/orpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/orpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/peft_integration.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/peft_integration.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/ppo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/ppo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/prm_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/prm_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/quickstart.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/quickstart.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/reducing_memory_usage.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/reducing_memory_usage.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/reward_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/reward_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/rloo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/rloo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/script_utils.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/script_utils.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/sentiment_tuning.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/sentiment_tuning.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/sft_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/sft_trainer.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/speeding_up_training.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/speeding_up_training.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/text_environments.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/text_environments.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/unsloth_integration.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/unsloth_integration.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/use_model.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/use_model.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/using_llama_models.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/using_llama_models.md


--------------------------------------------------------------------------------
/train/TRL/docs/source/xpo_trainer.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/docs/source/xpo_trainer.md


--------------------------------------------------------------------------------
/train/TRL/examples/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/README.md


--------------------------------------------------------------------------------
/train/TRL/examples/accelerate_configs/deepspeed_zero1.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/deepspeed_zero1.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/accelerate_configs/deepspeed_zero2.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/deepspeed_zero2.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/accelerate_configs/deepspeed_zero3.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/deepspeed_zero3.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/accelerate_configs/fsdp_qlora.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/fsdp_qlora.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/accelerate_configs/multi_gpu.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/multi_gpu.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/accelerate_configs/single_gpu.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/accelerate_configs/single_gpu.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/cli_configs/example_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/cli_configs/example_config.yaml


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/hh-rlhf-helpful-base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/hh-rlhf-helpful-base.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/lm-human-preferences-descriptiveness.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/lm-human-preferences-descriptiveness.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/lm-human-preferences-sentiment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/lm-human-preferences-sentiment.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/math_shepherd.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/math_shepherd.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/prm800k.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/prm800k.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/rlaif-v.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/rlaif-v.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/tldr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/tldr.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/tldr_preference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/tldr_preference.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/ultrafeedback-prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/ultrafeedback-prompt.py


--------------------------------------------------------------------------------
/train/TRL/examples/datasets/ultrafeedback.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/datasets/ultrafeedback.py


--------------------------------------------------------------------------------
/train/TRL/examples/notebooks/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/README.md


--------------------------------------------------------------------------------
/train/TRL/examples/notebooks/best_of_n.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/best_of_n.ipynb


--------------------------------------------------------------------------------
/train/TRL/examples/notebooks/gpt2-sentiment-control.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/gpt2-sentiment-control.ipynb


--------------------------------------------------------------------------------
/train/TRL/examples/notebooks/gpt2-sentiment.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/notebooks/gpt2-sentiment.ipynb


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/README.md


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama/scripts/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/README.md


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/merge_peft_adapter.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama/scripts/reward_modeling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/reward_modeling.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama/scripts/rl_training.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/rl_training.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama/scripts/supervised_finetuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama/scripts/supervised_finetuning.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama_2/scripts/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/README.md


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama_2/scripts/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/requirements.txt


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/stack_llama_2/scripts/sft_llama2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/stack_llama_2/scripts/sft_llama2.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/tools/calculator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/tools/calculator.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/tools/python_interpreter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/tools/python_interpreter.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/tools/triviaqa.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/tools/triviaqa.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/toxicity/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/toxicity/README.md


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/toxicity/scripts/evaluate-toxicity.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/toxicity/scripts/evaluate-toxicity.py


--------------------------------------------------------------------------------
/train/TRL/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/alignprop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/alignprop.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/bco.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/bco.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/chat.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/cpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/cpo.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/ddpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/ddpo.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/dpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/dpo.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/dpo_online.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/dpo_online.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/dpo_vlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/dpo_vlm.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/evals/judge_tldr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/evals/judge_tldr.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/gkd.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/gkd.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/kto.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/kto.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/nash_md.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/nash_md.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/orpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/orpo.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/ppo/ppo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/ppo/ppo.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/ppo/ppo_tldr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/ppo/ppo_tldr.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/prm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/prm.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/reward_modeling.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/reward_modeling.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/rloo/rloo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/rloo/rloo.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/rloo/rloo_tldr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/rloo/rloo_tldr.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/sft_video_llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft_video_llm.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/sft_vlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft_vlm.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/sft_vlm_smol_vlm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/sft_vlm_smol_vlm.py


--------------------------------------------------------------------------------
/train/TRL/examples/scripts/xpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/examples/scripts/xpo.py


--------------------------------------------------------------------------------
/train/TRL/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/pyproject.toml


--------------------------------------------------------------------------------
/train/TRL/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | datasets
3 | rich
4 | transformers>=4.46.0


--------------------------------------------------------------------------------
/train/TRL/scripts/add_copyrights.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/add_copyrights.py


--------------------------------------------------------------------------------
/train/TRL/scripts/generate_tiny_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/generate_tiny_models.py


--------------------------------------------------------------------------------
/train/TRL/scripts/generate_zen_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/generate_zen_dataset.py


--------------------------------------------------------------------------------
/train/TRL/scripts/log_example_reports.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/log_example_reports.py


--------------------------------------------------------------------------------
/train/TRL/scripts/log_reports.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/scripts/log_reports.py


--------------------------------------------------------------------------------
/train/TRL/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license_file = LICENSE


--------------------------------------------------------------------------------
/train/TRL/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/setup.py


--------------------------------------------------------------------------------
/train/TRL/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/__init__.py


--------------------------------------------------------------------------------
/train/TRL/tests/slow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/__init__.py


--------------------------------------------------------------------------------
/train/TRL/tests/slow/test_dpo_slow.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/test_dpo_slow.py


--------------------------------------------------------------------------------
/train/TRL/tests/slow/test_sft_slow.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/test_sft_slow.py


--------------------------------------------------------------------------------
/train/TRL/tests/slow/testing_constants.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/slow/testing_constants.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_alignprop_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_alignprop_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_bco_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_bco_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_best_of_n_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_best_of_n_sampler.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_callbacks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_callbacks.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_cli.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_cli_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_cli_utils.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_collators.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_collators.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_core.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_core.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_cpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_cpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_data_collator_completion_only.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_data_collator_completion_only.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_data_utils.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_dataset_formatting.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_dataset_formatting.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_ddpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_ddpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_dpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_environments.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_environments.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_gkd_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_gkd_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_grpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_grpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_iterative_sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_iterative_sft_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_judges.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_judges.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_kto_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_kto_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_modeling_geometric_mixture_wrapper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_modeling_geometric_mixture_wrapper.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_modeling_value_head.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_modeling_value_head.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_nash_md_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_nash_md_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_online_dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_online_dpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_orpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_orpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_peft_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_peft_models.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_ppo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_prm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_prm_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_reward_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_reward_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_rich_progress_callback.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_rich_progress_callback.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_rloo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_rloo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_sft_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_trainers_args.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_trainers_args.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_utils.py


--------------------------------------------------------------------------------
/train/TRL/tests/test_xpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/test_xpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/tests/testing_constants.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/testing_constants.py


--------------------------------------------------------------------------------
/train/TRL/tests/testing_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/tests/testing_utils.py


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl.egg-info/PKG-INFO


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl.egg-info/SOURCES.txt


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/entry_points.txt:
--------------------------------------------------------------------------------
1 | [console_scripts]
2 | trl = trl.cli:main
3 | 


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/not-zip-safe:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/requires.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl.egg-info/requires.txt


--------------------------------------------------------------------------------
/train/TRL/trl.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | trl
2 | 


--------------------------------------------------------------------------------
/train/TRL/trl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__init__.py


--------------------------------------------------------------------------------
/train/TRL/trl/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/__pycache__/data_utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__pycache__/data_utils.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/__pycache__/import_utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/__pycache__/import_utils.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/cli.py


--------------------------------------------------------------------------------
/train/TRL/trl/core.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/core.py


--------------------------------------------------------------------------------
/train/TRL/trl/data_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/data_utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/environment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/environment/__init__.py


--------------------------------------------------------------------------------
/train/TRL/trl/environment/base_environment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/environment/base_environment.py


--------------------------------------------------------------------------------
/train/TRL/trl/extras/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/extras/__init__.py


--------------------------------------------------------------------------------
/train/TRL/trl/extras/best_of_n_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/extras/best_of_n_sampler.py


--------------------------------------------------------------------------------
/train/TRL/trl/extras/dataset_formatting.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/extras/dataset_formatting.py


--------------------------------------------------------------------------------
/train/TRL/trl/import_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/import_utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/mergekit_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/mergekit_utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__init__.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/models/__pycache__/modeling_base.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/modeling_base.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/models/__pycache__/modeling_value_head.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/modeling_value_head.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/models/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/__pycache__/utils.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/models/auxiliary_modules.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/auxiliary_modules.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/modeling_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/modeling_base.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/modeling_sd_base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/modeling_sd_base.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/modeling_value_head.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/modeling_value_head.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/sd_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/sd_utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/models/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/models/utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/__init__.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/__pycache__/utils.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/chat.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/dpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/dpo.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/env.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/grpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/grpo.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/kto.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/kto.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/sft.py


--------------------------------------------------------------------------------
/train/TRL/trl/scripts/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/scripts/utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/templates/lm_model_card.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/templates/lm_model_card.md


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__init__.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/__init__.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__pycache__/grpo_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/grpo_config.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__pycache__/grpo_trainer.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/grpo_trainer.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__pycache__/model_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/model_config.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__pycache__/sft_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/sft_config.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/__pycache__/utils.cpython-311.pyc


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/alignprop_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/alignprop_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/alignprop_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/alignprop_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/bco_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/bco_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/bco_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/bco_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/callbacks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/callbacks.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/cpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/cpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/cpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/cpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/ddpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ddpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/ddpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ddpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/dpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/dpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/dpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/gkd_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/gkd_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/gkd_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/gkd_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/grpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/grpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/grpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/grpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/iterative_sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/iterative_sft_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/judges.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/judges.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/kto_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/kto_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/kto_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/kto_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/model_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/model_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/nash_md_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/nash_md_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/nash_md_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/nash_md_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/online_dpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/online_dpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/online_dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/online_dpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/orpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/orpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/orpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/orpo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/ppo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ppo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/ppo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/prm_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/prm_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/prm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/prm_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/reward_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/reward_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/reward_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/reward_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/rloo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/rloo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/rloo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/rloo_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/sft_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/sft_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/sft_trainer.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/utils.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/xpo_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/xpo_config.py


--------------------------------------------------------------------------------
/train/TRL/trl/trainer/xpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/TRL/trl/trainer/xpo_trainer.py


--------------------------------------------------------------------------------
/train/assets/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/train/llama_8b_10k_2epoch.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/llama_8b_10k_2epoch.sh


--------------------------------------------------------------------------------
/train/logs/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/train/qwen_14b_10k_2epoch.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/qwen_14b_10k_2epoch.sh


--------------------------------------------------------------------------------
/train/qwen_7b_10k_2epoch.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/qwen_7b_10k_2epoch.sh


--------------------------------------------------------------------------------
/train/recipes/LLama/LLama-Instruct/llama3_8b_2epoch_10k.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/LLama/LLama-Instruct/llama3_8b_2epoch_10k.yaml


--------------------------------------------------------------------------------
/train/recipes/Qwen/Qwen2.5-Instruct/qwen_14b_2epoch_10k.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/Qwen/Qwen2.5-Instruct/qwen_14b_2epoch_10k.yaml


--------------------------------------------------------------------------------
/train/recipes/Qwen/Qwen2.5-Instruct/qwen_7b_2epoch_10k.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/Qwen/Qwen2.5-Instruct/qwen_7b_2epoch_10k.yaml


--------------------------------------------------------------------------------
/train/recipes/accelerate_configs/ddp.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/ddp.yaml


--------------------------------------------------------------------------------
/train/recipes/accelerate_configs/zero2.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero2.yaml


--------------------------------------------------------------------------------
/train/recipes/accelerate_configs/zero2_offload.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero2_offload.yaml


--------------------------------------------------------------------------------
/train/recipes/accelerate_configs/zero3.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero3.yaml


--------------------------------------------------------------------------------
/train/recipes/accelerate_configs/zero3_offload.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/recipes/accelerate_configs/zero3_offload.yaml


--------------------------------------------------------------------------------
/train/scripts/generate_reasoning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/scripts/generate_reasoning.py


--------------------------------------------------------------------------------
/train/scripts/run_benchmarks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/scripts/run_benchmarks.py


--------------------------------------------------------------------------------
/train/scripts/upload_details.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/scripts/upload_details.py


--------------------------------------------------------------------------------
/train/setup.cfg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/setup.cfg


--------------------------------------------------------------------------------
/train/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/setup.py


--------------------------------------------------------------------------------
/train/slurm/evaluate.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/evaluate.slurm


--------------------------------------------------------------------------------
/train/slurm/experimental/serve_r1_vllm.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/experimental/serve_r1_vllm.slurm


--------------------------------------------------------------------------------
/train/slurm/generate.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/generate.slurm


--------------------------------------------------------------------------------
/train/slurm/serve_r1.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/serve_r1.slurm


--------------------------------------------------------------------------------
/train/slurm/serve_router.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/serve_router.slurm


--------------------------------------------------------------------------------
/train/slurm/train.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/slurm/train.slurm


--------------------------------------------------------------------------------
/train/src/open_r1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/__init__.py


--------------------------------------------------------------------------------
/train/src/open_r1/configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/configs.py


--------------------------------------------------------------------------------
/train/src/open_r1/evaluate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/evaluate.py


--------------------------------------------------------------------------------
/train/src/open_r1/generate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/generate.py


--------------------------------------------------------------------------------
/train/src/open_r1/grpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/grpo.py


--------------------------------------------------------------------------------
/train/src/open_r1/sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/sft.py


--------------------------------------------------------------------------------
/train/src/open_r1/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/train/src/open_r1/utils/callbacks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/callbacks.py


--------------------------------------------------------------------------------
/train/src/open_r1/utils/evaluation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/evaluation.py


--------------------------------------------------------------------------------
/train/src/open_r1/utils/hub.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/hub.py


--------------------------------------------------------------------------------
/train/src/open_r1/utils/upload_details.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/src/open_r1/utils/upload_details.py


--------------------------------------------------------------------------------
/train/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/train/tests/test_rewards.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/S1s-Z/CANOE/HEAD/train/tests/test_rewards.py


--------------------------------------------------------------------------------
/train/train_data/readme.md:
--------------------------------------------------------------------------------
1 | todo
2 | 


--------------------------------------------------------------------------------