├── .gitignore ├── LICENSE ├── README.md ├── assets ├── logo.png ├── overview.png ├── pipeline.png ├── rb2_res.png ├── res_task3.png ├── result.png └── wechat.png ├── eval ├── cal_score_benchmarks_for_close_source.py ├── cal_score_benchmarks_for_open_source.py └── eval_by_vllm_for_open_source.py ├── requirements_rl.txt ├── requirements_sft.txt ├── scripts ├── eval │ ├── close_source_models │ │ ├── evaluate_gemini.sh │ │ ├── evaluate_gemini_only_calculate_score.sh │ │ ├── evaluate_gpt4o.sh │ │ └── evaluate_gpt4o_only_calculate_score.sh │ └── open_source_models │ │ ├── calculate_score │ │ └── calculate_score.sh │ │ ├── multi_gpu_eval │ │ ├── eval_by_vllm_all_tasks_ans_sft_multi_gpu.sh │ │ ├── eval_by_vllm_all_tasks_cot_sft_multi_gpu.sh │ │ ├── eval_by_vllm_all_tasks_reason_rft_multi_gpu.sh │ │ ├── eval_by_vllm_all_tasks_zero_shot_multi_gpu.sh │ │ ├── eval_by_vllm_task1_ans_sft_multi_gpu.sh │ │ ├── eval_by_vllm_task1_cot_sft_multi_gpu.sh │ │ ├── eval_by_vllm_task1_reason_rft_multi_gpu.sh │ │ ├── eval_by_vllm_task1_zero_shot_multi_gpu.sh │ │ ├── eval_by_vllm_task2_ans_sft_multi_gpu.sh │ │ ├── eval_by_vllm_task2_cot_sft_multi_gpu.sh │ │ ├── eval_by_vllm_task2_reason_rft_multi_gpu.sh │ │ ├── eval_by_vllm_task2_zero_shot_multi_gpu.sh │ │ ├── eval_by_vllm_task3_ans_sft_multi_gpu.sh │ │ ├── eval_by_vllm_task3_cot_sft_multi_gpu.sh │ │ ├── eval_by_vllm_task3_reason_rft_multi_gpu.sh │ │ └── eval_by_vllm_task3_zero_shot_multi_gpu.sh │ │ └── single_gpu_eval │ │ ├── eval_by_vllm_all_tasks_ans_sft_single_gpu.sh │ │ ├── eval_by_vllm_all_tasks_cot_sft_single_gpu.sh │ │ ├── eval_by_vllm_all_tasks_reason_rft_single_gpu.sh │ │ ├── eval_by_vllm_all_tasks_zero_shot_single_gpu.sh │ │ ├── eval_by_vllm_task1_ans_sft_single_gpu.sh │ │ ├── eval_by_vllm_task1_cot_sft_single_gpu.sh │ │ ├── eval_by_vllm_task1_reason_rft_single_gpu.sh │ │ ├── eval_by_vllm_task1_zero_shot_single_gpu.sh │ │ ├── eval_by_vllm_task2_ans_sft_single_gpu.sh │ │ ├── eval_by_vllm_task2_cot_sft_single_gpu.sh │ │ ├── eval_by_vllm_task2_reason_rft_single_gpu.sh │ │ ├── eval_by_vllm_task2_zero_shot_single_gpu.sh │ │ ├── eval_by_vllm_task3_ans_sft_single_gpu.sh │ │ ├── eval_by_vllm_task3_cot_sft_single_gpu.sh │ │ ├── eval_by_vllm_task3_reason_rft_single_gpu.sh │ │ └── eval_by_vllm_task3_zero_shot_single_gpu.sh └── train │ ├── ans_sft │ ├── resume_finetune_qwen2vl_2b_task1_ans_sft.sh │ ├── resume_finetune_qwen2vl_2b_task2_ans_sft.sh │ ├── resume_finetune_qwen2vl_2b_task3_ans_sft.sh │ ├── resume_finetune_qwen2vl_7b_task1_ans_sft.sh │ ├── resume_finetune_qwen2vl_7b_task2_ans_sft.sh │ └── resume_finetune_qwen2vl_7b_task3_ans_sft.sh │ ├── cot_sft │ ├── resume_finetune_qwen2vl_2b_task1_cot_sft.sh │ ├── resume_finetune_qwen2vl_2b_task2_cot_sft.sh │ ├── resume_finetune_qwen2vl_2b_task3_cot_sft.sh │ ├── resume_finetune_qwen2vl_7b_task1_cot_sft.sh │ ├── resume_finetune_qwen2vl_7b_task2_cot_sft.sh │ └── resume_finetune_qwen2vl_7b_task3_cot_sft.sh │ ├── reason_rft │ ├── stage_rl │ │ ├── resume_finetune_qwen2vl_2b_task1_stage2_rl.sh │ │ ├── resume_finetune_qwen2vl_2b_task2_stage2_rl.sh │ │ ├── resume_finetune_qwen2vl_2b_task3_stage2_rl.sh │ │ ├── resume_finetune_qwen2vl_7b_task1_stage2_rl.sh │ │ ├── resume_finetune_qwen2vl_7b_task2_stage2_rl.sh │ │ └── resume_finetune_qwen2vl_7b_task3_stage2_rl.sh │ └── stage_sft │ │ ├── resume_finetune_qwen2vl_2b_task1_stage1_sft.sh │ │ ├── resume_finetune_qwen2vl_2b_task2_stage1_sft.sh │ │ ├── resume_finetune_qwen2vl_2b_task3_stage1_sft.sh │ │ ├── resume_finetune_qwen2vl_7b_task1_stage1_sft.sh │ │ ├── resume_finetune_qwen2vl_7b_task2_stage1_sft.sh │ │ └── resume_finetune_qwen2vl_7b_task3_stage1_sft.sh │ ├── reason_rft_zero │ ├── resume_finetune_qwen2vl_2b_task1_only_rl.sh │ ├── resume_finetune_qwen2vl_2b_task2_only_rl.sh │ ├── resume_finetune_qwen2vl_2b_task3_only_rl.sh │ ├── resume_finetune_qwen2vl_7b_task1_only_rl.sh │ ├── resume_finetune_qwen2vl_7b_task2_only_rl.sh │ └── resume_finetune_qwen2vl_7b_task3_only_rl.sh │ └── zero3.json ├── train ├── stage_rl │ ├── __init__.py │ ├── configs.py │ ├── grpo.py │ ├── prompt.py │ ├── reward.py │ ├── trainer │ │ ├── __init__.py │ │ └── mm_grpo_trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── callbacks.py │ │ ├── evaluation.py │ │ ├── hub.py │ │ └── upload_details.py └── stage_sft │ ├── api.py │ ├── data │ └── dataset_info.json │ ├── llamafactory │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── app.py │ │ ├── chat.py │ │ ├── common.py │ │ └── protocol.py │ ├── chat │ │ ├── __init__.py │ │ ├── base_engine.py │ │ ├── chat_model.py │ │ ├── hf_engine.py │ │ └── vllm_engine.py │ ├── cli.py │ ├── data │ │ ├── __init__.py │ │ ├── aligner.py │ │ ├── collator.py │ │ ├── data_utils.py │ │ ├── formatter.py │ │ ├── loader.py │ │ ├── mm_plugin.py │ │ ├── parser.py │ │ ├── preprocess.py │ │ ├── processors │ │ │ ├── __init__.py │ │ │ ├── feedback.py │ │ │ ├── pairwise.py │ │ │ ├── pretrain.py │ │ │ ├── processor_utils.py │ │ │ ├── supervised.py │ │ │ └── unsupervised.py │ │ ├── template.py │ │ └── tool_utils.py │ ├── eval │ │ ├── __init__.py │ │ ├── evaluator.py │ │ └── template.py │ ├── extras │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── env.py │ │ ├── logging.py │ │ ├── misc.py │ │ ├── packages.py │ │ └── ploting.py │ ├── hparams │ │ ├── __init__.py │ │ ├── data_args.py │ │ ├── evaluation_args.py │ │ ├── finetuning_args.py │ │ ├── generating_args.py │ │ ├── model_args.py │ │ ├── parser.py │ │ └── training_args.py │ ├── launcher.py │ ├── model │ │ ├── __init__.py │ │ ├── adapter.py │ │ ├── loader.py │ │ ├── model_utils │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── checkpointing.py │ │ │ ├── embedding.py │ │ │ ├── liger_kernel.py │ │ │ ├── longlora.py │ │ │ ├── misc.py │ │ │ ├── mod.py │ │ │ ├── moe.py │ │ │ ├── packing.py │ │ │ ├── quantization.py │ │ │ ├── rope.py │ │ │ ├── unsloth.py │ │ │ ├── valuehead.py │ │ │ └── visual.py │ │ └── patcher.py │ ├── train │ │ ├── __init__.py │ │ ├── callbacks.py │ │ ├── dpo │ │ │ ├── __init__.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── kto │ │ │ ├── __init__.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── ppo │ │ │ ├── __init__.py │ │ │ ├── ppo_utils.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── pt │ │ │ ├── __init__.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── rm │ │ │ ├── __init__.py │ │ │ ├── metric.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── sft │ │ │ ├── __init__.py │ │ │ ├── metric.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── test_utils.py │ │ ├── trainer_utils.py │ │ └── tuner.py │ └── webui │ │ ├── __init__.py │ │ ├── chatter.py │ │ ├── common.py │ │ ├── components │ │ ├── __init__.py │ │ ├── chatbot.py │ │ ├── data.py │ │ ├── eval.py │ │ ├── export.py │ │ ├── infer.py │ │ ├── top.py │ │ └── train.py │ │ ├── css.py │ │ ├── engine.py │ │ ├── interface.py │ │ ├── locales.py │ │ ├── manager.py │ │ ├── runner.py │ │ └── utils.py │ ├── train.py │ └── webui.py └── utils ├── convert_qwen2vl_format.py ├── convert_sft_data_trance.py ├── distill_cot_data.py ├── distill_cot_data_trance.py └── prompts.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/README.md -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/overview.png -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/pipeline.png -------------------------------------------------------------------------------- /assets/rb2_res.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/rb2_res.png -------------------------------------------------------------------------------- /assets/res_task3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/res_task3.png -------------------------------------------------------------------------------- /assets/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/result.png -------------------------------------------------------------------------------- /assets/wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/assets/wechat.png -------------------------------------------------------------------------------- /eval/cal_score_benchmarks_for_close_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/eval/cal_score_benchmarks_for_close_source.py -------------------------------------------------------------------------------- /eval/cal_score_benchmarks_for_open_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/eval/cal_score_benchmarks_for_open_source.py -------------------------------------------------------------------------------- /eval/eval_by_vllm_for_open_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/eval/eval_by_vllm_for_open_source.py -------------------------------------------------------------------------------- /requirements_rl.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/requirements_rl.txt -------------------------------------------------------------------------------- /requirements_sft.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/requirements_sft.txt -------------------------------------------------------------------------------- /scripts/eval/close_source_models/evaluate_gemini.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/close_source_models/evaluate_gemini.sh -------------------------------------------------------------------------------- /scripts/eval/close_source_models/evaluate_gemini_only_calculate_score.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/close_source_models/evaluate_gemini_only_calculate_score.sh -------------------------------------------------------------------------------- /scripts/eval/close_source_models/evaluate_gpt4o.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/close_source_models/evaluate_gpt4o.sh -------------------------------------------------------------------------------- /scripts/eval/close_source_models/evaluate_gpt4o_only_calculate_score.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/close_source_models/evaluate_gpt4o_only_calculate_score.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/calculate_score/calculate_score.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/calculate_score/calculate_score.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_ans_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_ans_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_cot_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_cot_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_reason_rft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_reason_rft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_zero_shot_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_all_tasks_zero_shot_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_ans_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_ans_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_cot_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_cot_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_reason_rft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_reason_rft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_zero_shot_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task1_zero_shot_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_ans_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_ans_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_cot_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_cot_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_reason_rft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_reason_rft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_zero_shot_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task2_zero_shot_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_ans_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_ans_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_cot_sft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_cot_sft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_reason_rft_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_reason_rft_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_zero_shot_multi_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/multi_gpu_eval/eval_by_vllm_task3_zero_shot_multi_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_ans_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_ans_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_cot_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_cot_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_reason_rft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_reason_rft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_zero_shot_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_all_tasks_zero_shot_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_ans_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_ans_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_cot_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_cot_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_reason_rft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_reason_rft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_zero_shot_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task1_zero_shot_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_ans_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_ans_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_cot_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_cot_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_reason_rft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_reason_rft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_zero_shot_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task2_zero_shot_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_ans_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_ans_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_cot_sft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_cot_sft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_reason_rft_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_reason_rft_single_gpu.sh -------------------------------------------------------------------------------- /scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_zero_shot_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/eval/open_source_models/single_gpu_eval/eval_by_vllm_task3_zero_shot_single_gpu.sh -------------------------------------------------------------------------------- /scripts/train/ans_sft/resume_finetune_qwen2vl_2b_task1_ans_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/ans_sft/resume_finetune_qwen2vl_2b_task1_ans_sft.sh -------------------------------------------------------------------------------- /scripts/train/ans_sft/resume_finetune_qwen2vl_2b_task2_ans_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/ans_sft/resume_finetune_qwen2vl_2b_task2_ans_sft.sh -------------------------------------------------------------------------------- /scripts/train/ans_sft/resume_finetune_qwen2vl_2b_task3_ans_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/ans_sft/resume_finetune_qwen2vl_2b_task3_ans_sft.sh -------------------------------------------------------------------------------- /scripts/train/ans_sft/resume_finetune_qwen2vl_7b_task1_ans_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/ans_sft/resume_finetune_qwen2vl_7b_task1_ans_sft.sh -------------------------------------------------------------------------------- /scripts/train/ans_sft/resume_finetune_qwen2vl_7b_task2_ans_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/ans_sft/resume_finetune_qwen2vl_7b_task2_ans_sft.sh -------------------------------------------------------------------------------- /scripts/train/ans_sft/resume_finetune_qwen2vl_7b_task3_ans_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/ans_sft/resume_finetune_qwen2vl_7b_task3_ans_sft.sh -------------------------------------------------------------------------------- /scripts/train/cot_sft/resume_finetune_qwen2vl_2b_task1_cot_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/cot_sft/resume_finetune_qwen2vl_2b_task1_cot_sft.sh -------------------------------------------------------------------------------- /scripts/train/cot_sft/resume_finetune_qwen2vl_2b_task2_cot_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/cot_sft/resume_finetune_qwen2vl_2b_task2_cot_sft.sh -------------------------------------------------------------------------------- /scripts/train/cot_sft/resume_finetune_qwen2vl_2b_task3_cot_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/cot_sft/resume_finetune_qwen2vl_2b_task3_cot_sft.sh -------------------------------------------------------------------------------- /scripts/train/cot_sft/resume_finetune_qwen2vl_7b_task1_cot_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/cot_sft/resume_finetune_qwen2vl_7b_task1_cot_sft.sh -------------------------------------------------------------------------------- /scripts/train/cot_sft/resume_finetune_qwen2vl_7b_task2_cot_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/cot_sft/resume_finetune_qwen2vl_7b_task2_cot_sft.sh -------------------------------------------------------------------------------- /scripts/train/cot_sft/resume_finetune_qwen2vl_7b_task3_cot_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/cot_sft/resume_finetune_qwen2vl_7b_task3_cot_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_2b_task1_stage2_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_2b_task1_stage2_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_2b_task2_stage2_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_2b_task2_stage2_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_2b_task3_stage2_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_2b_task3_stage2_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_7b_task1_stage2_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_7b_task1_stage2_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_7b_task2_stage2_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_7b_task2_stage2_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_7b_task3_stage2_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_rl/resume_finetune_qwen2vl_7b_task3_stage2_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_2b_task1_stage1_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_2b_task1_stage1_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_2b_task2_stage1_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_2b_task2_stage1_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_2b_task3_stage1_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_2b_task3_stage1_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_7b_task1_stage1_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_7b_task1_stage1_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_7b_task2_stage1_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_7b_task2_stage1_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_7b_task3_stage1_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft/stage_sft/resume_finetune_qwen2vl_7b_task3_stage1_sft.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft_zero/resume_finetune_qwen2vl_2b_task1_only_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft_zero/resume_finetune_qwen2vl_2b_task1_only_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft_zero/resume_finetune_qwen2vl_2b_task2_only_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft_zero/resume_finetune_qwen2vl_2b_task2_only_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft_zero/resume_finetune_qwen2vl_2b_task3_only_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft_zero/resume_finetune_qwen2vl_2b_task3_only_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft_zero/resume_finetune_qwen2vl_7b_task1_only_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft_zero/resume_finetune_qwen2vl_7b_task1_only_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft_zero/resume_finetune_qwen2vl_7b_task2_only_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft_zero/resume_finetune_qwen2vl_7b_task2_only_rl.sh -------------------------------------------------------------------------------- /scripts/train/reason_rft_zero/resume_finetune_qwen2vl_7b_task3_only_rl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/reason_rft_zero/resume_finetune_qwen2vl_7b_task3_only_rl.sh -------------------------------------------------------------------------------- /scripts/train/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/scripts/train/zero3.json -------------------------------------------------------------------------------- /train/stage_rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/__init__.py -------------------------------------------------------------------------------- /train/stage_rl/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/configs.py -------------------------------------------------------------------------------- /train/stage_rl/grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/grpo.py -------------------------------------------------------------------------------- /train/stage_rl/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/prompt.py -------------------------------------------------------------------------------- /train/stage_rl/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/reward.py -------------------------------------------------------------------------------- /train/stage_rl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/trainer/__init__.py -------------------------------------------------------------------------------- /train/stage_rl/trainer/mm_grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/trainer/mm_grpo_trainer.py -------------------------------------------------------------------------------- /train/stage_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_rl/utils/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/utils/callbacks.py -------------------------------------------------------------------------------- /train/stage_rl/utils/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/utils/evaluation.py -------------------------------------------------------------------------------- /train/stage_rl/utils/hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/utils/hub.py -------------------------------------------------------------------------------- /train/stage_rl/utils/upload_details.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_rl/utils/upload_details.py -------------------------------------------------------------------------------- /train/stage_sft/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/api.py -------------------------------------------------------------------------------- /train/stage_sft/data/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/data/dataset_info.json -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/api/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/api/app.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/api/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/api/chat.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/api/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/api/common.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/api/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/api/protocol.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/chat/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/chat/base_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/chat/base_engine.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/chat/chat_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/chat/chat_model.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/chat/hf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/chat/hf_engine.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/chat/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/chat/vllm_engine.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/cli.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/aligner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/aligner.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/collator.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/data_utils.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/formatter.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/loader.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/mm_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/mm_plugin.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/parser.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/preprocess.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/feedback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/processors/feedback.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/pairwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/processors/pairwise.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/processors/pretrain.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/processor_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/processors/processor_utils.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/supervised.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/processors/supervised.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/processors/unsupervised.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/processors/unsupervised.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/template.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/data/tool_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/data/tool_utils.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/eval/evaluator.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/eval/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/eval/template.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/extras/constants.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/extras/env.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/extras/logging.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/extras/misc.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/packages.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/extras/packages.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/extras/ploting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/extras/ploting.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/data_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/data_args.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/evaluation_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/evaluation_args.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/finetuning_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/finetuning_args.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/generating_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/generating_args.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/model_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/model_args.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/parser.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/hparams/training_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/hparams/training_args.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/launcher.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/adapter.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/loader.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/attention.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/checkpointing.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/embedding.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/liger_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/liger_kernel.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/longlora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/longlora.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/misc.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/mod.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/moe.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/packing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/packing.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/quantization.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/rope.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/unsloth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/unsloth.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/valuehead.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/valuehead.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/model_utils/visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/model_utils/visual.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/model/patcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/model/patcher.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/callbacks.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/dpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/dpo/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/dpo/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/dpo/trainer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/dpo/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/dpo/workflow.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/kto/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/kto/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/kto/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/kto/trainer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/kto/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/kto/workflow.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/ppo/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/ppo/ppo_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/ppo/ppo_utils.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/ppo/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/ppo/trainer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/ppo/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/ppo/workflow.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/pt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/pt/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/pt/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/pt/trainer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/pt/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/pt/workflow.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/rm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/rm/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/rm/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/rm/metric.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/rm/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/rm/trainer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/rm/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/rm/workflow.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/sft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/sft/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/sft/metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/sft/metric.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/sft/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/sft/trainer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/sft/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/sft/workflow.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/test_utils.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/trainer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/trainer_utils.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/train/tuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/train/tuner.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/chatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/chatter.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/common.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/__init__.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/chatbot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/chatbot.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/data.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/eval.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/export.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/infer.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/top.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/top.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/components/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/components/train.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/css.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/css.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/engine.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/interface.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/locales.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/locales.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/manager.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/runner.py -------------------------------------------------------------------------------- /train/stage_sft/llamafactory/webui/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/llamafactory/webui/utils.py -------------------------------------------------------------------------------- /train/stage_sft/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/train.py -------------------------------------------------------------------------------- /train/stage_sft/webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/train/stage_sft/webui.py -------------------------------------------------------------------------------- /utils/convert_qwen2vl_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/utils/convert_qwen2vl_format.py -------------------------------------------------------------------------------- /utils/convert_sft_data_trance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/utils/convert_sft_data_trance.py -------------------------------------------------------------------------------- /utils/distill_cot_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/utils/distill_cot_data.py -------------------------------------------------------------------------------- /utils/distill_cot_data_trance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/utils/distill_cot_data_trance.py -------------------------------------------------------------------------------- /utils/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanhuajie/Reason-RFT/HEAD/utils/prompts.py --------------------------------------------------------------------------------