├── .gitignore ├── LICENSE ├── README.md ├── conda-recipe.yaml ├── framework-colorblindfriendly.jpg ├── mcts_rl ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── dpo │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── main.py │ │ └── trainer.py │ ├── mcts │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── main.py │ │ ├── mcts │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── embedding_retrieve.py │ │ │ ├── mcts.py │ │ │ ├── search_config.py │ │ │ └── world_model.py │ │ └── trainer.py │ └── ppo │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── main.py │ │ └── trainer.py ├── configs │ ├── __init__.py │ ├── constants.py │ ├── deepspeed_config.py │ ├── ds_eval_config_template.json │ ├── ds_train_config_template.json │ └── fsdp_config.json ├── datasets │ ├── __init__.py │ ├── base.py │ ├── preference.py │ ├── prompt_only.py │ ├── raw │ │ ├── __init__.py │ │ ├── alpaca.py │ │ ├── aqua.py │ │ ├── arithmo.py │ │ ├── exam.py │ │ ├── firefly.py │ │ ├── gsm8k.py │ │ ├── hh_rlhf.py │ │ ├── math.py │ │ ├── math_qa.py │ │ ├── mcq.py │ │ ├── mcq_for_eval.py │ │ ├── mcq_pairs.py │ │ ├── moss.py │ │ ├── prm800k.py │ │ ├── qa_feedback.py │ │ └── safe_rlhf.py │ ├── safety_preference.py │ ├── supervised.py │ └── utils.py ├── finetune │ ├── __init__.py │ ├── __main__.py │ ├── deepspeed.py │ ├── huggingface.py │ ├── main.py │ └── trainer.py ├── logger.py ├── models │ ├── __init__.py │ ├── normalizer.py │ ├── pretrained.py │ └── score_model │ │ ├── __init__.py │ │ ├── bloom │ │ ├── __init__.py │ │ └── modeling_bloom.py │ │ ├── gpt2 │ │ ├── __init__.py │ │ └── modeling_gpt2.py │ │ ├── gpt_neo │ │ ├── __init__.py │ │ └── modeling_gpt_neo.py │ │ ├── gpt_neox │ │ ├── __init__.py │ │ └── modeling_gpt_neox.py │ │ ├── gptj │ │ ├── __init__.py │ │ └── modeling_gptj.py │ │ ├── llama │ │ ├── __init__.py │ │ └── modeling_llama.py │ │ ├── mistral │ │ ├── __init__.py │ │ └── modeling_mistral.py │ │ ├── open_llama │ │ ├── __init__.py │ │ └── modeling_open_llama.py │ │ └── opt │ │ ├── __init__.py │ │ └── modeling_opt.py ├── trainers │ ├── __init__.py │ ├── base.py │ ├── rl_trainer.py │ ├── supervised_trainer.py │ └── tsrl_trainer.py ├── utils.py └── version.py ├── requirements.txt ├── scripts ├── eval │ ├── mctseval_math.sh │ └── mctseval_sqa.sh ├── mcts_csr.sh ├── mcts_mathqa.sh ├── mcts_mathqa2.sh └── mcts_mathqa_llama3.sh └── visualize.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/README.md -------------------------------------------------------------------------------- /conda-recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/conda-recipe.yaml -------------------------------------------------------------------------------- /framework-colorblindfriendly.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/framework-colorblindfriendly.jpg -------------------------------------------------------------------------------- /mcts_rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/__init__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/__init__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/dpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/dpo/__init__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/dpo/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/dpo/__main__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/dpo/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/dpo/main.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/dpo/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/dpo/trainer.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/__init__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/__main__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/main.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/mcts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/mcts/__init__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/mcts/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/mcts/base.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/mcts/embedding_retrieve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/mcts/embedding_retrieve.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/mcts/mcts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/mcts/mcts.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/mcts/search_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/mcts/search_config.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/mcts/world_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/mcts/world_model.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/mcts/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/mcts/trainer.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/ppo/__init__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/ppo/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/ppo/__main__.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/ppo/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/ppo/main.py -------------------------------------------------------------------------------- /mcts_rl/algorithms/ppo/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/algorithms/ppo/trainer.py -------------------------------------------------------------------------------- /mcts_rl/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/configs/__init__.py -------------------------------------------------------------------------------- /mcts_rl/configs/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/configs/constants.py -------------------------------------------------------------------------------- /mcts_rl/configs/deepspeed_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/configs/deepspeed_config.py -------------------------------------------------------------------------------- /mcts_rl/configs/ds_eval_config_template.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/configs/ds_eval_config_template.json -------------------------------------------------------------------------------- /mcts_rl/configs/ds_train_config_template.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/configs/ds_train_config_template.json -------------------------------------------------------------------------------- /mcts_rl/configs/fsdp_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/configs/fsdp_config.json -------------------------------------------------------------------------------- /mcts_rl/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/__init__.py -------------------------------------------------------------------------------- /mcts_rl/datasets/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/base.py -------------------------------------------------------------------------------- /mcts_rl/datasets/preference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/preference.py -------------------------------------------------------------------------------- /mcts_rl/datasets/prompt_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/prompt_only.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/__init__.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/alpaca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/alpaca.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/aqua.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/aqua.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/arithmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/arithmo.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/exam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/exam.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/firefly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/firefly.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/gsm8k.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/hh_rlhf.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/math.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/math_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/math_qa.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/mcq.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/mcq_for_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/mcq_for_eval.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/mcq_pairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/mcq_pairs.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/moss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/moss.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/prm800k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/prm800k.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/qa_feedback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/qa_feedback.py -------------------------------------------------------------------------------- /mcts_rl/datasets/raw/safe_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/raw/safe_rlhf.py -------------------------------------------------------------------------------- /mcts_rl/datasets/safety_preference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/safety_preference.py -------------------------------------------------------------------------------- /mcts_rl/datasets/supervised.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/supervised.py -------------------------------------------------------------------------------- /mcts_rl/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/datasets/utils.py -------------------------------------------------------------------------------- /mcts_rl/finetune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/finetune/__init__.py -------------------------------------------------------------------------------- /mcts_rl/finetune/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/finetune/__main__.py -------------------------------------------------------------------------------- /mcts_rl/finetune/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/finetune/deepspeed.py -------------------------------------------------------------------------------- /mcts_rl/finetune/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/finetune/huggingface.py -------------------------------------------------------------------------------- /mcts_rl/finetune/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/finetune/main.py -------------------------------------------------------------------------------- /mcts_rl/finetune/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/finetune/trainer.py -------------------------------------------------------------------------------- /mcts_rl/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/logger.py -------------------------------------------------------------------------------- /mcts_rl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/normalizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/normalizer.py -------------------------------------------------------------------------------- /mcts_rl/models/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/pretrained.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/bloom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/bloom/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/bloom/modeling_bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/bloom/modeling_bloom.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gpt2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gpt2/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gpt2/modeling_gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gpt2/modeling_gpt2.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gpt_neo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gpt_neo/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gpt_neo/modeling_gpt_neo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gpt_neo/modeling_gpt_neo.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gpt_neox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gpt_neox/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gpt_neox/modeling_gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gpt_neox/modeling_gpt_neox.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gptj/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gptj/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/gptj/modeling_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/gptj/modeling_gptj.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/llama/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/llama/modeling_llama.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/mistral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/mistral/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/mistral/modeling_mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/mistral/modeling_mistral.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/open_llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/open_llama/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/open_llama/modeling_open_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/open_llama/modeling_open_llama.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/opt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/opt/__init__.py -------------------------------------------------------------------------------- /mcts_rl/models/score_model/opt/modeling_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/models/score_model/opt/modeling_opt.py -------------------------------------------------------------------------------- /mcts_rl/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/trainers/__init__.py -------------------------------------------------------------------------------- /mcts_rl/trainers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/trainers/base.py -------------------------------------------------------------------------------- /mcts_rl/trainers/rl_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/trainers/rl_trainer.py -------------------------------------------------------------------------------- /mcts_rl/trainers/supervised_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/trainers/supervised_trainer.py -------------------------------------------------------------------------------- /mcts_rl/trainers/tsrl_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/trainers/tsrl_trainer.py -------------------------------------------------------------------------------- /mcts_rl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/utils.py -------------------------------------------------------------------------------- /mcts_rl/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/mcts_rl/version.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/eval/mctseval_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/scripts/eval/mctseval_math.sh -------------------------------------------------------------------------------- /scripts/eval/mctseval_sqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/scripts/eval/mctseval_sqa.sh -------------------------------------------------------------------------------- /scripts/mcts_csr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/scripts/mcts_csr.sh -------------------------------------------------------------------------------- /scripts/mcts_mathqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/scripts/mcts_mathqa.sh -------------------------------------------------------------------------------- /scripts/mcts_mathqa2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/scripts/mcts_mathqa2.sh -------------------------------------------------------------------------------- /scripts/mcts_mathqa_llama3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/scripts/mcts_mathqa_llama3.sh -------------------------------------------------------------------------------- /visualize.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YuxiXie/MCTS-DPO/HEAD/visualize.ipynb --------------------------------------------------------------------------------